1 //===- FormatGen.cpp - Utilities for custom assembly formats ----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "FormatGen.h" 10 #include "llvm/ADT/StringSwitch.h" 11 #include "llvm/Support/SourceMgr.h" 12 #include "llvm/TableGen/Error.h" 13 14 using namespace mlir; 15 using namespace mlir::tblgen; 16 17 //===----------------------------------------------------------------------===// 18 // FormatToken 19 //===----------------------------------------------------------------------===// 20 21 llvm::SMLoc FormatToken::getLoc() const { 22 return llvm::SMLoc::getFromPointer(spelling.data()); 23 } 24 25 //===----------------------------------------------------------------------===// 26 // FormatLexer 27 //===----------------------------------------------------------------------===// 28 29 FormatLexer::FormatLexer(llvm::SourceMgr &mgr, llvm::SMLoc loc) 30 : mgr(mgr), loc(loc), 31 curBuffer(mgr.getMemoryBuffer(mgr.getMainFileID())->getBuffer()), 32 curPtr(curBuffer.begin()) {} 33 34 FormatToken FormatLexer::emitError(llvm::SMLoc loc, const Twine &msg) { 35 mgr.PrintMessage(loc, llvm::SourceMgr::DK_Error, msg); 36 llvm::SrcMgr.PrintMessage(this->loc, llvm::SourceMgr::DK_Note, 37 "in custom assembly format for this operation"); 38 return formToken(FormatToken::error, loc.getPointer()); 39 } 40 41 FormatToken FormatLexer::emitError(const char *loc, const Twine &msg) { 42 return emitError(llvm::SMLoc::getFromPointer(loc), msg); 43 } 44 45 FormatToken FormatLexer::emitErrorAndNote(llvm::SMLoc loc, const Twine &msg, 46 const Twine ¬e) { 47 mgr.PrintMessage(loc, llvm::SourceMgr::DK_Error, msg); 48 llvm::SrcMgr.PrintMessage(this->loc, llvm::SourceMgr::DK_Note, 49 "in custom assembly format for this operation"); 50 mgr.PrintMessage(loc, llvm::SourceMgr::DK_Note, note); 51 return formToken(FormatToken::error, loc.getPointer()); 52 } 53 54 int FormatLexer::getNextChar() { 55 char curChar = *curPtr++; 56 switch (curChar) { 57 default: 58 return (unsigned char)curChar; 59 case 0: { 60 // A nul character in the stream is either the end of the current buffer or 61 // a random nul in the file. Disambiguate that here. 62 if (curPtr - 1 != curBuffer.end()) 63 return 0; 64 65 // Otherwise, return end of file. 66 --curPtr; 67 return EOF; 68 } 69 case '\n': 70 case '\r': 71 // Handle the newline character by ignoring it and incrementing the line 72 // count. However, be careful about 'dos style' files with \n\r in them. 73 // Only treat a \n\r or \r\n as a single line. 74 if ((*curPtr == '\n' || (*curPtr == '\r')) && *curPtr != curChar) 75 ++curPtr; 76 return '\n'; 77 } 78 } 79 80 FormatToken FormatLexer::lexToken() { 81 const char *tokStart = curPtr; 82 83 // This always consumes at least one character. 84 int curChar = getNextChar(); 85 switch (curChar) { 86 default: 87 // Handle identifiers: [a-zA-Z_] 88 if (isalpha(curChar) || curChar == '_') 89 return lexIdentifier(tokStart); 90 91 // Unknown character, emit an error. 92 return emitError(tokStart, "unexpected character"); 93 case EOF: 94 // Return EOF denoting the end of lexing. 95 return formToken(FormatToken::eof, tokStart); 96 97 // Lex punctuation. 98 case '^': 99 return formToken(FormatToken::caret, tokStart); 100 case ':': 101 return formToken(FormatToken::colon, tokStart); 102 case ',': 103 return formToken(FormatToken::comma, tokStart); 104 case '=': 105 return formToken(FormatToken::equal, tokStart); 106 case '<': 107 return formToken(FormatToken::less, tokStart); 108 case '>': 109 return formToken(FormatToken::greater, tokStart); 110 case '?': 111 return formToken(FormatToken::question, tokStart); 112 case '(': 113 return formToken(FormatToken::l_paren, tokStart); 114 case ')': 115 return formToken(FormatToken::r_paren, tokStart); 116 case '*': 117 return formToken(FormatToken::star, tokStart); 118 119 // Ignore whitespace characters. 120 case 0: 121 case ' ': 122 case '\t': 123 case '\n': 124 return lexToken(); 125 126 case '`': 127 return lexLiteral(tokStart); 128 case '$': 129 return lexVariable(tokStart); 130 } 131 } 132 133 FormatToken FormatLexer::lexLiteral(const char *tokStart) { 134 assert(curPtr[-1] == '`'); 135 136 // Lex a literal surrounded by ``. 137 while (const char curChar = *curPtr++) { 138 if (curChar == '`') 139 return formToken(FormatToken::literal, tokStart); 140 } 141 return emitError(curPtr - 1, "unexpected end of file in literal"); 142 } 143 144 FormatToken FormatLexer::lexVariable(const char *tokStart) { 145 if (!isalpha(curPtr[0]) && curPtr[0] != '_') 146 return emitError(curPtr - 1, "expected variable name"); 147 148 // Otherwise, consume the rest of the characters. 149 while (isalnum(*curPtr) || *curPtr == '_') 150 ++curPtr; 151 return formToken(FormatToken::variable, tokStart); 152 } 153 154 FormatToken FormatLexer::lexIdentifier(const char *tokStart) { 155 // Match the rest of the identifier regex: [0-9a-zA-Z_\-]* 156 while (isalnum(*curPtr) || *curPtr == '_' || *curPtr == '-') 157 ++curPtr; 158 159 // Check to see if this identifier is a keyword. 160 StringRef str(tokStart, curPtr - tokStart); 161 auto kind = 162 StringSwitch<FormatToken::Kind>(str) 163 .Case("attr-dict", FormatToken::kw_attr_dict) 164 .Case("attr-dict-with-keyword", FormatToken::kw_attr_dict_w_keyword) 165 .Case("custom", FormatToken::kw_custom) 166 .Case("functional-type", FormatToken::kw_functional_type) 167 .Case("operands", FormatToken::kw_operands) 168 .Case("params", FormatToken::kw_params) 169 .Case("ref", FormatToken::kw_ref) 170 .Case("regions", FormatToken::kw_regions) 171 .Case("results", FormatToken::kw_results) 172 .Case("struct", FormatToken::kw_struct) 173 .Case("successors", FormatToken::kw_successors) 174 .Case("type", FormatToken::kw_type) 175 .Case("qualified", FormatToken::kw_qualified) 176 .Default(FormatToken::identifier); 177 return FormatToken(kind, str); 178 } 179 180 //===----------------------------------------------------------------------===// 181 // Utility Functions 182 //===----------------------------------------------------------------------===// 183 184 bool mlir::tblgen::shouldEmitSpaceBefore(StringRef value, 185 bool lastWasPunctuation) { 186 if (value.size() != 1 && value != "->") 187 return true; 188 if (lastWasPunctuation) 189 return !StringRef(">)}],").contains(value.front()); 190 return !StringRef("<>(){}[],").contains(value.front()); 191 } 192 193 bool mlir::tblgen::canFormatStringAsKeyword( 194 StringRef value, function_ref<void(Twine)> emitError) { 195 if (!isalpha(value.front()) && value.front() != '_') { 196 if (emitError) 197 emitError("valid keyword starts with a letter or '_'"); 198 return false; 199 } 200 if (!llvm::all_of(value.drop_front(), [](char c) { 201 return isalnum(c) || c == '_' || c == '$' || c == '.'; 202 })) { 203 if (emitError) 204 emitError( 205 "keywords should contain only alphanum, '_', '$', or '.' characters"); 206 return false; 207 } 208 return true; 209 } 210 211 bool mlir::tblgen::isValidLiteral(StringRef value, 212 function_ref<void(Twine)> emitError) { 213 if (value.empty()) { 214 if (emitError) 215 emitError("literal can't be empty"); 216 return false; 217 } 218 char front = value.front(); 219 220 // If there is only one character, this must either be punctuation or a 221 // single character bare identifier. 222 if (value.size() == 1) { 223 StringRef bare = "_:,=<>()[]{}?+*"; 224 if (isalpha(front) || bare.contains(front)) 225 return true; 226 if (emitError) 227 emitError("single character literal must be a letter or one of '" + bare + 228 "'"); 229 return false; 230 } 231 // Check the punctuation that are larger than a single character. 232 if (value == "->") 233 return true; 234 235 // Otherwise, this must be an identifier. 236 return canFormatStringAsKeyword(value, emitError); 237 } 238 239 //===----------------------------------------------------------------------===// 240 // Commandline Options 241 //===----------------------------------------------------------------------===// 242 243 llvm::cl::opt<bool> mlir::tblgen::formatErrorIsFatal( 244 "asmformat-error-is-fatal", 245 llvm::cl::desc("Emit a fatal error if format parsing fails"), 246 llvm::cl::init(true)); 247