1 //===- MILexer.cpp - Machine instructions lexer implementation ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the lexing of machine instructions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MILexer.h" 15 #include "llvm/ADT/StringExtras.h" 16 #include "llvm/ADT/StringSwitch.h" 17 #include "llvm/ADT/Twine.h" 18 #include <cctype> 19 20 using namespace llvm; 21 22 namespace { 23 24 /// This class provides a way to iterate and get characters from the source 25 /// string. 26 class Cursor { 27 const char *Ptr; 28 const char *End; 29 30 public: 31 Cursor(NoneType) : Ptr(nullptr), End(nullptr) {} 32 33 explicit Cursor(StringRef Str) { 34 Ptr = Str.data(); 35 End = Ptr + Str.size(); 36 } 37 38 bool isEOF() const { return Ptr == End; } 39 40 char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } 41 42 void advance(unsigned I = 1) { Ptr += I; } 43 44 StringRef remaining() const { return StringRef(Ptr, End - Ptr); } 45 46 StringRef upto(Cursor C) const { 47 assert(C.Ptr >= Ptr && C.Ptr <= End); 48 return StringRef(Ptr, C.Ptr - Ptr); 49 } 50 51 StringRef::iterator location() const { return Ptr; } 52 53 operator bool() const { return Ptr != nullptr; } 54 }; 55 56 } // end anonymous namespace 57 58 /// Skip the leading whitespace characters and return the updated cursor. 59 static Cursor skipWhitespace(Cursor C) { 60 while (isspace(C.peek())) 61 C.advance(); 62 return C; 63 } 64 65 /// Return true if the given character satisfies the following regular 66 /// expression: [-a-zA-Z$._0-9] 67 static bool isIdentifierChar(char C) { 68 return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || 69 C == '$'; 70 } 71 72 void MIToken::unescapeQuotedStringValue(std::string &Str) const { 73 assert(isStringValueQuoted() && "String value isn't quoted"); 74 StringRef Value = Range.drop_front(StringOffset); 75 assert(Value.front() == '"' && Value.back() == '"'); 76 Cursor C = Cursor(Value.substr(1, Value.size() - 2)); 77 78 Str.clear(); 79 Str.reserve(C.remaining().size()); 80 while (!C.isEOF()) { 81 char Char = C.peek(); 82 if (Char == '\\') { 83 if (C.peek(1) == '\\') { 84 // Two '\' become one 85 Str += '\\'; 86 C.advance(2); 87 continue; 88 } 89 if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) { 90 Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2)); 91 C.advance(3); 92 continue; 93 } 94 } 95 Str += Char; 96 C.advance(); 97 } 98 } 99 100 /// Lex a string constant using the following regular expression: \"[^\"]*\" 101 static Cursor lexStringConstant( 102 Cursor C, 103 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 104 assert(C.peek() == '"'); 105 for (C.advance(); C.peek() != '"'; C.advance()) { 106 if (C.isEOF()) { 107 ErrorCallback( 108 C.location(), 109 "end of machine instruction reached before the closing '\"'"); 110 return None; 111 } 112 } 113 C.advance(); 114 return C; 115 } 116 117 static Cursor lexName( 118 Cursor C, MIToken &Token, MIToken::TokenKind Type, 119 MIToken::TokenKind QuotedType, unsigned PrefixLength, 120 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 121 auto Range = C; 122 C.advance(PrefixLength); 123 if (C.peek() == '"') { 124 if (Cursor R = lexStringConstant(C, ErrorCallback)) { 125 Token = MIToken(QuotedType, Range.upto(R), PrefixLength); 126 return R; 127 } 128 Token = MIToken(MIToken::Error, Range.remaining()); 129 return Range; 130 } 131 while (isIdentifierChar(C.peek())) 132 C.advance(); 133 Token = MIToken(Type, Range.upto(C), PrefixLength); 134 return C; 135 } 136 137 static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { 138 return StringSwitch<MIToken::TokenKind>(Identifier) 139 .Case("_", MIToken::underscore) 140 .Case("implicit", MIToken::kw_implicit) 141 .Case("implicit-def", MIToken::kw_implicit_define) 142 .Case("dead", MIToken::kw_dead) 143 .Case("killed", MIToken::kw_killed) 144 .Case("undef", MIToken::kw_undef) 145 .Case("frame-setup", MIToken::kw_frame_setup) 146 .Case("debug-location", MIToken::kw_debug_location) 147 .Case(".cfi_offset", MIToken::kw_cfi_offset) 148 .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register) 149 .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) 150 .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa) 151 .Case("blockaddress", MIToken::kw_blockaddress) 152 .Case("target-index", MIToken::kw_target_index) 153 .Default(MIToken::Identifier); 154 } 155 156 static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { 157 if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.') 158 return None; 159 auto Range = C; 160 while (isIdentifierChar(C.peek())) 161 C.advance(); 162 auto Identifier = Range.upto(C); 163 Token = MIToken(getIdentifierKind(Identifier), Identifier); 164 return C; 165 } 166 167 static Cursor maybeLexMachineBasicBlock( 168 Cursor C, MIToken &Token, 169 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 170 if (!C.remaining().startswith("%bb.")) 171 return None; 172 auto Range = C; 173 C.advance(4); // Skip '%bb.' 174 if (!isdigit(C.peek())) { 175 Token = MIToken(MIToken::Error, C.remaining()); 176 ErrorCallback(C.location(), "expected a number after '%bb.'"); 177 return C; 178 } 179 auto NumberRange = C; 180 while (isdigit(C.peek())) 181 C.advance(); 182 StringRef Number = NumberRange.upto(C); 183 unsigned StringOffset = 4 + Number.size(); // Drop '%bb.<id>' 184 if (C.peek() == '.') { 185 C.advance(); // Skip '.' 186 ++StringOffset; 187 while (isIdentifierChar(C.peek())) 188 C.advance(); 189 } 190 Token = MIToken(MIToken::MachineBasicBlock, Range.upto(C), APSInt(Number), 191 StringOffset); 192 return C; 193 } 194 195 static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, 196 MIToken::TokenKind Kind) { 197 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 198 return None; 199 auto Range = C; 200 C.advance(Rule.size()); 201 auto NumberRange = C; 202 while (isdigit(C.peek())) 203 C.advance(); 204 Token = MIToken(Kind, Range.upto(C), APSInt(NumberRange.upto(C))); 205 return C; 206 } 207 208 static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, 209 MIToken::TokenKind Kind) { 210 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 211 return None; 212 auto Range = C; 213 C.advance(Rule.size()); 214 auto NumberRange = C; 215 while (isdigit(C.peek())) 216 C.advance(); 217 StringRef Number = NumberRange.upto(C); 218 unsigned StringOffset = Rule.size() + Number.size(); 219 if (C.peek() == '.') { 220 C.advance(); 221 ++StringOffset; 222 while (isIdentifierChar(C.peek())) 223 C.advance(); 224 } 225 Token = MIToken(Kind, Range.upto(C), APSInt(Number), StringOffset); 226 return C; 227 } 228 229 static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { 230 return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); 231 } 232 233 static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { 234 return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); 235 } 236 237 static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { 238 return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); 239 } 240 241 static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { 242 return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem); 243 } 244 245 static Cursor maybeLexIRBlock( 246 Cursor C, MIToken &Token, 247 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 248 const StringRef Rule = "%ir-block."; 249 if (!C.remaining().startswith(Rule)) 250 return None; 251 if (isdigit(C.peek(Rule.size()))) 252 return maybeLexIndex(C, Token, Rule, MIToken::IRBlock); 253 return lexName(C, Token, MIToken::NamedIRBlock, MIToken::QuotedNamedIRBlock, 254 Rule.size(), ErrorCallback); 255 } 256 257 static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { 258 auto Range = C; 259 C.advance(); // Skip '%' 260 auto NumberRange = C; 261 while (isdigit(C.peek())) 262 C.advance(); 263 Token = MIToken(MIToken::VirtualRegister, Range.upto(C), 264 APSInt(NumberRange.upto(C))); 265 return C; 266 } 267 268 static Cursor maybeLexRegister(Cursor C, MIToken &Token) { 269 if (C.peek() != '%') 270 return None; 271 if (isdigit(C.peek(1))) 272 return lexVirtualRegister(C, Token); 273 auto Range = C; 274 C.advance(); // Skip '%' 275 while (isIdentifierChar(C.peek())) 276 C.advance(); 277 Token = MIToken(MIToken::NamedRegister, Range.upto(C), 278 /*StringOffset=*/1); // Drop the '%' 279 return C; 280 } 281 282 static Cursor maybeLexGlobalValue( 283 Cursor C, MIToken &Token, 284 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 285 if (C.peek() != '@') 286 return None; 287 if (!isdigit(C.peek(1))) 288 return lexName(C, Token, MIToken::NamedGlobalValue, 289 MIToken::QuotedNamedGlobalValue, /*PrefixLength=*/1, 290 ErrorCallback); 291 auto Range = C; 292 C.advance(1); // Skip the '@' 293 auto NumberRange = C; 294 while (isdigit(C.peek())) 295 C.advance(); 296 Token = 297 MIToken(MIToken::GlobalValue, Range.upto(C), APSInt(NumberRange.upto(C))); 298 return C; 299 } 300 301 static Cursor maybeLexExternalSymbol( 302 Cursor C, MIToken &Token, 303 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 304 if (C.peek() != '$') 305 return None; 306 return lexName(C, Token, MIToken::ExternalSymbol, 307 MIToken::QuotedExternalSymbol, 308 /*PrefixLength=*/1, ErrorCallback); 309 } 310 311 static Cursor maybeLexIntegerLiteral(Cursor C, MIToken &Token) { 312 if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) 313 return None; 314 auto Range = C; 315 C.advance(); 316 while (isdigit(C.peek())) 317 C.advance(); 318 StringRef StrVal = Range.upto(C); 319 Token = MIToken(MIToken::IntegerLiteral, StrVal, APSInt(StrVal)); 320 return C; 321 } 322 323 static MIToken::TokenKind symbolToken(char C) { 324 switch (C) { 325 case ',': 326 return MIToken::comma; 327 case '=': 328 return MIToken::equal; 329 case ':': 330 return MIToken::colon; 331 case '!': 332 return MIToken::exclaim; 333 case '(': 334 return MIToken::lparen; 335 case ')': 336 return MIToken::rparen; 337 default: 338 return MIToken::Error; 339 } 340 } 341 342 static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { 343 auto Kind = symbolToken(C.peek()); 344 if (Kind == MIToken::Error) 345 return None; 346 auto Range = C; 347 C.advance(); 348 Token = MIToken(Kind, Range.upto(C)); 349 return C; 350 } 351 352 StringRef llvm::lexMIToken( 353 StringRef Source, MIToken &Token, 354 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 355 auto C = skipWhitespace(Cursor(Source)); 356 if (C.isEOF()) { 357 Token = MIToken(MIToken::Eof, C.remaining()); 358 return C.remaining(); 359 } 360 361 if (Cursor R = maybeLexIdentifier(C, Token)) 362 return R.remaining(); 363 if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) 364 return R.remaining(); 365 if (Cursor R = maybeLexJumpTableIndex(C, Token)) 366 return R.remaining(); 367 if (Cursor R = maybeLexStackObject(C, Token)) 368 return R.remaining(); 369 if (Cursor R = maybeLexFixedStackObject(C, Token)) 370 return R.remaining(); 371 if (Cursor R = maybeLexConstantPoolItem(C, Token)) 372 return R.remaining(); 373 if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) 374 return R.remaining(); 375 if (Cursor R = maybeLexRegister(C, Token)) 376 return R.remaining(); 377 if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) 378 return R.remaining(); 379 if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) 380 return R.remaining(); 381 if (Cursor R = maybeLexIntegerLiteral(C, Token)) 382 return R.remaining(); 383 if (Cursor R = maybeLexSymbol(C, Token)) 384 return R.remaining(); 385 386 Token = MIToken(MIToken::Error, C.remaining()); 387 ErrorCallback(C.location(), 388 Twine("unexpected character '") + Twine(C.peek()) + "'"); 389 return C.remaining(); 390 } 391