1 //===- MILexer.cpp - Machine instructions lexer implementation ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the lexing of machine instructions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MILexer.h" 15 #include "llvm/ADT/StringExtras.h" 16 #include "llvm/ADT/StringSwitch.h" 17 #include "llvm/ADT/Twine.h" 18 #include <cctype> 19 20 using namespace llvm; 21 22 namespace { 23 24 /// This class provides a way to iterate and get characters from the source 25 /// string. 26 class Cursor { 27 const char *Ptr; 28 const char *End; 29 30 public: 31 Cursor(NoneType) : Ptr(nullptr), End(nullptr) {} 32 33 explicit Cursor(StringRef Str) { 34 Ptr = Str.data(); 35 End = Ptr + Str.size(); 36 } 37 38 bool isEOF() const { return Ptr == End; } 39 40 char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } 41 42 void advance(unsigned I = 1) { Ptr += I; } 43 44 StringRef remaining() const { return StringRef(Ptr, End - Ptr); } 45 46 StringRef upto(Cursor C) const { 47 assert(C.Ptr >= Ptr && C.Ptr <= End); 48 return StringRef(Ptr, C.Ptr - Ptr); 49 } 50 51 StringRef::iterator location() const { return Ptr; } 52 53 operator bool() const { return Ptr != nullptr; } 54 }; 55 56 } // end anonymous namespace 57 58 /// Skip the leading whitespace characters and return the updated cursor. 59 static Cursor skipWhitespace(Cursor C) { 60 while (isspace(C.peek())) 61 C.advance(); 62 return C; 63 } 64 65 /// Return true if the given character satisfies the following regular 66 /// expression: [-a-zA-Z$._0-9] 67 static bool isIdentifierChar(char C) { 68 return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || 69 C == '$'; 70 } 71 72 void MIToken::unescapeQuotedStringValue(std::string &Str) const { 73 assert(isStringValueQuoted() && "String value isn't quoted"); 74 StringRef Value = Range.drop_front(StringOffset); 75 assert(Value.front() == '"' && Value.back() == '"'); 76 Cursor C = Cursor(Value.substr(1, Value.size() - 2)); 77 78 Str.clear(); 79 Str.reserve(C.remaining().size()); 80 while (!C.isEOF()) { 81 char Char = C.peek(); 82 if (Char == '\\') { 83 if (C.peek(1) == '\\') { 84 // Two '\' become one 85 Str += '\\'; 86 C.advance(2); 87 continue; 88 } 89 if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) { 90 Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2)); 91 C.advance(3); 92 continue; 93 } 94 } 95 Str += Char; 96 C.advance(); 97 } 98 } 99 100 /// Lex a string constant using the following regular expression: \"[^\"]*\" 101 static Cursor lexStringConstant( 102 Cursor C, 103 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 104 assert(C.peek() == '"'); 105 for (C.advance(); C.peek() != '"'; C.advance()) { 106 if (C.isEOF()) { 107 ErrorCallback( 108 C.location(), 109 "end of machine instruction reached before the closing '\"'"); 110 return None; 111 } 112 } 113 C.advance(); 114 return C; 115 } 116 117 static Cursor lexName( 118 Cursor C, MIToken &Token, MIToken::TokenKind Type, 119 MIToken::TokenKind QuotedType, unsigned PrefixLength, 120 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 121 auto Range = C; 122 C.advance(PrefixLength); 123 if (C.peek() == '"') { 124 if (Cursor R = lexStringConstant(C, ErrorCallback)) { 125 Token = MIToken(QuotedType, Range.upto(R), PrefixLength); 126 return R; 127 } 128 Token = MIToken(MIToken::Error, Range.remaining()); 129 return Range; 130 } 131 while (isIdentifierChar(C.peek())) 132 C.advance(); 133 Token = MIToken(Type, Range.upto(C), PrefixLength); 134 return C; 135 } 136 137 static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { 138 return StringSwitch<MIToken::TokenKind>(Identifier) 139 .Case("_", MIToken::underscore) 140 .Case("implicit", MIToken::kw_implicit) 141 .Case("implicit-def", MIToken::kw_implicit_define) 142 .Case("dead", MIToken::kw_dead) 143 .Case("killed", MIToken::kw_killed) 144 .Case("undef", MIToken::kw_undef) 145 .Case("frame-setup", MIToken::kw_frame_setup) 146 .Case("debug-location", MIToken::kw_debug_location) 147 .Case(".cfi_offset", MIToken::kw_cfi_offset) 148 .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register) 149 .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) 150 .Case("blockaddress", MIToken::kw_blockaddress) 151 .Case("target-index", MIToken::kw_target_index) 152 .Default(MIToken::Identifier); 153 } 154 155 static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { 156 if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.') 157 return None; 158 auto Range = C; 159 while (isIdentifierChar(C.peek())) 160 C.advance(); 161 auto Identifier = Range.upto(C); 162 Token = MIToken(getIdentifierKind(Identifier), Identifier); 163 return C; 164 } 165 166 static Cursor maybeLexMachineBasicBlock( 167 Cursor C, MIToken &Token, 168 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 169 if (!C.remaining().startswith("%bb.")) 170 return None; 171 auto Range = C; 172 C.advance(4); // Skip '%bb.' 173 if (!isdigit(C.peek())) { 174 Token = MIToken(MIToken::Error, C.remaining()); 175 ErrorCallback(C.location(), "expected a number after '%bb.'"); 176 return C; 177 } 178 auto NumberRange = C; 179 while (isdigit(C.peek())) 180 C.advance(); 181 StringRef Number = NumberRange.upto(C); 182 unsigned StringOffset = 4 + Number.size(); // Drop '%bb.<id>' 183 if (C.peek() == '.') { 184 C.advance(); // Skip '.' 185 ++StringOffset; 186 while (isIdentifierChar(C.peek())) 187 C.advance(); 188 } 189 Token = MIToken(MIToken::MachineBasicBlock, Range.upto(C), APSInt(Number), 190 StringOffset); 191 return C; 192 } 193 194 static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, 195 MIToken::TokenKind Kind) { 196 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 197 return None; 198 auto Range = C; 199 C.advance(Rule.size()); 200 auto NumberRange = C; 201 while (isdigit(C.peek())) 202 C.advance(); 203 Token = MIToken(Kind, Range.upto(C), APSInt(NumberRange.upto(C))); 204 return C; 205 } 206 207 static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, 208 MIToken::TokenKind Kind) { 209 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 210 return None; 211 auto Range = C; 212 C.advance(Rule.size()); 213 auto NumberRange = C; 214 while (isdigit(C.peek())) 215 C.advance(); 216 StringRef Number = NumberRange.upto(C); 217 unsigned StringOffset = Rule.size() + Number.size(); 218 if (C.peek() == '.') { 219 C.advance(); 220 ++StringOffset; 221 while (isIdentifierChar(C.peek())) 222 C.advance(); 223 } 224 Token = MIToken(Kind, Range.upto(C), APSInt(Number), StringOffset); 225 return C; 226 } 227 228 static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { 229 return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); 230 } 231 232 static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { 233 return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); 234 } 235 236 static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { 237 return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); 238 } 239 240 static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { 241 return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem); 242 } 243 244 static Cursor maybeLexIRBlock( 245 Cursor C, MIToken &Token, 246 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 247 const StringRef Rule = "%ir-block."; 248 if (!C.remaining().startswith(Rule)) 249 return None; 250 if (isdigit(C.peek(Rule.size()))) 251 return maybeLexIndex(C, Token, Rule, MIToken::IRBlock); 252 return lexName(C, Token, MIToken::NamedIRBlock, MIToken::QuotedNamedIRBlock, 253 Rule.size(), ErrorCallback); 254 } 255 256 static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { 257 auto Range = C; 258 C.advance(); // Skip '%' 259 auto NumberRange = C; 260 while (isdigit(C.peek())) 261 C.advance(); 262 Token = MIToken(MIToken::VirtualRegister, Range.upto(C), 263 APSInt(NumberRange.upto(C))); 264 return C; 265 } 266 267 static Cursor maybeLexRegister(Cursor C, MIToken &Token) { 268 if (C.peek() != '%') 269 return None; 270 if (isdigit(C.peek(1))) 271 return lexVirtualRegister(C, Token); 272 auto Range = C; 273 C.advance(); // Skip '%' 274 while (isIdentifierChar(C.peek())) 275 C.advance(); 276 Token = MIToken(MIToken::NamedRegister, Range.upto(C), 277 /*StringOffset=*/1); // Drop the '%' 278 return C; 279 } 280 281 static Cursor maybeLexGlobalValue( 282 Cursor C, MIToken &Token, 283 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 284 if (C.peek() != '@') 285 return None; 286 if (!isdigit(C.peek(1))) 287 return lexName(C, Token, MIToken::NamedGlobalValue, 288 MIToken::QuotedNamedGlobalValue, /*PrefixLength=*/1, 289 ErrorCallback); 290 auto Range = C; 291 C.advance(1); // Skip the '@' 292 auto NumberRange = C; 293 while (isdigit(C.peek())) 294 C.advance(); 295 Token = 296 MIToken(MIToken::GlobalValue, Range.upto(C), APSInt(NumberRange.upto(C))); 297 return C; 298 } 299 300 static Cursor maybeLexExternalSymbol( 301 Cursor C, MIToken &Token, 302 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 303 if (C.peek() != '$') 304 return None; 305 return lexName(C, Token, MIToken::ExternalSymbol, 306 MIToken::QuotedExternalSymbol, 307 /*PrefixLength=*/1, ErrorCallback); 308 } 309 310 static Cursor maybeLexIntegerLiteral(Cursor C, MIToken &Token) { 311 if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) 312 return None; 313 auto Range = C; 314 C.advance(); 315 while (isdigit(C.peek())) 316 C.advance(); 317 StringRef StrVal = Range.upto(C); 318 Token = MIToken(MIToken::IntegerLiteral, StrVal, APSInt(StrVal)); 319 return C; 320 } 321 322 static MIToken::TokenKind symbolToken(char C) { 323 switch (C) { 324 case ',': 325 return MIToken::comma; 326 case '=': 327 return MIToken::equal; 328 case ':': 329 return MIToken::colon; 330 case '!': 331 return MIToken::exclaim; 332 case '(': 333 return MIToken::lparen; 334 case ')': 335 return MIToken::rparen; 336 default: 337 return MIToken::Error; 338 } 339 } 340 341 static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { 342 auto Kind = symbolToken(C.peek()); 343 if (Kind == MIToken::Error) 344 return None; 345 auto Range = C; 346 C.advance(); 347 Token = MIToken(Kind, Range.upto(C)); 348 return C; 349 } 350 351 StringRef llvm::lexMIToken( 352 StringRef Source, MIToken &Token, 353 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 354 auto C = skipWhitespace(Cursor(Source)); 355 if (C.isEOF()) { 356 Token = MIToken(MIToken::Eof, C.remaining()); 357 return C.remaining(); 358 } 359 360 if (Cursor R = maybeLexIdentifier(C, Token)) 361 return R.remaining(); 362 if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) 363 return R.remaining(); 364 if (Cursor R = maybeLexJumpTableIndex(C, Token)) 365 return R.remaining(); 366 if (Cursor R = maybeLexStackObject(C, Token)) 367 return R.remaining(); 368 if (Cursor R = maybeLexFixedStackObject(C, Token)) 369 return R.remaining(); 370 if (Cursor R = maybeLexConstantPoolItem(C, Token)) 371 return R.remaining(); 372 if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) 373 return R.remaining(); 374 if (Cursor R = maybeLexRegister(C, Token)) 375 return R.remaining(); 376 if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) 377 return R.remaining(); 378 if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) 379 return R.remaining(); 380 if (Cursor R = maybeLexIntegerLiteral(C, Token)) 381 return R.remaining(); 382 if (Cursor R = maybeLexSymbol(C, Token)) 383 return R.remaining(); 384 385 Token = MIToken(MIToken::Error, C.remaining()); 386 ErrorCallback(C.location(), 387 Twine("unexpected character '") + Twine(C.peek()) + "'"); 388 return C.remaining(); 389 } 390