1 //===- MILexer.cpp - Machine instructions lexer implementation ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the lexing of machine instructions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MILexer.h" 15 #include "llvm/ADT/StringExtras.h" 16 #include "llvm/ADT/StringSwitch.h" 17 #include "llvm/ADT/Twine.h" 18 #include <cctype> 19 20 using namespace llvm; 21 22 namespace { 23 24 /// This class provides a way to iterate and get characters from the source 25 /// string. 26 class Cursor { 27 const char *Ptr; 28 const char *End; 29 30 public: 31 Cursor(NoneType) : Ptr(nullptr), End(nullptr) {} 32 33 explicit Cursor(StringRef Str) { 34 Ptr = Str.data(); 35 End = Ptr + Str.size(); 36 } 37 38 bool isEOF() const { return Ptr == End; } 39 40 char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } 41 42 void advance(unsigned I = 1) { Ptr += I; } 43 44 StringRef remaining() const { return StringRef(Ptr, End - Ptr); } 45 46 StringRef upto(Cursor C) const { 47 assert(C.Ptr >= Ptr && C.Ptr <= End); 48 return StringRef(Ptr, C.Ptr - Ptr); 49 } 50 51 StringRef::iterator location() const { return Ptr; } 52 53 operator bool() const { return Ptr != nullptr; } 54 }; 55 56 } // end anonymous namespace 57 58 /// Skip the leading whitespace characters and return the updated cursor. 59 static Cursor skipWhitespace(Cursor C) { 60 while (isspace(C.peek())) 61 C.advance(); 62 return C; 63 } 64 65 /// Return true if the given character satisfies the following regular 66 /// expression: [-a-zA-Z$._0-9] 67 static bool isIdentifierChar(char C) { 68 return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || 69 C == '$'; 70 } 71 72 void MIToken::unescapeQuotedStringValue(std::string &Str) const { 73 assert(isStringValueQuoted() && "String value isn't quoted"); 74 StringRef Value = Range.drop_front(StringOffset); 75 assert(Value.front() == '"' && Value.back() == '"'); 76 Cursor C = Cursor(Value.substr(1, Value.size() - 2)); 77 78 Str.clear(); 79 Str.reserve(C.remaining().size()); 80 while (!C.isEOF()) { 81 char Char = C.peek(); 82 if (Char == '\\') { 83 if (C.peek(1) == '\\') { 84 // Two '\' become one 85 Str += '\\'; 86 C.advance(2); 87 continue; 88 } 89 if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) { 90 Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2)); 91 C.advance(3); 92 continue; 93 } 94 } 95 Str += Char; 96 C.advance(); 97 } 98 } 99 100 /// Lex a string constant using the following regular expression: \"[^\"]*\" 101 static Cursor lexStringConstant( 102 Cursor C, 103 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 104 assert(C.peek() == '"'); 105 for (C.advance(); C.peek() != '"'; C.advance()) { 106 if (C.isEOF()) { 107 ErrorCallback( 108 C.location(), 109 "end of machine instruction reached before the closing '\"'"); 110 return None; 111 } 112 } 113 C.advance(); 114 return C; 115 } 116 117 static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { 118 return StringSwitch<MIToken::TokenKind>(Identifier) 119 .Case("_", MIToken::underscore) 120 .Case("implicit", MIToken::kw_implicit) 121 .Case("implicit-def", MIToken::kw_implicit_define) 122 .Case("dead", MIToken::kw_dead) 123 .Case("killed", MIToken::kw_killed) 124 .Case("undef", MIToken::kw_undef) 125 .Case("frame-setup", MIToken::kw_frame_setup) 126 .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) 127 .Default(MIToken::Identifier); 128 } 129 130 static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { 131 if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.') 132 return None; 133 auto Range = C; 134 while (isIdentifierChar(C.peek())) 135 C.advance(); 136 auto Identifier = Range.upto(C); 137 Token = MIToken(getIdentifierKind(Identifier), Identifier); 138 return C; 139 } 140 141 static Cursor maybeLexMachineBasicBlock( 142 Cursor C, MIToken &Token, 143 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 144 if (!C.remaining().startswith("%bb.")) 145 return None; 146 auto Range = C; 147 C.advance(4); // Skip '%bb.' 148 if (!isdigit(C.peek())) { 149 Token = MIToken(MIToken::Error, C.remaining()); 150 ErrorCallback(C.location(), "expected a number after '%bb.'"); 151 return C; 152 } 153 auto NumberRange = C; 154 while (isdigit(C.peek())) 155 C.advance(); 156 StringRef Number = NumberRange.upto(C); 157 unsigned StringOffset = 4 + Number.size(); // Drop '%bb.<id>' 158 if (C.peek() == '.') { 159 C.advance(); // Skip '.' 160 ++StringOffset; 161 while (isIdentifierChar(C.peek())) 162 C.advance(); 163 } 164 Token = MIToken(MIToken::MachineBasicBlock, Range.upto(C), APSInt(Number), 165 StringOffset); 166 return C; 167 } 168 169 static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, 170 MIToken::TokenKind Kind) { 171 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 172 return None; 173 auto Range = C; 174 C.advance(Rule.size()); 175 auto NumberRange = C; 176 while (isdigit(C.peek())) 177 C.advance(); 178 Token = MIToken(Kind, Range.upto(C), APSInt(NumberRange.upto(C))); 179 return C; 180 } 181 182 static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, 183 MIToken::TokenKind Kind) { 184 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 185 return None; 186 auto Range = C; 187 C.advance(Rule.size()); 188 auto NumberRange = C; 189 while (isdigit(C.peek())) 190 C.advance(); 191 StringRef Number = NumberRange.upto(C); 192 unsigned StringOffset = Rule.size() + Number.size(); 193 if (C.peek() == '.') { 194 C.advance(); 195 ++StringOffset; 196 while (isIdentifierChar(C.peek())) 197 C.advance(); 198 } 199 Token = MIToken(Kind, Range.upto(C), APSInt(Number), StringOffset); 200 return C; 201 } 202 203 static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { 204 return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); 205 } 206 207 static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { 208 return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); 209 } 210 211 static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { 212 return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); 213 } 214 215 static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { 216 return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem); 217 } 218 219 static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { 220 auto Range = C; 221 C.advance(); // Skip '%' 222 auto NumberRange = C; 223 while (isdigit(C.peek())) 224 C.advance(); 225 Token = MIToken(MIToken::VirtualRegister, Range.upto(C), 226 APSInt(NumberRange.upto(C))); 227 return C; 228 } 229 230 static Cursor maybeLexRegister(Cursor C, MIToken &Token) { 231 if (C.peek() != '%') 232 return None; 233 if (isdigit(C.peek(1))) 234 return lexVirtualRegister(C, Token); 235 auto Range = C; 236 C.advance(); // Skip '%' 237 while (isIdentifierChar(C.peek())) 238 C.advance(); 239 Token = MIToken(MIToken::NamedRegister, Range.upto(C), 240 /*StringOffset=*/1); // Drop the '%' 241 return C; 242 } 243 244 static Cursor lexName( 245 Cursor C, MIToken &Token, MIToken::TokenKind Type, 246 MIToken::TokenKind QuotedType, unsigned PrefixLength, 247 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 248 auto Range = C; 249 C.advance(PrefixLength); 250 if (C.peek() == '"') { 251 if (Cursor R = lexStringConstant(C, ErrorCallback)) { 252 Token = MIToken(QuotedType, Range.upto(R), PrefixLength); 253 return R; 254 } 255 Token = MIToken(MIToken::Error, Range.remaining()); 256 return Range; 257 } 258 while (isIdentifierChar(C.peek())) 259 C.advance(); 260 Token = MIToken(Type, Range.upto(C), PrefixLength); 261 return C; 262 } 263 264 static Cursor maybeLexGlobalValue( 265 Cursor C, MIToken &Token, 266 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 267 if (C.peek() != '@') 268 return None; 269 if (!isdigit(C.peek(1))) 270 return lexName(C, Token, MIToken::NamedGlobalValue, 271 MIToken::QuotedNamedGlobalValue, /*PrefixLength=*/1, 272 ErrorCallback); 273 auto Range = C; 274 C.advance(1); // Skip the '@' 275 auto NumberRange = C; 276 while (isdigit(C.peek())) 277 C.advance(); 278 Token = 279 MIToken(MIToken::GlobalValue, Range.upto(C), APSInt(NumberRange.upto(C))); 280 return C; 281 } 282 283 static Cursor maybeLexExternalSymbol( 284 Cursor C, MIToken &Token, 285 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 286 if (C.peek() != '$') 287 return None; 288 return lexName(C, Token, MIToken::ExternalSymbol, 289 MIToken::QuotedExternalSymbol, 290 /*PrefixLength=*/1, ErrorCallback); 291 } 292 293 static Cursor maybeLexIntegerLiteral(Cursor C, MIToken &Token) { 294 if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) 295 return None; 296 auto Range = C; 297 C.advance(); 298 while (isdigit(C.peek())) 299 C.advance(); 300 StringRef StrVal = Range.upto(C); 301 Token = MIToken(MIToken::IntegerLiteral, StrVal, APSInt(StrVal)); 302 return C; 303 } 304 305 static MIToken::TokenKind symbolToken(char C) { 306 switch (C) { 307 case ',': 308 return MIToken::comma; 309 case '=': 310 return MIToken::equal; 311 case ':': 312 return MIToken::colon; 313 default: 314 return MIToken::Error; 315 } 316 } 317 318 static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { 319 auto Kind = symbolToken(C.peek()); 320 if (Kind == MIToken::Error) 321 return None; 322 auto Range = C; 323 C.advance(); 324 Token = MIToken(Kind, Range.upto(C)); 325 return C; 326 } 327 328 StringRef llvm::lexMIToken( 329 StringRef Source, MIToken &Token, 330 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 331 auto C = skipWhitespace(Cursor(Source)); 332 if (C.isEOF()) { 333 Token = MIToken(MIToken::Eof, C.remaining()); 334 return C.remaining(); 335 } 336 337 if (Cursor R = maybeLexIdentifier(C, Token)) 338 return R.remaining(); 339 if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) 340 return R.remaining(); 341 if (Cursor R = maybeLexJumpTableIndex(C, Token)) 342 return R.remaining(); 343 if (Cursor R = maybeLexStackObject(C, Token)) 344 return R.remaining(); 345 if (Cursor R = maybeLexFixedStackObject(C, Token)) 346 return R.remaining(); 347 if (Cursor R = maybeLexConstantPoolItem(C, Token)) 348 return R.remaining(); 349 if (Cursor R = maybeLexRegister(C, Token)) 350 return R.remaining(); 351 if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) 352 return R.remaining(); 353 if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) 354 return R.remaining(); 355 if (Cursor R = maybeLexIntegerLiteral(C, Token)) 356 return R.remaining(); 357 if (Cursor R = maybeLexSymbol(C, Token)) 358 return R.remaining(); 359 360 Token = MIToken(MIToken::Error, C.remaining()); 361 ErrorCallback(C.location(), 362 Twine("unexpected character '") + Twine(C.peek()) + "'"); 363 return C.remaining(); 364 } 365