1 //===- MILexer.cpp - Machine instructions lexer implementation ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the lexing of machine instructions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MILexer.h" 15 #include "llvm/ADT/StringSwitch.h" 16 #include "llvm/ADT/Twine.h" 17 #include <cctype> 18 19 using namespace llvm; 20 21 namespace { 22 23 /// This class provides a way to iterate and get characters from the source 24 /// string. 25 class Cursor { 26 const char *Ptr; 27 const char *End; 28 29 public: 30 Cursor(NoneType) : Ptr(nullptr), End(nullptr) {} 31 32 explicit Cursor(StringRef Str) { 33 Ptr = Str.data(); 34 End = Ptr + Str.size(); 35 } 36 37 bool isEOF() const { return Ptr == End; } 38 39 char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } 40 41 void advance(unsigned I = 1) { Ptr += I; } 42 43 StringRef remaining() const { return StringRef(Ptr, End - Ptr); } 44 45 StringRef upto(Cursor C) const { 46 assert(C.Ptr >= Ptr && C.Ptr <= End); 47 return StringRef(Ptr, C.Ptr - Ptr); 48 } 49 50 StringRef::iterator location() const { return Ptr; } 51 52 operator bool() const { return Ptr != nullptr; } 53 }; 54 55 } // end anonymous namespace 56 57 /// Skip the leading whitespace characters and return the updated cursor. 58 static Cursor skipWhitespace(Cursor C) { 59 while (isspace(C.peek())) 60 C.advance(); 61 return C; 62 } 63 64 static bool isIdentifierChar(char C) { 65 return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.'; 66 } 67 68 static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { 69 return StringSwitch<MIToken::TokenKind>(Identifier) 70 .Case("_", MIToken::underscore) 71 .Case("implicit", MIToken::kw_implicit) 72 .Case("implicit-def", MIToken::kw_implicit_define) 73 .Case("dead", MIToken::kw_dead) 74 .Case("killed", MIToken::kw_killed) 75 .Case("undef", MIToken::kw_undef) 76 .Case("frame-setup", MIToken::kw_frame_setup) 77 .Default(MIToken::Identifier); 78 } 79 80 static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { 81 if (!isalpha(C.peek()) && C.peek() != '_') 82 return None; 83 auto Range = C; 84 while (isIdentifierChar(C.peek())) 85 C.advance(); 86 auto Identifier = Range.upto(C); 87 Token = MIToken(getIdentifierKind(Identifier), Identifier); 88 return C; 89 } 90 91 static Cursor maybeLexMachineBasicBlock( 92 Cursor C, MIToken &Token, 93 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 94 if (!C.remaining().startswith("%bb.")) 95 return None; 96 auto Range = C; 97 C.advance(4); // Skip '%bb.' 98 if (!isdigit(C.peek())) { 99 Token = MIToken(MIToken::Error, C.remaining()); 100 ErrorCallback(C.location(), "expected a number after '%bb.'"); 101 return C; 102 } 103 auto NumberRange = C; 104 while (isdigit(C.peek())) 105 C.advance(); 106 StringRef Number = NumberRange.upto(C); 107 unsigned StringOffset = 4 + Number.size(); // Drop '%bb.<id>' 108 if (C.peek() == '.') { 109 C.advance(); // Skip '.' 110 ++StringOffset; 111 while (isIdentifierChar(C.peek())) 112 C.advance(); 113 } 114 Token = MIToken(MIToken::MachineBasicBlock, Range.upto(C), APSInt(Number), 115 StringOffset); 116 return C; 117 } 118 119 static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, 120 MIToken::TokenKind Kind) { 121 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 122 return None; 123 auto Range = C; 124 C.advance(Rule.size()); 125 auto NumberRange = C; 126 while (isdigit(C.peek())) 127 C.advance(); 128 Token = MIToken(Kind, Range.upto(C), APSInt(NumberRange.upto(C))); 129 return C; 130 } 131 132 static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, 133 MIToken::TokenKind Kind) { 134 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 135 return None; 136 auto Range = C; 137 C.advance(Rule.size()); 138 auto NumberRange = C; 139 while (isdigit(C.peek())) 140 C.advance(); 141 StringRef Number = NumberRange.upto(C); 142 unsigned StringOffset = Rule.size() + Number.size(); 143 if (C.peek() == '.') { 144 C.advance(); 145 ++StringOffset; 146 while (isIdentifierChar(C.peek())) 147 C.advance(); 148 } 149 Token = MIToken(Kind, Range.upto(C), APSInt(Number), StringOffset); 150 return C; 151 } 152 153 static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { 154 return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); 155 } 156 157 static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { 158 return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); 159 } 160 161 static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { 162 return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); 163 } 164 165 static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { 166 auto Range = C; 167 C.advance(); // Skip '%' 168 auto NumberRange = C; 169 while (isdigit(C.peek())) 170 C.advance(); 171 Token = MIToken(MIToken::VirtualRegister, Range.upto(C), 172 APSInt(NumberRange.upto(C))); 173 return C; 174 } 175 176 static Cursor maybeLexRegister(Cursor C, MIToken &Token) { 177 if (C.peek() != '%') 178 return None; 179 if (isdigit(C.peek(1))) 180 return lexVirtualRegister(C, Token); 181 auto Range = C; 182 C.advance(); // Skip '%' 183 while (isIdentifierChar(C.peek())) 184 C.advance(); 185 Token = MIToken(MIToken::NamedRegister, Range.upto(C), 186 /*StringOffset=*/1); // Drop the '%' 187 return C; 188 } 189 190 static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token) { 191 if (C.peek() != '@') 192 return None; 193 auto Range = C; 194 C.advance(); // Skip the '@' 195 // TODO: add support for quoted names. 196 if (!isdigit(C.peek())) { 197 while (isIdentifierChar(C.peek())) 198 C.advance(); 199 Token = MIToken(MIToken::NamedGlobalValue, Range.upto(C), 200 /*StringOffset=*/1); // Drop the '@' 201 return C; 202 } 203 auto NumberRange = C; 204 while (isdigit(C.peek())) 205 C.advance(); 206 Token = 207 MIToken(MIToken::GlobalValue, Range.upto(C), APSInt(NumberRange.upto(C))); 208 return C; 209 } 210 211 static Cursor maybeLexIntegerLiteral(Cursor C, MIToken &Token) { 212 if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) 213 return None; 214 auto Range = C; 215 C.advance(); 216 while (isdigit(C.peek())) 217 C.advance(); 218 StringRef StrVal = Range.upto(C); 219 Token = MIToken(MIToken::IntegerLiteral, StrVal, APSInt(StrVal)); 220 return C; 221 } 222 223 static MIToken::TokenKind symbolToken(char C) { 224 switch (C) { 225 case ',': 226 return MIToken::comma; 227 case '=': 228 return MIToken::equal; 229 case ':': 230 return MIToken::colon; 231 default: 232 return MIToken::Error; 233 } 234 } 235 236 static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { 237 auto Kind = symbolToken(C.peek()); 238 if (Kind == MIToken::Error) 239 return None; 240 auto Range = C; 241 C.advance(); 242 Token = MIToken(Kind, Range.upto(C)); 243 return C; 244 } 245 246 StringRef llvm::lexMIToken( 247 StringRef Source, MIToken &Token, 248 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 249 auto C = skipWhitespace(Cursor(Source)); 250 if (C.isEOF()) { 251 Token = MIToken(MIToken::Eof, C.remaining()); 252 return C.remaining(); 253 } 254 255 if (Cursor R = maybeLexIdentifier(C, Token)) 256 return R.remaining(); 257 if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) 258 return R.remaining(); 259 if (Cursor R = maybeLexJumpTableIndex(C, Token)) 260 return R.remaining(); 261 if (Cursor R = maybeLexStackObject(C, Token)) 262 return R.remaining(); 263 if (Cursor R = maybeLexFixedStackObject(C, Token)) 264 return R.remaining(); 265 if (Cursor R = maybeLexRegister(C, Token)) 266 return R.remaining(); 267 if (Cursor R = maybeLexGlobalValue(C, Token)) 268 return R.remaining(); 269 if (Cursor R = maybeLexIntegerLiteral(C, Token)) 270 return R.remaining(); 271 if (Cursor R = maybeLexSymbol(C, Token)) 272 return R.remaining(); 273 274 Token = MIToken(MIToken::Error, C.remaining()); 275 ErrorCallback(C.location(), 276 Twine("unexpected character '") + Twine(C.peek()) + "'"); 277 return C.remaining(); 278 } 279