1 //===- MILexer.cpp - Machine instructions lexer implementation ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the lexing of machine instructions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MILexer.h" 15 #include "llvm/ADT/StringSwitch.h" 16 #include "llvm/ADT/Twine.h" 17 #include <cctype> 18 19 using namespace llvm; 20 21 namespace { 22 23 /// This class provides a way to iterate and get characters from the source 24 /// string. 25 class Cursor { 26 const char *Ptr; 27 const char *End; 28 29 public: 30 Cursor(NoneType) : Ptr(nullptr), End(nullptr) {} 31 32 explicit Cursor(StringRef Str) { 33 Ptr = Str.data(); 34 End = Ptr + Str.size(); 35 } 36 37 bool isEOF() const { return Ptr == End; } 38 39 char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } 40 41 void advance(unsigned I = 1) { Ptr += I; } 42 43 StringRef remaining() const { return StringRef(Ptr, End - Ptr); } 44 45 StringRef upto(Cursor C) const { 46 assert(C.Ptr >= Ptr && C.Ptr <= End); 47 return StringRef(Ptr, C.Ptr - Ptr); 48 } 49 50 StringRef::iterator location() const { return Ptr; } 51 52 operator bool() const { return Ptr != nullptr; } 53 }; 54 55 } // end anonymous namespace 56 57 /// Skip the leading whitespace characters and return the updated cursor. 58 static Cursor skipWhitespace(Cursor C) { 59 while (isspace(C.peek())) 60 C.advance(); 61 return C; 62 } 63 64 static bool isIdentifierChar(char C) { 65 return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.'; 66 } 67 68 static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { 69 return StringSwitch<MIToken::TokenKind>(Identifier) 70 .Case("_", MIToken::underscore) 71 .Case("implicit", MIToken::kw_implicit) 72 .Case("implicit-def", MIToken::kw_implicit_define) 73 .Case("dead", MIToken::kw_dead) 74 .Case("killed", MIToken::kw_killed) 75 .Case("undef", MIToken::kw_undef) 76 .Default(MIToken::Identifier); 77 } 78 79 static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { 80 if (!isalpha(C.peek()) && C.peek() != '_') 81 return None; 82 auto Range = C; 83 while (isIdentifierChar(C.peek())) 84 C.advance(); 85 auto Identifier = Range.upto(C); 86 Token = MIToken(getIdentifierKind(Identifier), Identifier); 87 return C; 88 } 89 90 static Cursor maybeLexMachineBasicBlock( 91 Cursor C, MIToken &Token, 92 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 93 if (!C.remaining().startswith("%bb.")) 94 return None; 95 auto Range = C; 96 C.advance(4); // Skip '%bb.' 97 if (!isdigit(C.peek())) { 98 Token = MIToken(MIToken::Error, C.remaining()); 99 ErrorCallback(C.location(), "expected a number after '%bb.'"); 100 return C; 101 } 102 auto NumberRange = C; 103 while (isdigit(C.peek())) 104 C.advance(); 105 StringRef Number = NumberRange.upto(C); 106 unsigned StringOffset = 4 + Number.size(); // Drop '%bb.<id>' 107 if (C.peek() == '.') { 108 C.advance(); // Skip '.' 109 ++StringOffset; 110 while (isIdentifierChar(C.peek())) 111 C.advance(); 112 } 113 Token = MIToken(MIToken::MachineBasicBlock, Range.upto(C), APSInt(Number), 114 StringOffset); 115 return C; 116 } 117 118 static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { 119 auto Range = C; 120 C.advance(); // Skip '%' 121 auto NumberRange = C; 122 while (isdigit(C.peek())) 123 C.advance(); 124 Token = MIToken(MIToken::VirtualRegister, Range.upto(C), 125 APSInt(NumberRange.upto(C))); 126 return C; 127 } 128 129 static Cursor maybeLexRegister(Cursor C, MIToken &Token) { 130 if (C.peek() != '%') 131 return None; 132 if (isdigit(C.peek(1))) 133 return lexVirtualRegister(C, Token); 134 auto Range = C; 135 C.advance(); // Skip '%' 136 while (isIdentifierChar(C.peek())) 137 C.advance(); 138 Token = MIToken(MIToken::NamedRegister, Range.upto(C), 139 /*StringOffset=*/1); // Drop the '%' 140 return C; 141 } 142 143 static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token) { 144 if (C.peek() != '@') 145 return None; 146 auto Range = C; 147 C.advance(); // Skip the '@' 148 // TODO: add support for quoted names. 149 if (!isdigit(C.peek())) { 150 while (isIdentifierChar(C.peek())) 151 C.advance(); 152 Token = MIToken(MIToken::NamedGlobalValue, Range.upto(C), 153 /*StringOffset=*/1); // Drop the '@' 154 return C; 155 } 156 auto NumberRange = C; 157 while (isdigit(C.peek())) 158 C.advance(); 159 Token = 160 MIToken(MIToken::GlobalValue, Range.upto(C), APSInt(NumberRange.upto(C))); 161 return C; 162 } 163 164 static Cursor maybeLexIntegerLiteral(Cursor C, MIToken &Token) { 165 if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) 166 return None; 167 auto Range = C; 168 C.advance(); 169 while (isdigit(C.peek())) 170 C.advance(); 171 StringRef StrVal = Range.upto(C); 172 Token = MIToken(MIToken::IntegerLiteral, StrVal, APSInt(StrVal)); 173 return C; 174 } 175 176 static MIToken::TokenKind symbolToken(char C) { 177 switch (C) { 178 case ',': 179 return MIToken::comma; 180 case '=': 181 return MIToken::equal; 182 case ':': 183 return MIToken::colon; 184 default: 185 return MIToken::Error; 186 } 187 } 188 189 static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { 190 auto Kind = symbolToken(C.peek()); 191 if (Kind == MIToken::Error) 192 return None; 193 auto Range = C; 194 C.advance(); 195 Token = MIToken(Kind, Range.upto(C)); 196 return C; 197 } 198 199 StringRef llvm::lexMIToken( 200 StringRef Source, MIToken &Token, 201 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 202 auto C = skipWhitespace(Cursor(Source)); 203 if (C.isEOF()) { 204 Token = MIToken(MIToken::Eof, C.remaining()); 205 return C.remaining(); 206 } 207 208 if (Cursor R = maybeLexIdentifier(C, Token)) 209 return R.remaining(); 210 if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) 211 return R.remaining(); 212 if (Cursor R = maybeLexRegister(C, Token)) 213 return R.remaining(); 214 if (Cursor R = maybeLexGlobalValue(C, Token)) 215 return R.remaining(); 216 if (Cursor R = maybeLexIntegerLiteral(C, Token)) 217 return R.remaining(); 218 if (Cursor R = maybeLexSymbol(C, Token)) 219 return R.remaining(); 220 221 Token = MIToken(MIToken::Error, C.remaining()); 222 ErrorCallback(C.location(), 223 Twine("unexpected character '") + Twine(C.peek()) + "'"); 224 return C.remaining(); 225 } 226