1 //===- MILexer.cpp - Machine instructions lexer implementation ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the lexing of machine instructions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MILexer.h" 15 #include "llvm/ADT/StringSwitch.h" 16 #include "llvm/ADT/Twine.h" 17 #include <cctype> 18 19 using namespace llvm; 20 21 namespace { 22 23 /// This class provides a way to iterate and get characters from the source 24 /// string. 25 class Cursor { 26 const char *Ptr; 27 const char *End; 28 29 public: 30 Cursor(NoneType) : Ptr(nullptr), End(nullptr) {} 31 32 explicit Cursor(StringRef Str) { 33 Ptr = Str.data(); 34 End = Ptr + Str.size(); 35 } 36 37 bool isEOF() const { return Ptr == End; } 38 39 char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } 40 41 void advance(unsigned I = 1) { Ptr += I; } 42 43 StringRef remaining() const { return StringRef(Ptr, End - Ptr); } 44 45 StringRef upto(Cursor C) const { 46 assert(C.Ptr >= Ptr && C.Ptr <= End); 47 return StringRef(Ptr, C.Ptr - Ptr); 48 } 49 50 StringRef::iterator location() const { return Ptr; } 51 52 operator bool() const { return Ptr != nullptr; } 53 }; 54 55 } // end anonymous namespace 56 57 /// Skip the leading whitespace characters and return the updated cursor. 58 static Cursor skipWhitespace(Cursor C) { 59 while (isspace(C.peek())) 60 C.advance(); 61 return C; 62 } 63 64 static bool isIdentifierChar(char C) { 65 return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.'; 66 } 67 68 static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { 69 return StringSwitch<MIToken::TokenKind>(Identifier) 70 .Case("_", MIToken::underscore) 71 .Case("implicit", MIToken::kw_implicit) 72 .Case("implicit-def", MIToken::kw_implicit_define) 73 .Case("dead", MIToken::kw_dead) 74 .Case("killed", MIToken::kw_killed) 75 .Case("undef", MIToken::kw_undef) 76 .Default(MIToken::Identifier); 77 } 78 79 static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { 80 if (!isalpha(C.peek()) && C.peek() != '_') 81 return None; 82 auto Range = C; 83 while (isIdentifierChar(C.peek())) 84 C.advance(); 85 auto Identifier = Range.upto(C); 86 Token = MIToken(getIdentifierKind(Identifier), Identifier); 87 return C; 88 } 89 90 static Cursor maybeLexMachineBasicBlock( 91 Cursor C, MIToken &Token, 92 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 93 if (!C.remaining().startswith("%bb.")) 94 return None; 95 auto Range = C; 96 C.advance(4); // Skip '%bb.' 97 if (!isdigit(C.peek())) { 98 Token = MIToken(MIToken::Error, C.remaining()); 99 ErrorCallback(C.location(), "expected a number after '%bb.'"); 100 return C; 101 } 102 auto NumberRange = C; 103 while (isdigit(C.peek())) 104 C.advance(); 105 StringRef Number = NumberRange.upto(C); 106 unsigned StringOffset = 4 + Number.size(); // Drop '%bb.<id>' 107 if (C.peek() == '.') { 108 C.advance(); // Skip '.' 109 ++StringOffset; 110 while (isIdentifierChar(C.peek())) 111 C.advance(); 112 } 113 Token = MIToken(MIToken::MachineBasicBlock, Range.upto(C), APSInt(Number), 114 StringOffset); 115 return C; 116 } 117 118 static Cursor maybeLexRegister(Cursor C, MIToken &Token) { 119 if (C.peek() != '%') 120 return None; 121 auto Range = C; 122 C.advance(); // Skip '%' 123 while (isIdentifierChar(C.peek())) 124 C.advance(); 125 Token = MIToken(MIToken::NamedRegister, Range.upto(C), 126 /*StringOffset=*/1); // Drop the '%' 127 return C; 128 } 129 130 static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token) { 131 if (C.peek() != '@') 132 return None; 133 auto Range = C; 134 C.advance(); // Skip the '@' 135 // TODO: add support for quoted names. 136 if (!isdigit(C.peek())) { 137 while (isIdentifierChar(C.peek())) 138 C.advance(); 139 Token = MIToken(MIToken::NamedGlobalValue, Range.upto(C), 140 /*StringOffset=*/1); // Drop the '@' 141 return C; 142 } 143 auto NumberRange = C; 144 while (isdigit(C.peek())) 145 C.advance(); 146 Token = 147 MIToken(MIToken::GlobalValue, Range.upto(C), APSInt(NumberRange.upto(C))); 148 return C; 149 } 150 151 static Cursor maybeLexIntegerLiteral(Cursor C, MIToken &Token) { 152 if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) 153 return None; 154 auto Range = C; 155 C.advance(); 156 while (isdigit(C.peek())) 157 C.advance(); 158 StringRef StrVal = Range.upto(C); 159 Token = MIToken(MIToken::IntegerLiteral, StrVal, APSInt(StrVal)); 160 return C; 161 } 162 163 static MIToken::TokenKind symbolToken(char C) { 164 switch (C) { 165 case ',': 166 return MIToken::comma; 167 case '=': 168 return MIToken::equal; 169 default: 170 return MIToken::Error; 171 } 172 } 173 174 static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { 175 auto Kind = symbolToken(C.peek()); 176 if (Kind == MIToken::Error) 177 return None; 178 auto Range = C; 179 C.advance(); 180 Token = MIToken(Kind, Range.upto(C)); 181 return C; 182 } 183 184 StringRef llvm::lexMIToken( 185 StringRef Source, MIToken &Token, 186 function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { 187 auto C = skipWhitespace(Cursor(Source)); 188 if (C.isEOF()) { 189 Token = MIToken(MIToken::Eof, C.remaining()); 190 return C.remaining(); 191 } 192 193 if (Cursor R = maybeLexIdentifier(C, Token)) 194 return R.remaining(); 195 if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) 196 return R.remaining(); 197 if (Cursor R = maybeLexRegister(C, Token)) 198 return R.remaining(); 199 if (Cursor R = maybeLexGlobalValue(C, Token)) 200 return R.remaining(); 201 if (Cursor R = maybeLexIntegerLiteral(C, Token)) 202 return R.remaining(); 203 if (Cursor R = maybeLexSymbol(C, Token)) 204 return R.remaining(); 205 206 Token = MIToken(MIToken::Error, C.remaining()); 207 ErrorCallback(C.location(), 208 Twine("unexpected character '") + Twine(C.peek()) + "'"); 209 return C.remaining(); 210 } 211