1 //===- MILexer.cpp - Machine instructions lexer implementation ----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the lexing of machine instructions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "MILexer.h"
15 #include "llvm/ADT/None.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/ADT/Twine.h"
19 #include <cctype>
20 
21 using namespace llvm;
22 
23 namespace {
24 
25 typedef function_ref<void(StringRef::iterator Loc, const Twine &)>
26     ErrorCallbackType;
27 
28 /// This class provides a way to iterate and get characters from the source
29 /// string.
30 class Cursor {
31   const char *Ptr;
32   const char *End;
33 
34 public:
35   Cursor(NoneType) : Ptr(nullptr), End(nullptr) {}
36 
37   explicit Cursor(StringRef Str) {
38     Ptr = Str.data();
39     End = Ptr + Str.size();
40   }
41 
42   bool isEOF() const { return Ptr == End; }
43 
44   char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; }
45 
46   void advance(unsigned I = 1) { Ptr += I; }
47 
48   StringRef remaining() const { return StringRef(Ptr, End - Ptr); }
49 
50   StringRef upto(Cursor C) const {
51     assert(C.Ptr >= Ptr && C.Ptr <= End);
52     return StringRef(Ptr, C.Ptr - Ptr);
53   }
54 
55   StringRef::iterator location() const { return Ptr; }
56 
57   operator bool() const { return Ptr != nullptr; }
58 };
59 
60 } // end anonymous namespace
61 
62 MIToken &MIToken::reset(TokenKind Kind, StringRef Range) {
63   this->Kind = Kind;
64   this->Range = Range;
65   return *this;
66 }
67 
68 MIToken &MIToken::setStringValue(StringRef StrVal) {
69   StringValue = StrVal;
70   return *this;
71 }
72 
73 MIToken &MIToken::setOwnedStringValue(std::string StrVal) {
74   StringValueStorage = std::move(StrVal);
75   StringValue = StringValueStorage;
76   return *this;
77 }
78 
79 MIToken &MIToken::setIntegerValue(APSInt IntVal) {
80   this->IntVal = std::move(IntVal);
81   return *this;
82 }
83 
84 /// Skip the leading whitespace characters and return the updated cursor.
85 static Cursor skipWhitespace(Cursor C) {
86   while (isblank(C.peek()))
87     C.advance();
88   return C;
89 }
90 
91 static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; }
92 
93 /// Skip a line comment and return the updated cursor.
94 static Cursor skipComment(Cursor C) {
95   if (C.peek() != ';')
96     return C;
97   while (!isNewlineChar(C.peek()) && !C.isEOF())
98     C.advance();
99   return C;
100 }
101 
102 /// Return true if the given character satisfies the following regular
103 /// expression: [-a-zA-Z$._0-9]
104 static bool isIdentifierChar(char C) {
105   return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' ||
106          C == '$';
107 }
108 
109 /// Unescapes the given string value.
110 ///
111 /// Expects the string value to be quoted.
112 static std::string unescapeQuotedString(StringRef Value) {
113   assert(Value.front() == '"' && Value.back() == '"');
114   Cursor C = Cursor(Value.substr(1, Value.size() - 2));
115 
116   std::string Str;
117   Str.reserve(C.remaining().size());
118   while (!C.isEOF()) {
119     char Char = C.peek();
120     if (Char == '\\') {
121       if (C.peek(1) == '\\') {
122         // Two '\' become one
123         Str += '\\';
124         C.advance(2);
125         continue;
126       }
127       if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) {
128         Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2));
129         C.advance(3);
130         continue;
131       }
132     }
133     Str += Char;
134     C.advance();
135   }
136   return Str;
137 }
138 
139 /// Lex a string constant using the following regular expression: \"[^\"]*\"
140 static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) {
141   assert(C.peek() == '"');
142   for (C.advance(); C.peek() != '"'; C.advance()) {
143     if (C.isEOF() || isNewlineChar(C.peek())) {
144       ErrorCallback(
145           C.location(),
146           "end of machine instruction reached before the closing '\"'");
147       return None;
148     }
149   }
150   C.advance();
151   return C;
152 }
153 
154 static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type,
155                       unsigned PrefixLength, ErrorCallbackType ErrorCallback) {
156   auto Range = C;
157   C.advance(PrefixLength);
158   if (C.peek() == '"') {
159     if (Cursor R = lexStringConstant(C, ErrorCallback)) {
160       StringRef String = Range.upto(R);
161       Token.reset(Type, String)
162           .setOwnedStringValue(
163               unescapeQuotedString(String.drop_front(PrefixLength)));
164       return R;
165     }
166     Token.reset(MIToken::Error, Range.remaining());
167     return Range;
168   }
169   while (isIdentifierChar(C.peek()))
170     C.advance();
171   Token.reset(Type, Range.upto(C))
172       .setStringValue(Range.upto(C).drop_front(PrefixLength));
173   return C;
174 }
175 
176 static Cursor maybeLexIntegerOrScalarType(Cursor C, MIToken &Token) {
177   if ((C.peek() != 'i' && C.peek() != 's') || !isdigit(C.peek(1)))
178     return None;
179   char Kind = C.peek();
180   auto Range = C;
181   C.advance(); // Skip 'i'
182   while (isdigit(C.peek()))
183     C.advance();
184   Token.reset(Kind == 'i' ? MIToken::IntegerType : MIToken::ScalarType,
185               Range.upto(C));
186   return C;
187 }
188 
189 static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
190   return StringSwitch<MIToken::TokenKind>(Identifier)
191       .Case("_", MIToken::underscore)
192       .Case("implicit", MIToken::kw_implicit)
193       .Case("implicit-def", MIToken::kw_implicit_define)
194       .Case("def", MIToken::kw_def)
195       .Case("dead", MIToken::kw_dead)
196       .Case("killed", MIToken::kw_killed)
197       .Case("undef", MIToken::kw_undef)
198       .Case("internal", MIToken::kw_internal)
199       .Case("early-clobber", MIToken::kw_early_clobber)
200       .Case("debug-use", MIToken::kw_debug_use)
201       .Case("tied-def", MIToken::kw_tied_def)
202       .Case("frame-setup", MIToken::kw_frame_setup)
203       .Case("debug-location", MIToken::kw_debug_location)
204       .Case(".cfi_same_value", MIToken::kw_cfi_same_value)
205       .Case(".cfi_offset", MIToken::kw_cfi_offset)
206       .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register)
207       .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset)
208       .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa)
209       .Case("blockaddress", MIToken::kw_blockaddress)
210       .Case("target-index", MIToken::kw_target_index)
211       .Case("half", MIToken::kw_half)
212       .Case("float", MIToken::kw_float)
213       .Case("double", MIToken::kw_double)
214       .Case("x86_fp80", MIToken::kw_x86_fp80)
215       .Case("fp128", MIToken::kw_fp128)
216       .Case("ppc_fp128", MIToken::kw_ppc_fp128)
217       .Case("target-flags", MIToken::kw_target_flags)
218       .Case("volatile", MIToken::kw_volatile)
219       .Case("non-temporal", MIToken::kw_non_temporal)
220       .Case("invariant", MIToken::kw_invariant)
221       .Case("align", MIToken::kw_align)
222       .Case("stack", MIToken::kw_stack)
223       .Case("got", MIToken::kw_got)
224       .Case("jump-table", MIToken::kw_jump_table)
225       .Case("constant-pool", MIToken::kw_constant_pool)
226       .Case("call-entry", MIToken::kw_call_entry)
227       .Case("liveout", MIToken::kw_liveout)
228       .Case("address-taken", MIToken::kw_address_taken)
229       .Case("landing-pad", MIToken::kw_landing_pad)
230       .Case("liveins", MIToken::kw_liveins)
231       .Case("successors", MIToken::kw_successors)
232       .Default(MIToken::Identifier);
233 }
234 
235 static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
236   if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.')
237     return None;
238   auto Range = C;
239   while (isIdentifierChar(C.peek()))
240     C.advance();
241   auto Identifier = Range.upto(C);
242   Token.reset(getIdentifierKind(Identifier), Identifier)
243       .setStringValue(Identifier);
244   return C;
245 }
246 
247 static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token,
248                                         ErrorCallbackType ErrorCallback) {
249   bool IsReference = C.remaining().startswith("%bb.");
250   if (!IsReference && !C.remaining().startswith("bb."))
251     return None;
252   auto Range = C;
253   unsigned PrefixLength = IsReference ? 4 : 3;
254   C.advance(PrefixLength); // Skip '%bb.' or 'bb.'
255   if (!isdigit(C.peek())) {
256     Token.reset(MIToken::Error, C.remaining());
257     ErrorCallback(C.location(), "expected a number after '%bb.'");
258     return C;
259   }
260   auto NumberRange = C;
261   while (isdigit(C.peek()))
262     C.advance();
263   StringRef Number = NumberRange.upto(C);
264   unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>'
265   if (C.peek() == '.') {
266     C.advance(); // Skip '.'
267     ++StringOffset;
268     while (isIdentifierChar(C.peek()))
269       C.advance();
270   }
271   Token.reset(IsReference ? MIToken::MachineBasicBlock
272                           : MIToken::MachineBasicBlockLabel,
273               Range.upto(C))
274       .setIntegerValue(APSInt(Number))
275       .setStringValue(Range.upto(C).drop_front(StringOffset));
276   return C;
277 }
278 
279 static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule,
280                             MIToken::TokenKind Kind) {
281   if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
282     return None;
283   auto Range = C;
284   C.advance(Rule.size());
285   auto NumberRange = C;
286   while (isdigit(C.peek()))
287     C.advance();
288   Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C)));
289   return C;
290 }
291 
292 static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule,
293                                    MIToken::TokenKind Kind) {
294   if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
295     return None;
296   auto Range = C;
297   C.advance(Rule.size());
298   auto NumberRange = C;
299   while (isdigit(C.peek()))
300     C.advance();
301   StringRef Number = NumberRange.upto(C);
302   unsigned StringOffset = Rule.size() + Number.size();
303   if (C.peek() == '.') {
304     C.advance();
305     ++StringOffset;
306     while (isIdentifierChar(C.peek()))
307       C.advance();
308   }
309   Token.reset(Kind, Range.upto(C))
310       .setIntegerValue(APSInt(Number))
311       .setStringValue(Range.upto(C).drop_front(StringOffset));
312   return C;
313 }
314 
315 static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) {
316   return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex);
317 }
318 
319 static Cursor maybeLexStackObject(Cursor C, MIToken &Token) {
320   return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject);
321 }
322 
323 static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) {
324   return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject);
325 }
326 
327 static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) {
328   return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem);
329 }
330 
331 static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token,
332                                        ErrorCallbackType ErrorCallback) {
333   const StringRef Rule = "%subreg.";
334   if (!C.remaining().startswith(Rule))
335     return None;
336   return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(),
337                  ErrorCallback);
338 }
339 
340 static Cursor maybeLexIRBlock(Cursor C, MIToken &Token,
341                               ErrorCallbackType ErrorCallback) {
342   const StringRef Rule = "%ir-block.";
343   if (!C.remaining().startswith(Rule))
344     return None;
345   if (isdigit(C.peek(Rule.size())))
346     return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
347   return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback);
348 }
349 
350 static Cursor maybeLexIRValue(Cursor C, MIToken &Token,
351                               ErrorCallbackType ErrorCallback) {
352   const StringRef Rule = "%ir.";
353   if (!C.remaining().startswith(Rule))
354     return None;
355   if (isdigit(C.peek(Rule.size())))
356     return maybeLexIndex(C, Token, Rule, MIToken::IRValue);
357   return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback);
358 }
359 
360 static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
361   auto Range = C;
362   C.advance(); // Skip '%'
363   auto NumberRange = C;
364   while (isdigit(C.peek()))
365     C.advance();
366   Token.reset(MIToken::VirtualRegister, Range.upto(C))
367       .setIntegerValue(APSInt(NumberRange.upto(C)));
368   return C;
369 }
370 
371 static Cursor maybeLexRegister(Cursor C, MIToken &Token) {
372   if (C.peek() != '%')
373     return None;
374   if (isdigit(C.peek(1)))
375     return lexVirtualRegister(C, Token);
376   auto Range = C;
377   C.advance(); // Skip '%'
378   while (isIdentifierChar(C.peek()))
379     C.advance();
380   Token.reset(MIToken::NamedRegister, Range.upto(C))
381       .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%'
382   return C;
383 }
384 
385 static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token,
386                                   ErrorCallbackType ErrorCallback) {
387   if (C.peek() != '@')
388     return None;
389   if (!isdigit(C.peek(1)))
390     return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1,
391                    ErrorCallback);
392   auto Range = C;
393   C.advance(1); // Skip the '@'
394   auto NumberRange = C;
395   while (isdigit(C.peek()))
396     C.advance();
397   Token.reset(MIToken::GlobalValue, Range.upto(C))
398       .setIntegerValue(APSInt(NumberRange.upto(C)));
399   return C;
400 }
401 
402 static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token,
403                                      ErrorCallbackType ErrorCallback) {
404   if (C.peek() != '$')
405     return None;
406   return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
407                  ErrorCallback);
408 }
409 
410 static bool isValidHexFloatingPointPrefix(char C) {
411   return C == 'H' || C == 'K' || C == 'L' || C == 'M';
412 }
413 
414 static Cursor maybeLexHexFloatingPointLiteral(Cursor C, MIToken &Token) {
415   if (C.peek() != '0' || C.peek(1) != 'x')
416     return None;
417   Cursor Range = C;
418   C.advance(2); // Skip '0x'
419   if (isValidHexFloatingPointPrefix(C.peek()))
420     C.advance();
421   while (isxdigit(C.peek()))
422     C.advance();
423   Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
424   return C;
425 }
426 
427 static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
428   C.advance();
429   // Skip over [0-9]*([eE][-+]?[0-9]+)?
430   while (isdigit(C.peek()))
431     C.advance();
432   if ((C.peek() == 'e' || C.peek() == 'E') &&
433       (isdigit(C.peek(1)) ||
434        ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) {
435     C.advance(2);
436     while (isdigit(C.peek()))
437       C.advance();
438   }
439   Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
440   return C;
441 }
442 
443 static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) {
444   if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1))))
445     return None;
446   auto Range = C;
447   C.advance();
448   while (isdigit(C.peek()))
449     C.advance();
450   if (C.peek() == '.')
451     return lexFloatingPointLiteral(Range, C, Token);
452   StringRef StrVal = Range.upto(C);
453   Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal));
454   return C;
455 }
456 
457 static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
458   return StringSwitch<MIToken::TokenKind>(Identifier)
459       .Case("!tbaa", MIToken::md_tbaa)
460       .Case("!alias.scope", MIToken::md_alias_scope)
461       .Case("!noalias", MIToken::md_noalias)
462       .Case("!range", MIToken::md_range)
463       .Default(MIToken::Error);
464 }
465 
466 static Cursor maybeLexExlaim(Cursor C, MIToken &Token,
467                              ErrorCallbackType ErrorCallback) {
468   if (C.peek() != '!')
469     return None;
470   auto Range = C;
471   C.advance(1);
472   if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) {
473     Token.reset(MIToken::exclaim, Range.upto(C));
474     return C;
475   }
476   while (isIdentifierChar(C.peek()))
477     C.advance();
478   StringRef StrVal = Range.upto(C);
479   Token.reset(getMetadataKeywordKind(StrVal), StrVal);
480   if (Token.isError())
481     ErrorCallback(Token.location(),
482                   "use of unknown metadata keyword '" + StrVal + "'");
483   return C;
484 }
485 
486 static MIToken::TokenKind symbolToken(char C) {
487   switch (C) {
488   case ',':
489     return MIToken::comma;
490   case '=':
491     return MIToken::equal;
492   case ':':
493     return MIToken::colon;
494   case '(':
495     return MIToken::lparen;
496   case ')':
497     return MIToken::rparen;
498   case '{':
499     return MIToken::lbrace;
500   case '}':
501     return MIToken::rbrace;
502   case '+':
503     return MIToken::plus;
504   case '-':
505     return MIToken::minus;
506   case '<':
507     return MIToken::less;
508   case '>':
509     return MIToken::greater;
510   default:
511     return MIToken::Error;
512   }
513 }
514 
515 static Cursor maybeLexSymbol(Cursor C, MIToken &Token) {
516   MIToken::TokenKind Kind;
517   unsigned Length = 1;
518   if (C.peek() == ':' && C.peek(1) == ':') {
519     Kind = MIToken::coloncolon;
520     Length = 2;
521   } else
522     Kind = symbolToken(C.peek());
523   if (Kind == MIToken::Error)
524     return None;
525   auto Range = C;
526   C.advance(Length);
527   Token.reset(Kind, Range.upto(C));
528   return C;
529 }
530 
531 static Cursor maybeLexNewline(Cursor C, MIToken &Token) {
532   if (!isNewlineChar(C.peek()))
533     return None;
534   auto Range = C;
535   C.advance();
536   Token.reset(MIToken::Newline, Range.upto(C));
537   return C;
538 }
539 
540 static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token,
541                                      ErrorCallbackType ErrorCallback) {
542   if (C.peek() != '`')
543     return None;
544   auto Range = C;
545   C.advance();
546   auto StrRange = C;
547   while (C.peek() != '`') {
548     if (C.isEOF() || isNewlineChar(C.peek())) {
549       ErrorCallback(
550           C.location(),
551           "end of machine instruction reached before the closing '`'");
552       Token.reset(MIToken::Error, Range.remaining());
553       return C;
554     }
555     C.advance();
556   }
557   StringRef Value = StrRange.upto(C);
558   C.advance();
559   Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value);
560   return C;
561 }
562 
563 StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
564                            ErrorCallbackType ErrorCallback) {
565   auto C = skipComment(skipWhitespace(Cursor(Source)));
566   if (C.isEOF()) {
567     Token.reset(MIToken::Eof, C.remaining());
568     return C.remaining();
569   }
570 
571   if (Cursor R = maybeLexIntegerOrScalarType(C, Token))
572     return R.remaining();
573   if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback))
574     return R.remaining();
575   if (Cursor R = maybeLexIdentifier(C, Token))
576     return R.remaining();
577   if (Cursor R = maybeLexJumpTableIndex(C, Token))
578     return R.remaining();
579   if (Cursor R = maybeLexStackObject(C, Token))
580     return R.remaining();
581   if (Cursor R = maybeLexFixedStackObject(C, Token))
582     return R.remaining();
583   if (Cursor R = maybeLexConstantPoolItem(C, Token))
584     return R.remaining();
585   if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback))
586     return R.remaining();
587   if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback))
588     return R.remaining();
589   if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback))
590     return R.remaining();
591   if (Cursor R = maybeLexRegister(C, Token))
592     return R.remaining();
593   if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback))
594     return R.remaining();
595   if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback))
596     return R.remaining();
597   if (Cursor R = maybeLexHexFloatingPointLiteral(C, Token))
598     return R.remaining();
599   if (Cursor R = maybeLexNumericalLiteral(C, Token))
600     return R.remaining();
601   if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback))
602     return R.remaining();
603   if (Cursor R = maybeLexSymbol(C, Token))
604     return R.remaining();
605   if (Cursor R = maybeLexNewline(C, Token))
606     return R.remaining();
607   if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback))
608     return R.remaining();
609 
610   Token.reset(MIToken::Error, C.remaining());
611   ErrorCallback(C.location(),
612                 Twine("unexpected character '") + Twine(C.peek()) + "'");
613   return C.remaining();
614 }
615