1 //===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "CPlusPlusNameParser.h"
11 
12 #include "clang/Basic/IdentifierTable.h"
13 #include "llvm/ADT/StringMap.h"
14 #include "llvm/Support/Threading.h"
15 
16 using namespace lldb;
17 using namespace lldb_private;
18 using llvm::Optional;
19 using llvm::None;
20 using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
21 using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
22 namespace tok = clang::tok;
23 
24 Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
25   m_next_token_index = 0;
26   Optional<ParsedFunction> result(None);
27 
28   // Try to parse the name as function without a return type specified e.g.
29   // main(int, char*[])
30   {
31     Bookmark start_position = SetBookmark();
32     result = ParseFunctionImpl(false);
33     if (result && !HasMoreTokens())
34       return result;
35   }
36 
37   // Try to parse the name as function with function pointer return type e.g.
38   // void (*get_func(const char*))()
39   result = ParseFuncPtr(true);
40   if (result)
41     return result;
42 
43   // Finally try to parse the name as a function with non-function return type
44   // e.g. int main(int, char*[])
45   result = ParseFunctionImpl(true);
46   if (HasMoreTokens())
47     return None;
48   return result;
49 }
50 
51 Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
52   m_next_token_index = 0;
53   Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
54   if (!name_ranges)
55     return None;
56   if (HasMoreTokens())
57     return None;
58   ParsedName result;
59   result.basename = GetTextForRange(name_ranges.getValue().basename_range);
60   result.context = GetTextForRange(name_ranges.getValue().context_range);
61   return result;
62 }
63 
64 bool CPlusPlusNameParser::HasMoreTokens() {
65   return m_next_token_index < m_tokens.size();
66 }
67 
68 void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
69 
70 void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
71 
72 bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
73   if (!HasMoreTokens())
74     return false;
75 
76   if (!Peek().is(kind))
77     return false;
78 
79   Advance();
80   return true;
81 }
82 
83 template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
84   if (!HasMoreTokens())
85     return false;
86 
87   if (!Peek().isOneOf(kinds...))
88     return false;
89 
90   Advance();
91   return true;
92 }
93 
94 CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
95   return Bookmark(m_next_token_index);
96 }
97 
98 size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
99 
100 clang::Token &CPlusPlusNameParser::Peek() {
101   assert(HasMoreTokens());
102   return m_tokens[m_next_token_index];
103 }
104 
105 Optional<ParsedFunction>
106 CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
107   Bookmark start_position = SetBookmark();
108   if (expect_return_type) {
109     // Consume return type if it's expected.
110     if (!ConsumeTypename())
111       return None;
112   }
113 
114   auto maybe_name = ParseFullNameImpl();
115   if (!maybe_name) {
116     return None;
117   }
118 
119   size_t argument_start = GetCurrentPosition();
120   if (!ConsumeArguments()) {
121     return None;
122   }
123 
124   size_t qualifiers_start = GetCurrentPosition();
125   SkipFunctionQualifiers();
126   size_t end_position = GetCurrentPosition();
127 
128   ParsedFunction result;
129   result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
130   result.name.context = GetTextForRange(maybe_name.getValue().context_range);
131   result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
132   result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
133   start_position.Remove();
134   return result;
135 }
136 
137 Optional<ParsedFunction>
138 CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
139   Bookmark start_position = SetBookmark();
140   if (expect_return_type) {
141     // Consume return type.
142     if (!ConsumeTypename())
143       return None;
144   }
145 
146   if (!ConsumeToken(tok::l_paren))
147     return None;
148   if (!ConsumePtrsAndRefs())
149     return None;
150 
151   {
152     Bookmark before_inner_function_pos = SetBookmark();
153     auto maybe_inner_function_name = ParseFunctionImpl(false);
154     if (maybe_inner_function_name)
155       if (ConsumeToken(tok::r_paren))
156         if (ConsumeArguments()) {
157           SkipFunctionQualifiers();
158           start_position.Remove();
159           before_inner_function_pos.Remove();
160           return maybe_inner_function_name;
161         }
162   }
163 
164   auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
165   if (maybe_inner_function_ptr_name)
166     if (ConsumeToken(tok::r_paren))
167       if (ConsumeArguments()) {
168         SkipFunctionQualifiers();
169         start_position.Remove();
170         return maybe_inner_function_ptr_name;
171       }
172   return None;
173 }
174 
175 bool CPlusPlusNameParser::ConsumeArguments() {
176   return ConsumeBrackets(tok::l_paren, tok::r_paren);
177 }
178 
179 bool CPlusPlusNameParser::ConsumeTemplateArgs() {
180   Bookmark start_position = SetBookmark();
181   if (!HasMoreTokens() || Peek().getKind() != tok::less)
182     return false;
183   Advance();
184 
185   // Consuming template arguments is a bit trickier than consuming function
186   // arguments, because '<' '>' brackets are not always trivially balanced. In
187   // some rare cases tokens '<' and '>' can appear inside template arguments as
188   // arithmetic or shift operators not as template brackets. Examples:
189   // std::enable_if<(10u)<(64), bool>
190   //           f<A<operator<(X,Y)::Subclass>>
191   // Good thing that compiler makes sure that really ambiguous cases of '>'
192   // usage should be enclosed within '()' brackets.
193   int template_counter = 1;
194   bool can_open_template = false;
195   while (HasMoreTokens() && template_counter > 0) {
196     tok::TokenKind kind = Peek().getKind();
197     switch (kind) {
198     case tok::greatergreater:
199       template_counter -= 2;
200       can_open_template = false;
201       Advance();
202       break;
203     case tok::greater:
204       --template_counter;
205       can_open_template = false;
206       Advance();
207       break;
208     case tok::less:
209       // '<' is an attempt to open a subteamplte
210       // check if parser is at the point where it's actually possible,
211       // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
212       // need to do the same for '>' because compiler actually makes sure that
213       // '>' always surrounded by brackets to avoid ambiguity.
214       if (can_open_template)
215         ++template_counter;
216       can_open_template = false;
217       Advance();
218       break;
219     case tok::kw_operator: // C++ operator overloading.
220       if (!ConsumeOperator())
221         return false;
222       can_open_template = true;
223       break;
224     case tok::raw_identifier:
225       can_open_template = true;
226       Advance();
227       break;
228     case tok::l_square:
229       if (!ConsumeBrackets(tok::l_square, tok::r_square))
230         return false;
231       can_open_template = false;
232       break;
233     case tok::l_paren:
234       if (!ConsumeArguments())
235         return false;
236       can_open_template = false;
237       break;
238     default:
239       can_open_template = false;
240       Advance();
241       break;
242     }
243   }
244 
245   if (template_counter != 0) {
246     return false;
247   }
248   start_position.Remove();
249   return true;
250 }
251 
252 bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
253   Bookmark start_position = SetBookmark();
254   if (!ConsumeToken(tok::l_paren)) {
255     return false;
256   }
257   constexpr llvm::StringLiteral g_anonymous("anonymous");
258   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
259       Peek().getRawIdentifier() == g_anonymous) {
260     Advance();
261   } else {
262     return false;
263   }
264 
265   if (!ConsumeToken(tok::kw_namespace)) {
266     return false;
267   }
268 
269   if (!ConsumeToken(tok::r_paren)) {
270     return false;
271   }
272   start_position.Remove();
273   return true;
274 }
275 
276 bool CPlusPlusNameParser::ConsumeLambda() {
277   Bookmark start_position = SetBookmark();
278   if (!ConsumeToken(tok::l_brace)) {
279     return false;
280   }
281   constexpr llvm::StringLiteral g_lambda("lambda");
282   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
283       Peek().getRawIdentifier() == g_lambda) {
284     // Put the matched brace back so we can use ConsumeBrackets
285     TakeBack();
286   } else {
287     return false;
288   }
289 
290   if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
291     return false;
292   }
293 
294   start_position.Remove();
295   return true;
296 }
297 
298 bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
299                                           tok::TokenKind right) {
300   Bookmark start_position = SetBookmark();
301   if (!HasMoreTokens() || Peek().getKind() != left)
302     return false;
303   Advance();
304 
305   int counter = 1;
306   while (HasMoreTokens() && counter > 0) {
307     tok::TokenKind kind = Peek().getKind();
308     if (kind == right)
309       --counter;
310     else if (kind == left)
311       ++counter;
312     Advance();
313   }
314 
315   assert(counter >= 0);
316   if (counter > 0) {
317     return false;
318   }
319   start_position.Remove();
320   return true;
321 }
322 
323 bool CPlusPlusNameParser::ConsumeOperator() {
324   Bookmark start_position = SetBookmark();
325   if (!ConsumeToken(tok::kw_operator))
326     return false;
327 
328   if (!HasMoreTokens()) {
329     return false;
330   }
331 
332   const auto &token = Peek();
333   switch (token.getKind()) {
334   case tok::kw_new:
335   case tok::kw_delete:
336     // This is 'new' or 'delete' operators.
337     Advance();
338     // Check for array new/delete.
339     if (HasMoreTokens() && Peek().is(tok::l_square)) {
340       // Consume the '[' and ']'.
341       if (!ConsumeBrackets(tok::l_square, tok::r_square))
342         return false;
343     }
344     break;
345 
346 #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
347   case tok::Token:                                                             \
348     Advance();                                                                 \
349     break;
350 #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
351 #include "clang/Basic/OperatorKinds.def"
352 #undef OVERLOADED_OPERATOR
353 #undef OVERLOADED_OPERATOR_MULTI
354 
355   case tok::l_paren:
356     // Call operator consume '(' ... ')'.
357     if (ConsumeBrackets(tok::l_paren, tok::r_paren))
358       break;
359     return false;
360 
361   case tok::l_square:
362     // This is a [] operator.
363     // Consume the '[' and ']'.
364     if (ConsumeBrackets(tok::l_square, tok::r_square))
365       break;
366     return false;
367 
368   default:
369     // This might be a cast operator.
370     if (ConsumeTypename())
371       break;
372     return false;
373   }
374   start_position.Remove();
375   return true;
376 }
377 
378 void CPlusPlusNameParser::SkipTypeQualifiers() {
379   while (ConsumeToken(tok::kw_const, tok::kw_volatile))
380     ;
381 }
382 
383 void CPlusPlusNameParser::SkipFunctionQualifiers() {
384   while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
385     ;
386 }
387 
388 bool CPlusPlusNameParser::ConsumeBuiltinType() {
389   bool result = false;
390   bool continue_parsing = true;
391   // Built-in types can be made of a few keywords like 'unsigned long long
392   // int'. This function consumes all built-in type keywords without checking
393   // if they make sense like 'unsigned char void'.
394   while (continue_parsing && HasMoreTokens()) {
395     switch (Peek().getKind()) {
396     case tok::kw_short:
397     case tok::kw_long:
398     case tok::kw___int64:
399     case tok::kw___int128:
400     case tok::kw_signed:
401     case tok::kw_unsigned:
402     case tok::kw_void:
403     case tok::kw_char:
404     case tok::kw_int:
405     case tok::kw_half:
406     case tok::kw_float:
407     case tok::kw_double:
408     case tok::kw___float128:
409     case tok::kw_wchar_t:
410     case tok::kw_bool:
411     case tok::kw_char16_t:
412     case tok::kw_char32_t:
413       result = true;
414       Advance();
415       break;
416     default:
417       continue_parsing = false;
418       break;
419     }
420   }
421   return result;
422 }
423 
424 void CPlusPlusNameParser::SkipPtrsAndRefs() {
425   // Ignoring result.
426   ConsumePtrsAndRefs();
427 }
428 
429 bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
430   bool found = false;
431   SkipTypeQualifiers();
432   while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
433                       tok::kw_volatile)) {
434     found = true;
435     SkipTypeQualifiers();
436   }
437   return found;
438 }
439 
440 bool CPlusPlusNameParser::ConsumeDecltype() {
441   Bookmark start_position = SetBookmark();
442   if (!ConsumeToken(tok::kw_decltype))
443     return false;
444 
445   if (!ConsumeArguments())
446     return false;
447 
448   start_position.Remove();
449   return true;
450 }
451 
452 bool CPlusPlusNameParser::ConsumeTypename() {
453   Bookmark start_position = SetBookmark();
454   SkipTypeQualifiers();
455   if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
456     if (!ParseFullNameImpl())
457       return false;
458   }
459   SkipPtrsAndRefs();
460   start_position.Remove();
461   return true;
462 }
463 
464 Optional<CPlusPlusNameParser::ParsedNameRanges>
465 CPlusPlusNameParser::ParseFullNameImpl() {
466   // Name parsing state machine.
467   enum class State {
468     Beginning,       // start of the name
469     AfterTwoColons,  // right after ::
470     AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
471     AfterTemplate,   // right after template brackets (<something>)
472     AfterOperator,   // right after name of C++ operator
473   };
474 
475   Bookmark start_position = SetBookmark();
476   State state = State::Beginning;
477   bool continue_parsing = true;
478   Optional<size_t> last_coloncolon_position = None;
479 
480   while (continue_parsing && HasMoreTokens()) {
481     const auto &token = Peek();
482     switch (token.getKind()) {
483     case tok::raw_identifier: // Just a name.
484       if (state != State::Beginning && state != State::AfterTwoColons) {
485         continue_parsing = false;
486         break;
487       }
488       Advance();
489       state = State::AfterIdentifier;
490       break;
491     case tok::l_paren: {
492       if (state == State::Beginning || state == State::AfterTwoColons) {
493         // (anonymous namespace)
494         if (ConsumeAnonymousNamespace()) {
495           state = State::AfterIdentifier;
496           break;
497         }
498       }
499 
500       // Type declared inside a function 'func()::Type'
501       if (state != State::AfterIdentifier && state != State::AfterTemplate &&
502           state != State::AfterOperator) {
503         continue_parsing = false;
504         break;
505       }
506       Bookmark l_paren_position = SetBookmark();
507       // Consume the '(' ... ') [const]'.
508       if (!ConsumeArguments()) {
509         continue_parsing = false;
510         break;
511       }
512       SkipFunctionQualifiers();
513 
514       // Consume '::'
515       size_t coloncolon_position = GetCurrentPosition();
516       if (!ConsumeToken(tok::coloncolon)) {
517         continue_parsing = false;
518         break;
519       }
520       l_paren_position.Remove();
521       last_coloncolon_position = coloncolon_position;
522       state = State::AfterTwoColons;
523       break;
524     }
525     case tok::l_brace:
526       if (state == State::Beginning || state == State::AfterTwoColons) {
527         if (ConsumeLambda()) {
528           state = State::AfterIdentifier;
529           break;
530         }
531       }
532       continue_parsing = false;
533       break;
534     case tok::coloncolon: // Type nesting delimiter.
535       if (state != State::Beginning && state != State::AfterIdentifier &&
536           state != State::AfterTemplate) {
537         continue_parsing = false;
538         break;
539       }
540       last_coloncolon_position = GetCurrentPosition();
541       Advance();
542       state = State::AfterTwoColons;
543       break;
544     case tok::less: // Template brackets.
545       if (state != State::AfterIdentifier && state != State::AfterOperator) {
546         continue_parsing = false;
547         break;
548       }
549       if (!ConsumeTemplateArgs()) {
550         continue_parsing = false;
551         break;
552       }
553       state = State::AfterTemplate;
554       break;
555     case tok::kw_operator: // C++ operator overloading.
556       if (state != State::Beginning && state != State::AfterTwoColons) {
557         continue_parsing = false;
558         break;
559       }
560       if (!ConsumeOperator()) {
561         continue_parsing = false;
562         break;
563       }
564       state = State::AfterOperator;
565       break;
566     case tok::tilde: // Destructor.
567       if (state != State::Beginning && state != State::AfterTwoColons) {
568         continue_parsing = false;
569         break;
570       }
571       Advance();
572       if (ConsumeToken(tok::raw_identifier)) {
573         state = State::AfterIdentifier;
574       } else {
575         TakeBack();
576         continue_parsing = false;
577       }
578       break;
579     default:
580       continue_parsing = false;
581       break;
582     }
583   }
584 
585   if (state == State::AfterIdentifier || state == State::AfterOperator ||
586       state == State::AfterTemplate) {
587     ParsedNameRanges result;
588     if (last_coloncolon_position) {
589       result.context_range = Range(start_position.GetSavedPosition(),
590                                    last_coloncolon_position.getValue());
591       result.basename_range =
592           Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
593     } else {
594       result.basename_range =
595           Range(start_position.GetSavedPosition(), GetCurrentPosition());
596     }
597     start_position.Remove();
598     return result;
599   } else {
600     return None;
601   }
602 }
603 
604 llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
605   if (range.empty())
606     return llvm::StringRef();
607   assert(range.begin_index < range.end_index);
608   assert(range.begin_index < m_tokens.size());
609   assert(range.end_index <= m_tokens.size());
610   clang::Token &first_token = m_tokens[range.begin_index];
611   clang::Token &last_token = m_tokens[range.end_index - 1];
612   clang::SourceLocation start_loc = first_token.getLocation();
613   clang::SourceLocation end_loc = last_token.getLocation();
614   unsigned start_pos = start_loc.getRawEncoding();
615   unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
616   return m_text.take_front(end_pos).drop_front(start_pos);
617 }
618 
619 static const clang::LangOptions &GetLangOptions() {
620   static clang::LangOptions g_options;
621   static llvm::once_flag g_once_flag;
622   llvm::call_once(g_once_flag, []() {
623     g_options.LineComment = true;
624     g_options.C99 = true;
625     g_options.C11 = true;
626     g_options.CPlusPlus = true;
627     g_options.CPlusPlus11 = true;
628     g_options.CPlusPlus14 = true;
629     g_options.CPlusPlus17 = true;
630   });
631   return g_options;
632 }
633 
634 static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
635   static llvm::StringMap<tok::TokenKind> g_map{
636 #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
637 #include "clang/Basic/TokenKinds.def"
638 #undef KEYWORD
639   };
640   return g_map;
641 }
642 
643 void CPlusPlusNameParser::ExtractTokens() {
644   clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
645                      m_text.data(), m_text.data() + m_text.size());
646   const auto &kw_map = GetKeywordsMap();
647   clang::Token token;
648   for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
649        lexer.LexFromRawLexer(token)) {
650     if (token.is(clang::tok::raw_identifier)) {
651       auto it = kw_map.find(token.getRawIdentifier());
652       if (it != kw_map.end()) {
653         token.setKind(it->getValue());
654       }
655     }
656 
657     m_tokens.push_back(token);
658   }
659 }
660