1 //===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "CPlusPlusNameParser.h"
11
12 #include "clang/Basic/IdentifierTable.h"
13 #include "llvm/ADT/StringMap.h"
14 #include "llvm/Support/Threading.h"
15
16 using namespace lldb;
17 using namespace lldb_private;
18 using llvm::Optional;
19 using llvm::None;
20 using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
21 using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
22 namespace tok = clang::tok;
23
ParseAsFunctionDefinition()24 Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
25 m_next_token_index = 0;
26 Optional<ParsedFunction> result(None);
27
28 // Try to parse the name as function without a return type specified e.g.
29 // main(int, char*[])
30 {
31 Bookmark start_position = SetBookmark();
32 result = ParseFunctionImpl(false);
33 if (result && !HasMoreTokens())
34 return result;
35 }
36
37 // Try to parse the name as function with function pointer return type e.g.
38 // void (*get_func(const char*))()
39 result = ParseFuncPtr(true);
40 if (result)
41 return result;
42
43 // Finally try to parse the name as a function with non-function return type
44 // e.g. int main(int, char*[])
45 result = ParseFunctionImpl(true);
46 if (HasMoreTokens())
47 return None;
48 return result;
49 }
50
ParseAsFullName()51 Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
52 m_next_token_index = 0;
53 Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
54 if (!name_ranges)
55 return None;
56 if (HasMoreTokens())
57 return None;
58 ParsedName result;
59 result.basename = GetTextForRange(name_ranges.getValue().basename_range);
60 result.context = GetTextForRange(name_ranges.getValue().context_range);
61 return result;
62 }
63
HasMoreTokens()64 bool CPlusPlusNameParser::HasMoreTokens() {
65 return m_next_token_index < m_tokens.size();
66 }
67
Advance()68 void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
69
TakeBack()70 void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
71
ConsumeToken(tok::TokenKind kind)72 bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
73 if (!HasMoreTokens())
74 return false;
75
76 if (!Peek().is(kind))
77 return false;
78
79 Advance();
80 return true;
81 }
82
ConsumeToken(Ts...kinds)83 template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
84 if (!HasMoreTokens())
85 return false;
86
87 if (!Peek().isOneOf(kinds...))
88 return false;
89
90 Advance();
91 return true;
92 }
93
SetBookmark()94 CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
95 return Bookmark(m_next_token_index);
96 }
97
GetCurrentPosition()98 size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
99
Peek()100 clang::Token &CPlusPlusNameParser::Peek() {
101 assert(HasMoreTokens());
102 return m_tokens[m_next_token_index];
103 }
104
105 Optional<ParsedFunction>
ParseFunctionImpl(bool expect_return_type)106 CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
107 Bookmark start_position = SetBookmark();
108 if (expect_return_type) {
109 // Consume return type if it's expected.
110 if (!ConsumeTypename())
111 return None;
112 }
113
114 auto maybe_name = ParseFullNameImpl();
115 if (!maybe_name) {
116 return None;
117 }
118
119 size_t argument_start = GetCurrentPosition();
120 if (!ConsumeArguments()) {
121 return None;
122 }
123
124 size_t qualifiers_start = GetCurrentPosition();
125 SkipFunctionQualifiers();
126 size_t end_position = GetCurrentPosition();
127
128 ParsedFunction result;
129 result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
130 result.name.context = GetTextForRange(maybe_name.getValue().context_range);
131 result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
132 result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
133 start_position.Remove();
134 return result;
135 }
136
137 Optional<ParsedFunction>
ParseFuncPtr(bool expect_return_type)138 CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
139 Bookmark start_position = SetBookmark();
140 if (expect_return_type) {
141 // Consume return type.
142 if (!ConsumeTypename())
143 return None;
144 }
145
146 if (!ConsumeToken(tok::l_paren))
147 return None;
148 if (!ConsumePtrsAndRefs())
149 return None;
150
151 {
152 Bookmark before_inner_function_pos = SetBookmark();
153 auto maybe_inner_function_name = ParseFunctionImpl(false);
154 if (maybe_inner_function_name)
155 if (ConsumeToken(tok::r_paren))
156 if (ConsumeArguments()) {
157 SkipFunctionQualifiers();
158 start_position.Remove();
159 before_inner_function_pos.Remove();
160 return maybe_inner_function_name;
161 }
162 }
163
164 auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
165 if (maybe_inner_function_ptr_name)
166 if (ConsumeToken(tok::r_paren))
167 if (ConsumeArguments()) {
168 SkipFunctionQualifiers();
169 start_position.Remove();
170 return maybe_inner_function_ptr_name;
171 }
172 return None;
173 }
174
ConsumeArguments()175 bool CPlusPlusNameParser::ConsumeArguments() {
176 return ConsumeBrackets(tok::l_paren, tok::r_paren);
177 }
178
ConsumeTemplateArgs()179 bool CPlusPlusNameParser::ConsumeTemplateArgs() {
180 Bookmark start_position = SetBookmark();
181 if (!HasMoreTokens() || Peek().getKind() != tok::less)
182 return false;
183 Advance();
184
185 // Consuming template arguments is a bit trickier than consuming function
186 // arguments, because '<' '>' brackets are not always trivially balanced. In
187 // some rare cases tokens '<' and '>' can appear inside template arguments as
188 // arithmetic or shift operators not as template brackets. Examples:
189 // std::enable_if<(10u)<(64), bool>
190 // f<A<operator<(X,Y)::Subclass>>
191 // Good thing that compiler makes sure that really ambiguous cases of '>'
192 // usage should be enclosed within '()' brackets.
193 int template_counter = 1;
194 bool can_open_template = false;
195 while (HasMoreTokens() && template_counter > 0) {
196 tok::TokenKind kind = Peek().getKind();
197 switch (kind) {
198 case tok::greatergreater:
199 template_counter -= 2;
200 can_open_template = false;
201 Advance();
202 break;
203 case tok::greater:
204 --template_counter;
205 can_open_template = false;
206 Advance();
207 break;
208 case tok::less:
209 // '<' is an attempt to open a subteamplte
210 // check if parser is at the point where it's actually possible,
211 // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
212 // need to do the same for '>' because compiler actually makes sure that
213 // '>' always surrounded by brackets to avoid ambiguity.
214 if (can_open_template)
215 ++template_counter;
216 can_open_template = false;
217 Advance();
218 break;
219 case tok::kw_operator: // C++ operator overloading.
220 if (!ConsumeOperator())
221 return false;
222 can_open_template = true;
223 break;
224 case tok::raw_identifier:
225 can_open_template = true;
226 Advance();
227 break;
228 case tok::l_square:
229 if (!ConsumeBrackets(tok::l_square, tok::r_square))
230 return false;
231 can_open_template = false;
232 break;
233 case tok::l_paren:
234 if (!ConsumeArguments())
235 return false;
236 can_open_template = false;
237 break;
238 default:
239 can_open_template = false;
240 Advance();
241 break;
242 }
243 }
244
245 if (template_counter != 0) {
246 return false;
247 }
248 start_position.Remove();
249 return true;
250 }
251
ConsumeAnonymousNamespace()252 bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
253 Bookmark start_position = SetBookmark();
254 if (!ConsumeToken(tok::l_paren)) {
255 return false;
256 }
257 constexpr llvm::StringLiteral g_anonymous("anonymous");
258 if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
259 Peek().getRawIdentifier() == g_anonymous) {
260 Advance();
261 } else {
262 return false;
263 }
264
265 if (!ConsumeToken(tok::kw_namespace)) {
266 return false;
267 }
268
269 if (!ConsumeToken(tok::r_paren)) {
270 return false;
271 }
272 start_position.Remove();
273 return true;
274 }
275
ConsumeLambda()276 bool CPlusPlusNameParser::ConsumeLambda() {
277 Bookmark start_position = SetBookmark();
278 if (!ConsumeToken(tok::l_brace)) {
279 return false;
280 }
281 constexpr llvm::StringLiteral g_lambda("lambda");
282 if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
283 Peek().getRawIdentifier() == g_lambda) {
284 // Put the matched brace back so we can use ConsumeBrackets
285 TakeBack();
286 } else {
287 return false;
288 }
289
290 if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
291 return false;
292 }
293
294 start_position.Remove();
295 return true;
296 }
297
ConsumeBrackets(tok::TokenKind left,tok::TokenKind right)298 bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
299 tok::TokenKind right) {
300 Bookmark start_position = SetBookmark();
301 if (!HasMoreTokens() || Peek().getKind() != left)
302 return false;
303 Advance();
304
305 int counter = 1;
306 while (HasMoreTokens() && counter > 0) {
307 tok::TokenKind kind = Peek().getKind();
308 if (kind == right)
309 --counter;
310 else if (kind == left)
311 ++counter;
312 Advance();
313 }
314
315 assert(counter >= 0);
316 if (counter > 0) {
317 return false;
318 }
319 start_position.Remove();
320 return true;
321 }
322
ConsumeOperator()323 bool CPlusPlusNameParser::ConsumeOperator() {
324 Bookmark start_position = SetBookmark();
325 if (!ConsumeToken(tok::kw_operator))
326 return false;
327
328 if (!HasMoreTokens()) {
329 return false;
330 }
331
332 const auto &token = Peek();
333 switch (token.getKind()) {
334 case tok::kw_new:
335 case tok::kw_delete:
336 // This is 'new' or 'delete' operators.
337 Advance();
338 // Check for array new/delete.
339 if (HasMoreTokens() && Peek().is(tok::l_square)) {
340 // Consume the '[' and ']'.
341 if (!ConsumeBrackets(tok::l_square, tok::r_square))
342 return false;
343 }
344 break;
345
346 #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \
347 case tok::Token: \
348 Advance(); \
349 break;
350 #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
351 #include "clang/Basic/OperatorKinds.def"
352 #undef OVERLOADED_OPERATOR
353 #undef OVERLOADED_OPERATOR_MULTI
354
355 case tok::l_paren:
356 // Call operator consume '(' ... ')'.
357 if (ConsumeBrackets(tok::l_paren, tok::r_paren))
358 break;
359 return false;
360
361 case tok::l_square:
362 // This is a [] operator.
363 // Consume the '[' and ']'.
364 if (ConsumeBrackets(tok::l_square, tok::r_square))
365 break;
366 return false;
367
368 default:
369 // This might be a cast operator.
370 if (ConsumeTypename())
371 break;
372 return false;
373 }
374 start_position.Remove();
375 return true;
376 }
377
SkipTypeQualifiers()378 void CPlusPlusNameParser::SkipTypeQualifiers() {
379 while (ConsumeToken(tok::kw_const, tok::kw_volatile))
380 ;
381 }
382
SkipFunctionQualifiers()383 void CPlusPlusNameParser::SkipFunctionQualifiers() {
384 while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
385 ;
386 }
387
ConsumeBuiltinType()388 bool CPlusPlusNameParser::ConsumeBuiltinType() {
389 bool result = false;
390 bool continue_parsing = true;
391 // Built-in types can be made of a few keywords like 'unsigned long long
392 // int'. This function consumes all built-in type keywords without checking
393 // if they make sense like 'unsigned char void'.
394 while (continue_parsing && HasMoreTokens()) {
395 switch (Peek().getKind()) {
396 case tok::kw_short:
397 case tok::kw_long:
398 case tok::kw___int64:
399 case tok::kw___int128:
400 case tok::kw_signed:
401 case tok::kw_unsigned:
402 case tok::kw_void:
403 case tok::kw_char:
404 case tok::kw_int:
405 case tok::kw_half:
406 case tok::kw_float:
407 case tok::kw_double:
408 case tok::kw___float128:
409 case tok::kw_wchar_t:
410 case tok::kw_bool:
411 case tok::kw_char16_t:
412 case tok::kw_char32_t:
413 result = true;
414 Advance();
415 break;
416 default:
417 continue_parsing = false;
418 break;
419 }
420 }
421 return result;
422 }
423
SkipPtrsAndRefs()424 void CPlusPlusNameParser::SkipPtrsAndRefs() {
425 // Ignoring result.
426 ConsumePtrsAndRefs();
427 }
428
ConsumePtrsAndRefs()429 bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
430 bool found = false;
431 SkipTypeQualifiers();
432 while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
433 tok::kw_volatile)) {
434 found = true;
435 SkipTypeQualifiers();
436 }
437 return found;
438 }
439
ConsumeDecltype()440 bool CPlusPlusNameParser::ConsumeDecltype() {
441 Bookmark start_position = SetBookmark();
442 if (!ConsumeToken(tok::kw_decltype))
443 return false;
444
445 if (!ConsumeArguments())
446 return false;
447
448 start_position.Remove();
449 return true;
450 }
451
ConsumeTypename()452 bool CPlusPlusNameParser::ConsumeTypename() {
453 Bookmark start_position = SetBookmark();
454 SkipTypeQualifiers();
455 if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
456 if (!ParseFullNameImpl())
457 return false;
458 }
459 SkipPtrsAndRefs();
460 start_position.Remove();
461 return true;
462 }
463
464 Optional<CPlusPlusNameParser::ParsedNameRanges>
ParseFullNameImpl()465 CPlusPlusNameParser::ParseFullNameImpl() {
466 // Name parsing state machine.
467 enum class State {
468 Beginning, // start of the name
469 AfterTwoColons, // right after ::
470 AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
471 AfterTemplate, // right after template brackets (<something>)
472 AfterOperator, // right after name of C++ operator
473 };
474
475 Bookmark start_position = SetBookmark();
476 State state = State::Beginning;
477 bool continue_parsing = true;
478 Optional<size_t> last_coloncolon_position = None;
479
480 while (continue_parsing && HasMoreTokens()) {
481 const auto &token = Peek();
482 switch (token.getKind()) {
483 case tok::raw_identifier: // Just a name.
484 if (state != State::Beginning && state != State::AfterTwoColons) {
485 continue_parsing = false;
486 break;
487 }
488 Advance();
489 state = State::AfterIdentifier;
490 break;
491 case tok::l_paren: {
492 if (state == State::Beginning || state == State::AfterTwoColons) {
493 // (anonymous namespace)
494 if (ConsumeAnonymousNamespace()) {
495 state = State::AfterIdentifier;
496 break;
497 }
498 }
499
500 // Type declared inside a function 'func()::Type'
501 if (state != State::AfterIdentifier && state != State::AfterTemplate &&
502 state != State::AfterOperator) {
503 continue_parsing = false;
504 break;
505 }
506 Bookmark l_paren_position = SetBookmark();
507 // Consume the '(' ... ') [const]'.
508 if (!ConsumeArguments()) {
509 continue_parsing = false;
510 break;
511 }
512 SkipFunctionQualifiers();
513
514 // Consume '::'
515 size_t coloncolon_position = GetCurrentPosition();
516 if (!ConsumeToken(tok::coloncolon)) {
517 continue_parsing = false;
518 break;
519 }
520 l_paren_position.Remove();
521 last_coloncolon_position = coloncolon_position;
522 state = State::AfterTwoColons;
523 break;
524 }
525 case tok::l_brace:
526 if (state == State::Beginning || state == State::AfterTwoColons) {
527 if (ConsumeLambda()) {
528 state = State::AfterIdentifier;
529 break;
530 }
531 }
532 continue_parsing = false;
533 break;
534 case tok::coloncolon: // Type nesting delimiter.
535 if (state != State::Beginning && state != State::AfterIdentifier &&
536 state != State::AfterTemplate) {
537 continue_parsing = false;
538 break;
539 }
540 last_coloncolon_position = GetCurrentPosition();
541 Advance();
542 state = State::AfterTwoColons;
543 break;
544 case tok::less: // Template brackets.
545 if (state != State::AfterIdentifier && state != State::AfterOperator) {
546 continue_parsing = false;
547 break;
548 }
549 if (!ConsumeTemplateArgs()) {
550 continue_parsing = false;
551 break;
552 }
553 state = State::AfterTemplate;
554 break;
555 case tok::kw_operator: // C++ operator overloading.
556 if (state != State::Beginning && state != State::AfterTwoColons) {
557 continue_parsing = false;
558 break;
559 }
560 if (!ConsumeOperator()) {
561 continue_parsing = false;
562 break;
563 }
564 state = State::AfterOperator;
565 break;
566 case tok::tilde: // Destructor.
567 if (state != State::Beginning && state != State::AfterTwoColons) {
568 continue_parsing = false;
569 break;
570 }
571 Advance();
572 if (ConsumeToken(tok::raw_identifier)) {
573 state = State::AfterIdentifier;
574 } else {
575 TakeBack();
576 continue_parsing = false;
577 }
578 break;
579 default:
580 continue_parsing = false;
581 break;
582 }
583 }
584
585 if (state == State::AfterIdentifier || state == State::AfterOperator ||
586 state == State::AfterTemplate) {
587 ParsedNameRanges result;
588 if (last_coloncolon_position) {
589 result.context_range = Range(start_position.GetSavedPosition(),
590 last_coloncolon_position.getValue());
591 result.basename_range =
592 Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
593 } else {
594 result.basename_range =
595 Range(start_position.GetSavedPosition(), GetCurrentPosition());
596 }
597 start_position.Remove();
598 return result;
599 } else {
600 return None;
601 }
602 }
603
GetTextForRange(const Range & range)604 llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
605 if (range.empty())
606 return llvm::StringRef();
607 assert(range.begin_index < range.end_index);
608 assert(range.begin_index < m_tokens.size());
609 assert(range.end_index <= m_tokens.size());
610 clang::Token &first_token = m_tokens[range.begin_index];
611 clang::Token &last_token = m_tokens[range.end_index - 1];
612 clang::SourceLocation start_loc = first_token.getLocation();
613 clang::SourceLocation end_loc = last_token.getLocation();
614 unsigned start_pos = start_loc.getRawEncoding();
615 unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
616 return m_text.take_front(end_pos).drop_front(start_pos);
617 }
618
GetLangOptions()619 static const clang::LangOptions &GetLangOptions() {
620 static clang::LangOptions g_options;
621 static llvm::once_flag g_once_flag;
622 llvm::call_once(g_once_flag, []() {
623 g_options.LineComment = true;
624 g_options.C99 = true;
625 g_options.C11 = true;
626 g_options.CPlusPlus = true;
627 g_options.CPlusPlus11 = true;
628 g_options.CPlusPlus14 = true;
629 g_options.CPlusPlus17 = true;
630 });
631 return g_options;
632 }
633
GetKeywordsMap()634 static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
635 static llvm::StringMap<tok::TokenKind> g_map{
636 #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
637 #include "clang/Basic/TokenKinds.def"
638 #undef KEYWORD
639 };
640 return g_map;
641 }
642
ExtractTokens()643 void CPlusPlusNameParser::ExtractTokens() {
644 clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
645 m_text.data(), m_text.data() + m_text.size());
646 const auto &kw_map = GetKeywordsMap();
647 clang::Token token;
648 for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
649 lexer.LexFromRawLexer(token)) {
650 if (token.is(clang::tok::raw_identifier)) {
651 auto it = kw_map.find(token.getRawIdentifier());
652 if (it != kw_map.end()) {
653 token.setKind(it->getValue());
654 }
655 }
656
657 m_tokens.push_back(token);
658 }
659 }
660