1*0b57cec5SDimitry Andric //===-- CPlusPlusNameParser.cpp -------------------------------------------===//
2*0b57cec5SDimitry Andric //
3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric 
9*0b57cec5SDimitry Andric #include "CPlusPlusNameParser.h"
10*0b57cec5SDimitry Andric 
11*0b57cec5SDimitry Andric #include "clang/Basic/IdentifierTable.h"
12*0b57cec5SDimitry Andric #include "llvm/ADT/StringMap.h"
13*0b57cec5SDimitry Andric #include "llvm/Support/Threading.h"
14*0b57cec5SDimitry Andric 
15*0b57cec5SDimitry Andric using namespace lldb;
16*0b57cec5SDimitry Andric using namespace lldb_private;
17*0b57cec5SDimitry Andric using llvm::Optional;
18*0b57cec5SDimitry Andric using llvm::None;
19*0b57cec5SDimitry Andric using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
20*0b57cec5SDimitry Andric using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
21*0b57cec5SDimitry Andric namespace tok = clang::tok;
22*0b57cec5SDimitry Andric 
ParseAsFunctionDefinition()23*0b57cec5SDimitry Andric Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
24*0b57cec5SDimitry Andric   m_next_token_index = 0;
25*0b57cec5SDimitry Andric   Optional<ParsedFunction> result(None);
26*0b57cec5SDimitry Andric 
27*0b57cec5SDimitry Andric   // Try to parse the name as function without a return type specified e.g.
28*0b57cec5SDimitry Andric   // main(int, char*[])
29*0b57cec5SDimitry Andric   {
30*0b57cec5SDimitry Andric     Bookmark start_position = SetBookmark();
31*0b57cec5SDimitry Andric     result = ParseFunctionImpl(false);
32*0b57cec5SDimitry Andric     if (result && !HasMoreTokens())
33*0b57cec5SDimitry Andric       return result;
34*0b57cec5SDimitry Andric   }
35*0b57cec5SDimitry Andric 
36*0b57cec5SDimitry Andric   // Try to parse the name as function with function pointer return type e.g.
37*0b57cec5SDimitry Andric   // void (*get_func(const char*))()
38*0b57cec5SDimitry Andric   result = ParseFuncPtr(true);
39*0b57cec5SDimitry Andric   if (result)
40*0b57cec5SDimitry Andric     return result;
41*0b57cec5SDimitry Andric 
42*0b57cec5SDimitry Andric   // Finally try to parse the name as a function with non-function return type
43*0b57cec5SDimitry Andric   // e.g. int main(int, char*[])
44*0b57cec5SDimitry Andric   result = ParseFunctionImpl(true);
45*0b57cec5SDimitry Andric   if (HasMoreTokens())
46*0b57cec5SDimitry Andric     return None;
47*0b57cec5SDimitry Andric   return result;
48*0b57cec5SDimitry Andric }
49*0b57cec5SDimitry Andric 
ParseAsFullName()50*0b57cec5SDimitry Andric Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
51*0b57cec5SDimitry Andric   m_next_token_index = 0;
52*0b57cec5SDimitry Andric   Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
53*0b57cec5SDimitry Andric   if (!name_ranges)
54*0b57cec5SDimitry Andric     return None;
55*0b57cec5SDimitry Andric   if (HasMoreTokens())
56*0b57cec5SDimitry Andric     return None;
57*0b57cec5SDimitry Andric   ParsedName result;
58*0b57cec5SDimitry Andric   result.basename = GetTextForRange(name_ranges.getValue().basename_range);
59*0b57cec5SDimitry Andric   result.context = GetTextForRange(name_ranges.getValue().context_range);
60*0b57cec5SDimitry Andric   return result;
61*0b57cec5SDimitry Andric }
62*0b57cec5SDimitry Andric 
HasMoreTokens()63*0b57cec5SDimitry Andric bool CPlusPlusNameParser::HasMoreTokens() {
64*0b57cec5SDimitry Andric   return m_next_token_index < m_tokens.size();
65*0b57cec5SDimitry Andric }
66*0b57cec5SDimitry Andric 
Advance()67*0b57cec5SDimitry Andric void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
68*0b57cec5SDimitry Andric 
TakeBack()69*0b57cec5SDimitry Andric void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
70*0b57cec5SDimitry Andric 
ConsumeToken(tok::TokenKind kind)71*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
72*0b57cec5SDimitry Andric   if (!HasMoreTokens())
73*0b57cec5SDimitry Andric     return false;
74*0b57cec5SDimitry Andric 
75*0b57cec5SDimitry Andric   if (!Peek().is(kind))
76*0b57cec5SDimitry Andric     return false;
77*0b57cec5SDimitry Andric 
78*0b57cec5SDimitry Andric   Advance();
79*0b57cec5SDimitry Andric   return true;
80*0b57cec5SDimitry Andric }
81*0b57cec5SDimitry Andric 
ConsumeToken(Ts...kinds)82*0b57cec5SDimitry Andric template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
83*0b57cec5SDimitry Andric   if (!HasMoreTokens())
84*0b57cec5SDimitry Andric     return false;
85*0b57cec5SDimitry Andric 
86*0b57cec5SDimitry Andric   if (!Peek().isOneOf(kinds...))
87*0b57cec5SDimitry Andric     return false;
88*0b57cec5SDimitry Andric 
89*0b57cec5SDimitry Andric   Advance();
90*0b57cec5SDimitry Andric   return true;
91*0b57cec5SDimitry Andric }
92*0b57cec5SDimitry Andric 
SetBookmark()93*0b57cec5SDimitry Andric CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
94*0b57cec5SDimitry Andric   return Bookmark(m_next_token_index);
95*0b57cec5SDimitry Andric }
96*0b57cec5SDimitry Andric 
GetCurrentPosition()97*0b57cec5SDimitry Andric size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
98*0b57cec5SDimitry Andric 
Peek()99*0b57cec5SDimitry Andric clang::Token &CPlusPlusNameParser::Peek() {
100*0b57cec5SDimitry Andric   assert(HasMoreTokens());
101*0b57cec5SDimitry Andric   return m_tokens[m_next_token_index];
102*0b57cec5SDimitry Andric }
103*0b57cec5SDimitry Andric 
104*0b57cec5SDimitry Andric Optional<ParsedFunction>
ParseFunctionImpl(bool expect_return_type)105*0b57cec5SDimitry Andric CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
106*0b57cec5SDimitry Andric   Bookmark start_position = SetBookmark();
107*0b57cec5SDimitry Andric   if (expect_return_type) {
108*0b57cec5SDimitry Andric     // Consume return type if it's expected.
109*0b57cec5SDimitry Andric     if (!ConsumeTypename())
110*0b57cec5SDimitry Andric       return None;
111*0b57cec5SDimitry Andric   }
112*0b57cec5SDimitry Andric 
113*0b57cec5SDimitry Andric   auto maybe_name = ParseFullNameImpl();
114*0b57cec5SDimitry Andric   if (!maybe_name) {
115*0b57cec5SDimitry Andric     return None;
116*0b57cec5SDimitry Andric   }
117*0b57cec5SDimitry Andric 
118*0b57cec5SDimitry Andric   size_t argument_start = GetCurrentPosition();
119*0b57cec5SDimitry Andric   if (!ConsumeArguments()) {
120*0b57cec5SDimitry Andric     return None;
121*0b57cec5SDimitry Andric   }
122*0b57cec5SDimitry Andric 
123*0b57cec5SDimitry Andric   size_t qualifiers_start = GetCurrentPosition();
124*0b57cec5SDimitry Andric   SkipFunctionQualifiers();
125*0b57cec5SDimitry Andric   size_t end_position = GetCurrentPosition();
126*0b57cec5SDimitry Andric 
127*0b57cec5SDimitry Andric   ParsedFunction result;
128*0b57cec5SDimitry Andric   result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
129*0b57cec5SDimitry Andric   result.name.context = GetTextForRange(maybe_name.getValue().context_range);
130*0b57cec5SDimitry Andric   result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
131*0b57cec5SDimitry Andric   result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
132*0b57cec5SDimitry Andric   start_position.Remove();
133*0b57cec5SDimitry Andric   return result;
134*0b57cec5SDimitry Andric }
135*0b57cec5SDimitry Andric 
136*0b57cec5SDimitry Andric Optional<ParsedFunction>
ParseFuncPtr(bool expect_return_type)137*0b57cec5SDimitry Andric CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
138*0b57cec5SDimitry Andric   Bookmark start_position = SetBookmark();
139*0b57cec5SDimitry Andric   if (expect_return_type) {
140*0b57cec5SDimitry Andric     // Consume return type.
141*0b57cec5SDimitry Andric     if (!ConsumeTypename())
142*0b57cec5SDimitry Andric       return None;
143*0b57cec5SDimitry Andric   }
144*0b57cec5SDimitry Andric 
145*0b57cec5SDimitry Andric   if (!ConsumeToken(tok::l_paren))
146*0b57cec5SDimitry Andric     return None;
147*0b57cec5SDimitry Andric   if (!ConsumePtrsAndRefs())
148*0b57cec5SDimitry Andric     return None;
149*0b57cec5SDimitry Andric 
150*0b57cec5SDimitry Andric   {
151*0b57cec5SDimitry Andric     Bookmark before_inner_function_pos = SetBookmark();
152*0b57cec5SDimitry Andric     auto maybe_inner_function_name = ParseFunctionImpl(false);
153*0b57cec5SDimitry Andric     if (maybe_inner_function_name)
154*0b57cec5SDimitry Andric       if (ConsumeToken(tok::r_paren))
155*0b57cec5SDimitry Andric         if (ConsumeArguments()) {
156*0b57cec5SDimitry Andric           SkipFunctionQualifiers();
157*0b57cec5SDimitry Andric           start_position.Remove();
158*0b57cec5SDimitry Andric           before_inner_function_pos.Remove();
159*0b57cec5SDimitry Andric           return maybe_inner_function_name;
160*0b57cec5SDimitry Andric         }
161*0b57cec5SDimitry Andric   }
162*0b57cec5SDimitry Andric 
163*0b57cec5SDimitry Andric   auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
164*0b57cec5SDimitry Andric   if (maybe_inner_function_ptr_name)
165*0b57cec5SDimitry Andric     if (ConsumeToken(tok::r_paren))
166*0b57cec5SDimitry Andric       if (ConsumeArguments()) {
167*0b57cec5SDimitry Andric         SkipFunctionQualifiers();
168*0b57cec5SDimitry Andric         start_position.Remove();
169*0b57cec5SDimitry Andric         return maybe_inner_function_ptr_name;
170*0b57cec5SDimitry Andric       }
171*0b57cec5SDimitry Andric   return None;
172*0b57cec5SDimitry Andric }
173*0b57cec5SDimitry Andric 
ConsumeArguments()174*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeArguments() {
175*0b57cec5SDimitry Andric   return ConsumeBrackets(tok::l_paren, tok::r_paren);
176*0b57cec5SDimitry Andric }
177*0b57cec5SDimitry Andric 
ConsumeTemplateArgs()178*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeTemplateArgs() {
179*0b57cec5SDimitry Andric   Bookmark start_position = SetBookmark();
180*0b57cec5SDimitry Andric   if (!HasMoreTokens() || Peek().getKind() != tok::less)
181*0b57cec5SDimitry Andric     return false;
182*0b57cec5SDimitry Andric   Advance();
183*0b57cec5SDimitry Andric 
184*0b57cec5SDimitry Andric   // Consuming template arguments is a bit trickier than consuming function
185*0b57cec5SDimitry Andric   // arguments, because '<' '>' brackets are not always trivially balanced. In
186*0b57cec5SDimitry Andric   // some rare cases tokens '<' and '>' can appear inside template arguments as
187*0b57cec5SDimitry Andric   // arithmetic or shift operators not as template brackets. Examples:
188*0b57cec5SDimitry Andric   // std::enable_if<(10u)<(64), bool>
189*0b57cec5SDimitry Andric   //           f<A<operator<(X,Y)::Subclass>>
190*0b57cec5SDimitry Andric   // Good thing that compiler makes sure that really ambiguous cases of '>'
191*0b57cec5SDimitry Andric   // usage should be enclosed within '()' brackets.
192*0b57cec5SDimitry Andric   int template_counter = 1;
193*0b57cec5SDimitry Andric   bool can_open_template = false;
194*0b57cec5SDimitry Andric   while (HasMoreTokens() && template_counter > 0) {
195*0b57cec5SDimitry Andric     tok::TokenKind kind = Peek().getKind();
196*0b57cec5SDimitry Andric     switch (kind) {
197*0b57cec5SDimitry Andric     case tok::greatergreater:
198*0b57cec5SDimitry Andric       template_counter -= 2;
199*0b57cec5SDimitry Andric       can_open_template = false;
200*0b57cec5SDimitry Andric       Advance();
201*0b57cec5SDimitry Andric       break;
202*0b57cec5SDimitry Andric     case tok::greater:
203*0b57cec5SDimitry Andric       --template_counter;
204*0b57cec5SDimitry Andric       can_open_template = false;
205*0b57cec5SDimitry Andric       Advance();
206*0b57cec5SDimitry Andric       break;
207*0b57cec5SDimitry Andric     case tok::less:
208*0b57cec5SDimitry Andric       // '<' is an attempt to open a subteamplte
209*0b57cec5SDimitry Andric       // check if parser is at the point where it's actually possible,
210*0b57cec5SDimitry Andric       // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
211*0b57cec5SDimitry Andric       // need to do the same for '>' because compiler actually makes sure that
212*0b57cec5SDimitry Andric       // '>' always surrounded by brackets to avoid ambiguity.
213*0b57cec5SDimitry Andric       if (can_open_template)
214*0b57cec5SDimitry Andric         ++template_counter;
215*0b57cec5SDimitry Andric       can_open_template = false;
216*0b57cec5SDimitry Andric       Advance();
217*0b57cec5SDimitry Andric       break;
218*0b57cec5SDimitry Andric     case tok::kw_operator: // C++ operator overloading.
219*0b57cec5SDimitry Andric       if (!ConsumeOperator())
220*0b57cec5SDimitry Andric         return false;
221*0b57cec5SDimitry Andric       can_open_template = true;
222*0b57cec5SDimitry Andric       break;
223*0b57cec5SDimitry Andric     case tok::raw_identifier:
224*0b57cec5SDimitry Andric       can_open_template = true;
225*0b57cec5SDimitry Andric       Advance();
226*0b57cec5SDimitry Andric       break;
227*0b57cec5SDimitry Andric     case tok::l_square:
228*0b57cec5SDimitry Andric       if (!ConsumeBrackets(tok::l_square, tok::r_square))
229*0b57cec5SDimitry Andric         return false;
230*0b57cec5SDimitry Andric       can_open_template = false;
231*0b57cec5SDimitry Andric       break;
232*0b57cec5SDimitry Andric     case tok::l_paren:
233*0b57cec5SDimitry Andric       if (!ConsumeArguments())
234*0b57cec5SDimitry Andric         return false;
235*0b57cec5SDimitry Andric       can_open_template = false;
236*0b57cec5SDimitry Andric       break;
237*0b57cec5SDimitry Andric     default:
238*0b57cec5SDimitry Andric       can_open_template = false;
239*0b57cec5SDimitry Andric       Advance();
240*0b57cec5SDimitry Andric       break;
241*0b57cec5SDimitry Andric     }
242*0b57cec5SDimitry Andric   }
243*0b57cec5SDimitry Andric 
244*0b57cec5SDimitry Andric   if (template_counter != 0) {
245*0b57cec5SDimitry Andric     return false;
246*0b57cec5SDimitry Andric   }
247*0b57cec5SDimitry Andric   start_position.Remove();
248*0b57cec5SDimitry Andric   return true;
249*0b57cec5SDimitry Andric }
250*0b57cec5SDimitry Andric 
ConsumeAnonymousNamespace()251*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
252*0b57cec5SDimitry Andric   Bookmark start_position = SetBookmark();
253*0b57cec5SDimitry Andric   if (!ConsumeToken(tok::l_paren)) {
254*0b57cec5SDimitry Andric     return false;
255*0b57cec5SDimitry Andric   }
256*0b57cec5SDimitry Andric   constexpr llvm::StringLiteral g_anonymous("anonymous");
257*0b57cec5SDimitry Andric   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
258*0b57cec5SDimitry Andric       Peek().getRawIdentifier() == g_anonymous) {
259*0b57cec5SDimitry Andric     Advance();
260*0b57cec5SDimitry Andric   } else {
261*0b57cec5SDimitry Andric     return false;
262*0b57cec5SDimitry Andric   }
263*0b57cec5SDimitry Andric 
264*0b57cec5SDimitry Andric   if (!ConsumeToken(tok::kw_namespace)) {
265*0b57cec5SDimitry Andric     return false;
266*0b57cec5SDimitry Andric   }
267*0b57cec5SDimitry Andric 
268*0b57cec5SDimitry Andric   if (!ConsumeToken(tok::r_paren)) {
269*0b57cec5SDimitry Andric     return false;
270*0b57cec5SDimitry Andric   }
271*0b57cec5SDimitry Andric   start_position.Remove();
272*0b57cec5SDimitry Andric   return true;
273*0b57cec5SDimitry Andric }
274*0b57cec5SDimitry Andric 
ConsumeLambda()275*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeLambda() {
276*0b57cec5SDimitry Andric   Bookmark start_position = SetBookmark();
277*0b57cec5SDimitry Andric   if (!ConsumeToken(tok::l_brace)) {
278*0b57cec5SDimitry Andric     return false;
279*0b57cec5SDimitry Andric   }
280*0b57cec5SDimitry Andric   constexpr llvm::StringLiteral g_lambda("lambda");
281*0b57cec5SDimitry Andric   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
282*0b57cec5SDimitry Andric       Peek().getRawIdentifier() == g_lambda) {
283*0b57cec5SDimitry Andric     // Put the matched brace back so we can use ConsumeBrackets
284*0b57cec5SDimitry Andric     TakeBack();
285*0b57cec5SDimitry Andric   } else {
286*0b57cec5SDimitry Andric     return false;
287*0b57cec5SDimitry Andric   }
288*0b57cec5SDimitry Andric 
289*0b57cec5SDimitry Andric   if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
290*0b57cec5SDimitry Andric     return false;
291*0b57cec5SDimitry Andric   }
292*0b57cec5SDimitry Andric 
293*0b57cec5SDimitry Andric   start_position.Remove();
294*0b57cec5SDimitry Andric   return true;
295*0b57cec5SDimitry Andric }
296*0b57cec5SDimitry Andric 
ConsumeBrackets(tok::TokenKind left,tok::TokenKind right)297*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
298*0b57cec5SDimitry Andric                                           tok::TokenKind right) {
299*0b57cec5SDimitry Andric   Bookmark start_position = SetBookmark();
300*0b57cec5SDimitry Andric   if (!HasMoreTokens() || Peek().getKind() != left)
301*0b57cec5SDimitry Andric     return false;
302*0b57cec5SDimitry Andric   Advance();
303*0b57cec5SDimitry Andric 
304*0b57cec5SDimitry Andric   int counter = 1;
305*0b57cec5SDimitry Andric   while (HasMoreTokens() && counter > 0) {
306*0b57cec5SDimitry Andric     tok::TokenKind kind = Peek().getKind();
307*0b57cec5SDimitry Andric     if (kind == right)
308*0b57cec5SDimitry Andric       --counter;
309*0b57cec5SDimitry Andric     else if (kind == left)
310*0b57cec5SDimitry Andric       ++counter;
311*0b57cec5SDimitry Andric     Advance();
312*0b57cec5SDimitry Andric   }
313*0b57cec5SDimitry Andric 
314*0b57cec5SDimitry Andric   assert(counter >= 0);
315*0b57cec5SDimitry Andric   if (counter > 0) {
316*0b57cec5SDimitry Andric     return false;
317*0b57cec5SDimitry Andric   }
318*0b57cec5SDimitry Andric   start_position.Remove();
319*0b57cec5SDimitry Andric   return true;
320*0b57cec5SDimitry Andric }
321*0b57cec5SDimitry Andric 
ConsumeOperator()322*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeOperator() {
323*0b57cec5SDimitry Andric   Bookmark start_position = SetBookmark();
324*0b57cec5SDimitry Andric   if (!ConsumeToken(tok::kw_operator))
325*0b57cec5SDimitry Andric     return false;
326*0b57cec5SDimitry Andric 
327*0b57cec5SDimitry Andric   if (!HasMoreTokens()) {
328*0b57cec5SDimitry Andric     return false;
329*0b57cec5SDimitry Andric   }
330*0b57cec5SDimitry Andric 
331*0b57cec5SDimitry Andric   const auto &token = Peek();
332*0b57cec5SDimitry Andric 
333*0b57cec5SDimitry Andric   // When clang generates debug info it adds template parameters to names.
334*0b57cec5SDimitry Andric   // Since clang doesn't add a space between the name and the template parameter
335*0b57cec5SDimitry Andric   // in some cases we are not generating valid C++ names e.g.:
336*0b57cec5SDimitry Andric   //
337*0b57cec5SDimitry Andric   //   operator<<A::B>
338*0b57cec5SDimitry Andric   //
339*0b57cec5SDimitry Andric   // In some of these cases we will not parse them correctly. This fixes the
340*0b57cec5SDimitry Andric   // issue by detecting this case and inserting tok::less in place of
341*0b57cec5SDimitry Andric   // tok::lessless and returning successfully that we consumed the operator.
342*0b57cec5SDimitry Andric   if (token.getKind() == tok::lessless) {
343*0b57cec5SDimitry Andric     // Make sure we have more tokens before attempting to look ahead one more.
344*0b57cec5SDimitry Andric     if (m_next_token_index + 1 < m_tokens.size()) {
345*0b57cec5SDimitry Andric       // Look ahead two tokens.
346*0b57cec5SDimitry Andric       clang::Token n_token = m_tokens[m_next_token_index + 1];
347*0b57cec5SDimitry Andric       // If we find ( or < then this is indeed operator<< no need for fix.
348*0b57cec5SDimitry Andric       if (n_token.getKind() != tok::l_paren && n_token.getKind() != tok::less) {
349*0b57cec5SDimitry Andric         clang::Token tmp_tok;
350*0b57cec5SDimitry Andric         tmp_tok.startToken();
351*0b57cec5SDimitry Andric         tmp_tok.setLength(1);
352*0b57cec5SDimitry Andric         tmp_tok.setLocation(token.getLocation().getLocWithOffset(1));
353*0b57cec5SDimitry Andric         tmp_tok.setKind(tok::less);
354*0b57cec5SDimitry Andric 
355*0b57cec5SDimitry Andric         m_tokens[m_next_token_index] = tmp_tok;
356*0b57cec5SDimitry Andric 
357*0b57cec5SDimitry Andric         start_position.Remove();
358*0b57cec5SDimitry Andric         return true;
359*0b57cec5SDimitry Andric       }
360*0b57cec5SDimitry Andric     }
361*0b57cec5SDimitry Andric   }
362*0b57cec5SDimitry Andric 
363*0b57cec5SDimitry Andric   switch (token.getKind()) {
364*0b57cec5SDimitry Andric   case tok::kw_new:
365*0b57cec5SDimitry Andric   case tok::kw_delete:
366*0b57cec5SDimitry Andric     // This is 'new' or 'delete' operators.
367*0b57cec5SDimitry Andric     Advance();
368*0b57cec5SDimitry Andric     // Check for array new/delete.
369*0b57cec5SDimitry Andric     if (HasMoreTokens() && Peek().is(tok::l_square)) {
370*0b57cec5SDimitry Andric       // Consume the '[' and ']'.
371*0b57cec5SDimitry Andric       if (!ConsumeBrackets(tok::l_square, tok::r_square))
372*0b57cec5SDimitry Andric         return false;
373*0b57cec5SDimitry Andric     }
374*0b57cec5SDimitry Andric     break;
375*0b57cec5SDimitry Andric 
376*0b57cec5SDimitry Andric #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
377*0b57cec5SDimitry Andric   case tok::Token:                                                             \
378*0b57cec5SDimitry Andric     Advance();                                                                 \
379*0b57cec5SDimitry Andric     break;
380*0b57cec5SDimitry Andric #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
381*0b57cec5SDimitry Andric #include "clang/Basic/OperatorKinds.def"
382*0b57cec5SDimitry Andric #undef OVERLOADED_OPERATOR
383*0b57cec5SDimitry Andric #undef OVERLOADED_OPERATOR_MULTI
384*0b57cec5SDimitry Andric 
385*0b57cec5SDimitry Andric   case tok::l_paren:
386*0b57cec5SDimitry Andric     // Call operator consume '(' ... ')'.
387*0b57cec5SDimitry Andric     if (ConsumeBrackets(tok::l_paren, tok::r_paren))
388*0b57cec5SDimitry Andric       break;
389*0b57cec5SDimitry Andric     return false;
390*0b57cec5SDimitry Andric 
391*0b57cec5SDimitry Andric   case tok::l_square:
392*0b57cec5SDimitry Andric     // This is a [] operator.
393*0b57cec5SDimitry Andric     // Consume the '[' and ']'.
394*0b57cec5SDimitry Andric     if (ConsumeBrackets(tok::l_square, tok::r_square))
395*0b57cec5SDimitry Andric       break;
396*0b57cec5SDimitry Andric     return false;
397*0b57cec5SDimitry Andric 
398*0b57cec5SDimitry Andric   default:
399*0b57cec5SDimitry Andric     // This might be a cast operator.
400*0b57cec5SDimitry Andric     if (ConsumeTypename())
401*0b57cec5SDimitry Andric       break;
402*0b57cec5SDimitry Andric     return false;
403*0b57cec5SDimitry Andric   }
404*0b57cec5SDimitry Andric   start_position.Remove();
405*0b57cec5SDimitry Andric   return true;
406*0b57cec5SDimitry Andric }
407*0b57cec5SDimitry Andric 
SkipTypeQualifiers()408*0b57cec5SDimitry Andric void CPlusPlusNameParser::SkipTypeQualifiers() {
409*0b57cec5SDimitry Andric   while (ConsumeToken(tok::kw_const, tok::kw_volatile))
410*0b57cec5SDimitry Andric     ;
411*0b57cec5SDimitry Andric }
412*0b57cec5SDimitry Andric 
SkipFunctionQualifiers()413*0b57cec5SDimitry Andric void CPlusPlusNameParser::SkipFunctionQualifiers() {
414*0b57cec5SDimitry Andric   while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
415*0b57cec5SDimitry Andric     ;
416*0b57cec5SDimitry Andric }
417*0b57cec5SDimitry Andric 
ConsumeBuiltinType()418*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeBuiltinType() {
419*0b57cec5SDimitry Andric   bool result = false;
420*0b57cec5SDimitry Andric   bool continue_parsing = true;
421*0b57cec5SDimitry Andric   // Built-in types can be made of a few keywords like 'unsigned long long
422*0b57cec5SDimitry Andric   // int'. This function consumes all built-in type keywords without checking
423*0b57cec5SDimitry Andric   // if they make sense like 'unsigned char void'.
424*0b57cec5SDimitry Andric   while (continue_parsing && HasMoreTokens()) {
425*0b57cec5SDimitry Andric     switch (Peek().getKind()) {
426*0b57cec5SDimitry Andric     case tok::kw_short:
427*0b57cec5SDimitry Andric     case tok::kw_long:
428*0b57cec5SDimitry Andric     case tok::kw___int64:
429*0b57cec5SDimitry Andric     case tok::kw___int128:
430*0b57cec5SDimitry Andric     case tok::kw_signed:
431*0b57cec5SDimitry Andric     case tok::kw_unsigned:
432*0b57cec5SDimitry Andric     case tok::kw_void:
433*0b57cec5SDimitry Andric     case tok::kw_char:
434*0b57cec5SDimitry Andric     case tok::kw_int:
435*0b57cec5SDimitry Andric     case tok::kw_half:
436*0b57cec5SDimitry Andric     case tok::kw_float:
437*0b57cec5SDimitry Andric     case tok::kw_double:
438*0b57cec5SDimitry Andric     case tok::kw___float128:
439*0b57cec5SDimitry Andric     case tok::kw_wchar_t:
440*0b57cec5SDimitry Andric     case tok::kw_bool:
441*0b57cec5SDimitry Andric     case tok::kw_char16_t:
442*0b57cec5SDimitry Andric     case tok::kw_char32_t:
443*0b57cec5SDimitry Andric       result = true;
444*0b57cec5SDimitry Andric       Advance();
445*0b57cec5SDimitry Andric       break;
446*0b57cec5SDimitry Andric     default:
447*0b57cec5SDimitry Andric       continue_parsing = false;
448*0b57cec5SDimitry Andric       break;
449*0b57cec5SDimitry Andric     }
450*0b57cec5SDimitry Andric   }
451*0b57cec5SDimitry Andric   return result;
452*0b57cec5SDimitry Andric }
453*0b57cec5SDimitry Andric 
SkipPtrsAndRefs()454*0b57cec5SDimitry Andric void CPlusPlusNameParser::SkipPtrsAndRefs() {
455*0b57cec5SDimitry Andric   // Ignoring result.
456*0b57cec5SDimitry Andric   ConsumePtrsAndRefs();
457*0b57cec5SDimitry Andric }
458*0b57cec5SDimitry Andric 
ConsumePtrsAndRefs()459*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
460*0b57cec5SDimitry Andric   bool found = false;
461*0b57cec5SDimitry Andric   SkipTypeQualifiers();
462*0b57cec5SDimitry Andric   while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
463*0b57cec5SDimitry Andric                       tok::kw_volatile)) {
464*0b57cec5SDimitry Andric     found = true;
465*0b57cec5SDimitry Andric     SkipTypeQualifiers();
466*0b57cec5SDimitry Andric   }
467*0b57cec5SDimitry Andric   return found;
468*0b57cec5SDimitry Andric }
469*0b57cec5SDimitry Andric 
ConsumeDecltype()470*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeDecltype() {
471*0b57cec5SDimitry Andric   Bookmark start_position = SetBookmark();
472*0b57cec5SDimitry Andric   if (!ConsumeToken(tok::kw_decltype))
473*0b57cec5SDimitry Andric     return false;
474*0b57cec5SDimitry Andric 
475*0b57cec5SDimitry Andric   if (!ConsumeArguments())
476*0b57cec5SDimitry Andric     return false;
477*0b57cec5SDimitry Andric 
478*0b57cec5SDimitry Andric   start_position.Remove();
479*0b57cec5SDimitry Andric   return true;
480*0b57cec5SDimitry Andric }
481*0b57cec5SDimitry Andric 
ConsumeTypename()482*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeTypename() {
483*0b57cec5SDimitry Andric   Bookmark start_position = SetBookmark();
484*0b57cec5SDimitry Andric   SkipTypeQualifiers();
485*0b57cec5SDimitry Andric   if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
486*0b57cec5SDimitry Andric     if (!ParseFullNameImpl())
487*0b57cec5SDimitry Andric       return false;
488*0b57cec5SDimitry Andric   }
489*0b57cec5SDimitry Andric   SkipPtrsAndRefs();
490*0b57cec5SDimitry Andric   start_position.Remove();
491*0b57cec5SDimitry Andric   return true;
492*0b57cec5SDimitry Andric }
493*0b57cec5SDimitry Andric 
494*0b57cec5SDimitry Andric Optional<CPlusPlusNameParser::ParsedNameRanges>
ParseFullNameImpl()495*0b57cec5SDimitry Andric CPlusPlusNameParser::ParseFullNameImpl() {
496*0b57cec5SDimitry Andric   // Name parsing state machine.
497*0b57cec5SDimitry Andric   enum class State {
498*0b57cec5SDimitry Andric     Beginning,       // start of the name
499*0b57cec5SDimitry Andric     AfterTwoColons,  // right after ::
500*0b57cec5SDimitry Andric     AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
501*0b57cec5SDimitry Andric     AfterTemplate,   // right after template brackets (<something>)
502*0b57cec5SDimitry Andric     AfterOperator,   // right after name of C++ operator
503*0b57cec5SDimitry Andric   };
504*0b57cec5SDimitry Andric 
505*0b57cec5SDimitry Andric   Bookmark start_position = SetBookmark();
506*0b57cec5SDimitry Andric   State state = State::Beginning;
507*0b57cec5SDimitry Andric   bool continue_parsing = true;
508*0b57cec5SDimitry Andric   Optional<size_t> last_coloncolon_position = None;
509*0b57cec5SDimitry Andric 
510*0b57cec5SDimitry Andric   while (continue_parsing && HasMoreTokens()) {
511*0b57cec5SDimitry Andric     const auto &token = Peek();
512*0b57cec5SDimitry Andric     switch (token.getKind()) {
513*0b57cec5SDimitry Andric     case tok::raw_identifier: // Just a name.
514*0b57cec5SDimitry Andric       if (state != State::Beginning && state != State::AfterTwoColons) {
515*0b57cec5SDimitry Andric         continue_parsing = false;
516*0b57cec5SDimitry Andric         break;
517*0b57cec5SDimitry Andric       }
518*0b57cec5SDimitry Andric       Advance();
519*0b57cec5SDimitry Andric       state = State::AfterIdentifier;
520*0b57cec5SDimitry Andric       break;
521*0b57cec5SDimitry Andric     case tok::l_paren: {
522*0b57cec5SDimitry Andric       if (state == State::Beginning || state == State::AfterTwoColons) {
523*0b57cec5SDimitry Andric         // (anonymous namespace)
524*0b57cec5SDimitry Andric         if (ConsumeAnonymousNamespace()) {
525*0b57cec5SDimitry Andric           state = State::AfterIdentifier;
526*0b57cec5SDimitry Andric           break;
527*0b57cec5SDimitry Andric         }
528*0b57cec5SDimitry Andric       }
529*0b57cec5SDimitry Andric 
530*0b57cec5SDimitry Andric       // Type declared inside a function 'func()::Type'
531*0b57cec5SDimitry Andric       if (state != State::AfterIdentifier && state != State::AfterTemplate &&
532*0b57cec5SDimitry Andric           state != State::AfterOperator) {
533*0b57cec5SDimitry Andric         continue_parsing = false;
534*0b57cec5SDimitry Andric         break;
535*0b57cec5SDimitry Andric       }
536*0b57cec5SDimitry Andric       Bookmark l_paren_position = SetBookmark();
537*0b57cec5SDimitry Andric       // Consume the '(' ... ') [const]'.
538*0b57cec5SDimitry Andric       if (!ConsumeArguments()) {
539*0b57cec5SDimitry Andric         continue_parsing = false;
540*0b57cec5SDimitry Andric         break;
541*0b57cec5SDimitry Andric       }
542*0b57cec5SDimitry Andric       SkipFunctionQualifiers();
543*0b57cec5SDimitry Andric 
544*0b57cec5SDimitry Andric       // Consume '::'
545*0b57cec5SDimitry Andric       size_t coloncolon_position = GetCurrentPosition();
546*0b57cec5SDimitry Andric       if (!ConsumeToken(tok::coloncolon)) {
547*0b57cec5SDimitry Andric         continue_parsing = false;
548*0b57cec5SDimitry Andric         break;
549*0b57cec5SDimitry Andric       }
550*0b57cec5SDimitry Andric       l_paren_position.Remove();
551*0b57cec5SDimitry Andric       last_coloncolon_position = coloncolon_position;
552*0b57cec5SDimitry Andric       state = State::AfterTwoColons;
553*0b57cec5SDimitry Andric       break;
554*0b57cec5SDimitry Andric     }
555*0b57cec5SDimitry Andric     case tok::l_brace:
556*0b57cec5SDimitry Andric       if (state == State::Beginning || state == State::AfterTwoColons) {
557*0b57cec5SDimitry Andric         if (ConsumeLambda()) {
558*0b57cec5SDimitry Andric           state = State::AfterIdentifier;
559*0b57cec5SDimitry Andric           break;
560*0b57cec5SDimitry Andric         }
561*0b57cec5SDimitry Andric       }
562*0b57cec5SDimitry Andric       continue_parsing = false;
563*0b57cec5SDimitry Andric       break;
564*0b57cec5SDimitry Andric     case tok::coloncolon: // Type nesting delimiter.
565*0b57cec5SDimitry Andric       if (state != State::Beginning && state != State::AfterIdentifier &&
566*0b57cec5SDimitry Andric           state != State::AfterTemplate) {
567*0b57cec5SDimitry Andric         continue_parsing = false;
568*0b57cec5SDimitry Andric         break;
569*0b57cec5SDimitry Andric       }
570*0b57cec5SDimitry Andric       last_coloncolon_position = GetCurrentPosition();
571*0b57cec5SDimitry Andric       Advance();
572*0b57cec5SDimitry Andric       state = State::AfterTwoColons;
573*0b57cec5SDimitry Andric       break;
574*0b57cec5SDimitry Andric     case tok::less: // Template brackets.
575*0b57cec5SDimitry Andric       if (state != State::AfterIdentifier && state != State::AfterOperator) {
576*0b57cec5SDimitry Andric         continue_parsing = false;
577*0b57cec5SDimitry Andric         break;
578*0b57cec5SDimitry Andric       }
579*0b57cec5SDimitry Andric       if (!ConsumeTemplateArgs()) {
580*0b57cec5SDimitry Andric         continue_parsing = false;
581*0b57cec5SDimitry Andric         break;
582*0b57cec5SDimitry Andric       }
583*0b57cec5SDimitry Andric       state = State::AfterTemplate;
584*0b57cec5SDimitry Andric       break;
585*0b57cec5SDimitry Andric     case tok::kw_operator: // C++ operator overloading.
586*0b57cec5SDimitry Andric       if (state != State::Beginning && state != State::AfterTwoColons) {
587*0b57cec5SDimitry Andric         continue_parsing = false;
588*0b57cec5SDimitry Andric         break;
589*0b57cec5SDimitry Andric       }
590*0b57cec5SDimitry Andric       if (!ConsumeOperator()) {
591*0b57cec5SDimitry Andric         continue_parsing = false;
592*0b57cec5SDimitry Andric         break;
593*0b57cec5SDimitry Andric       }
594*0b57cec5SDimitry Andric       state = State::AfterOperator;
595*0b57cec5SDimitry Andric       break;
596*0b57cec5SDimitry Andric     case tok::tilde: // Destructor.
597*0b57cec5SDimitry Andric       if (state != State::Beginning && state != State::AfterTwoColons) {
598*0b57cec5SDimitry Andric         continue_parsing = false;
599*0b57cec5SDimitry Andric         break;
600*0b57cec5SDimitry Andric       }
601*0b57cec5SDimitry Andric       Advance();
602*0b57cec5SDimitry Andric       if (ConsumeToken(tok::raw_identifier)) {
603*0b57cec5SDimitry Andric         state = State::AfterIdentifier;
604*0b57cec5SDimitry Andric       } else {
605*0b57cec5SDimitry Andric         TakeBack();
606*0b57cec5SDimitry Andric         continue_parsing = false;
607*0b57cec5SDimitry Andric       }
608*0b57cec5SDimitry Andric       break;
609*0b57cec5SDimitry Andric     default:
610*0b57cec5SDimitry Andric       continue_parsing = false;
611*0b57cec5SDimitry Andric       break;
612*0b57cec5SDimitry Andric     }
613*0b57cec5SDimitry Andric   }
614*0b57cec5SDimitry Andric 
615*0b57cec5SDimitry Andric   if (state == State::AfterIdentifier || state == State::AfterOperator ||
616*0b57cec5SDimitry Andric       state == State::AfterTemplate) {
617*0b57cec5SDimitry Andric     ParsedNameRanges result;
618*0b57cec5SDimitry Andric     if (last_coloncolon_position) {
619*0b57cec5SDimitry Andric       result.context_range = Range(start_position.GetSavedPosition(),
620*0b57cec5SDimitry Andric                                    last_coloncolon_position.getValue());
621*0b57cec5SDimitry Andric       result.basename_range =
622*0b57cec5SDimitry Andric           Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
623*0b57cec5SDimitry Andric     } else {
624*0b57cec5SDimitry Andric       result.basename_range =
625*0b57cec5SDimitry Andric           Range(start_position.GetSavedPosition(), GetCurrentPosition());
626*0b57cec5SDimitry Andric     }
627*0b57cec5SDimitry Andric     start_position.Remove();
628*0b57cec5SDimitry Andric     return result;
629*0b57cec5SDimitry Andric   } else {
630*0b57cec5SDimitry Andric     return None;
631*0b57cec5SDimitry Andric   }
632*0b57cec5SDimitry Andric }
633*0b57cec5SDimitry Andric 
GetTextForRange(const Range & range)634*0b57cec5SDimitry Andric llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
635*0b57cec5SDimitry Andric   if (range.empty())
636*0b57cec5SDimitry Andric     return llvm::StringRef();
637*0b57cec5SDimitry Andric   assert(range.begin_index < range.end_index);
638*0b57cec5SDimitry Andric   assert(range.begin_index < m_tokens.size());
639*0b57cec5SDimitry Andric   assert(range.end_index <= m_tokens.size());
640*0b57cec5SDimitry Andric   clang::Token &first_token = m_tokens[range.begin_index];
641*0b57cec5SDimitry Andric   clang::Token &last_token = m_tokens[range.end_index - 1];
642*0b57cec5SDimitry Andric   clang::SourceLocation start_loc = first_token.getLocation();
643*0b57cec5SDimitry Andric   clang::SourceLocation end_loc = last_token.getLocation();
644*0b57cec5SDimitry Andric   unsigned start_pos = start_loc.getRawEncoding();
645*0b57cec5SDimitry Andric   unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
646*0b57cec5SDimitry Andric   return m_text.take_front(end_pos).drop_front(start_pos);
647*0b57cec5SDimitry Andric }
648*0b57cec5SDimitry Andric 
GetLangOptions()649*0b57cec5SDimitry Andric static const clang::LangOptions &GetLangOptions() {
650*0b57cec5SDimitry Andric   static clang::LangOptions g_options;
651*0b57cec5SDimitry Andric   static llvm::once_flag g_once_flag;
652*0b57cec5SDimitry Andric   llvm::call_once(g_once_flag, []() {
653*0b57cec5SDimitry Andric     g_options.LineComment = true;
654*0b57cec5SDimitry Andric     g_options.C99 = true;
655*0b57cec5SDimitry Andric     g_options.C11 = true;
656*0b57cec5SDimitry Andric     g_options.CPlusPlus = true;
657*0b57cec5SDimitry Andric     g_options.CPlusPlus11 = true;
658*0b57cec5SDimitry Andric     g_options.CPlusPlus14 = true;
659*0b57cec5SDimitry Andric     g_options.CPlusPlus17 = true;
660*0b57cec5SDimitry Andric   });
661   return g_options;
662 }
663 
GetKeywordsMap()664 static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
665   static llvm::StringMap<tok::TokenKind> g_map{
666 #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
667 #include "clang/Basic/TokenKinds.def"
668 #undef KEYWORD
669   };
670   return g_map;
671 }
672 
ExtractTokens()673 void CPlusPlusNameParser::ExtractTokens() {
674   if (m_text.empty())
675     return;
676   clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
677                      m_text.data(), m_text.data() + m_text.size());
678   const auto &kw_map = GetKeywordsMap();
679   clang::Token token;
680   for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
681        lexer.LexFromRawLexer(token)) {
682     if (token.is(clang::tok::raw_identifier)) {
683       auto it = kw_map.find(token.getRawIdentifier());
684       if (it != kw_map.end()) {
685         token.setKind(it->getValue());
686       }
687     }
688 
689     m_tokens.push_back(token);
690   }
691 }
692