1*0b57cec5SDimitry Andric //===-- CPlusPlusNameParser.cpp -------------------------------------------===//
2*0b57cec5SDimitry Andric //
3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric
9*0b57cec5SDimitry Andric #include "CPlusPlusNameParser.h"
10*0b57cec5SDimitry Andric
11*0b57cec5SDimitry Andric #include "clang/Basic/IdentifierTable.h"
12*0b57cec5SDimitry Andric #include "llvm/ADT/StringMap.h"
13*0b57cec5SDimitry Andric #include "llvm/Support/Threading.h"
14*0b57cec5SDimitry Andric
15*0b57cec5SDimitry Andric using namespace lldb;
16*0b57cec5SDimitry Andric using namespace lldb_private;
17*0b57cec5SDimitry Andric using llvm::Optional;
18*0b57cec5SDimitry Andric using llvm::None;
19*0b57cec5SDimitry Andric using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
20*0b57cec5SDimitry Andric using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
21*0b57cec5SDimitry Andric namespace tok = clang::tok;
22*0b57cec5SDimitry Andric
ParseAsFunctionDefinition()23*0b57cec5SDimitry Andric Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
24*0b57cec5SDimitry Andric m_next_token_index = 0;
25*0b57cec5SDimitry Andric Optional<ParsedFunction> result(None);
26*0b57cec5SDimitry Andric
27*0b57cec5SDimitry Andric // Try to parse the name as function without a return type specified e.g.
28*0b57cec5SDimitry Andric // main(int, char*[])
29*0b57cec5SDimitry Andric {
30*0b57cec5SDimitry Andric Bookmark start_position = SetBookmark();
31*0b57cec5SDimitry Andric result = ParseFunctionImpl(false);
32*0b57cec5SDimitry Andric if (result && !HasMoreTokens())
33*0b57cec5SDimitry Andric return result;
34*0b57cec5SDimitry Andric }
35*0b57cec5SDimitry Andric
36*0b57cec5SDimitry Andric // Try to parse the name as function with function pointer return type e.g.
37*0b57cec5SDimitry Andric // void (*get_func(const char*))()
38*0b57cec5SDimitry Andric result = ParseFuncPtr(true);
39*0b57cec5SDimitry Andric if (result)
40*0b57cec5SDimitry Andric return result;
41*0b57cec5SDimitry Andric
42*0b57cec5SDimitry Andric // Finally try to parse the name as a function with non-function return type
43*0b57cec5SDimitry Andric // e.g. int main(int, char*[])
44*0b57cec5SDimitry Andric result = ParseFunctionImpl(true);
45*0b57cec5SDimitry Andric if (HasMoreTokens())
46*0b57cec5SDimitry Andric return None;
47*0b57cec5SDimitry Andric return result;
48*0b57cec5SDimitry Andric }
49*0b57cec5SDimitry Andric
ParseAsFullName()50*0b57cec5SDimitry Andric Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
51*0b57cec5SDimitry Andric m_next_token_index = 0;
52*0b57cec5SDimitry Andric Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
53*0b57cec5SDimitry Andric if (!name_ranges)
54*0b57cec5SDimitry Andric return None;
55*0b57cec5SDimitry Andric if (HasMoreTokens())
56*0b57cec5SDimitry Andric return None;
57*0b57cec5SDimitry Andric ParsedName result;
58*0b57cec5SDimitry Andric result.basename = GetTextForRange(name_ranges.getValue().basename_range);
59*0b57cec5SDimitry Andric result.context = GetTextForRange(name_ranges.getValue().context_range);
60*0b57cec5SDimitry Andric return result;
61*0b57cec5SDimitry Andric }
62*0b57cec5SDimitry Andric
HasMoreTokens()63*0b57cec5SDimitry Andric bool CPlusPlusNameParser::HasMoreTokens() {
64*0b57cec5SDimitry Andric return m_next_token_index < m_tokens.size();
65*0b57cec5SDimitry Andric }
66*0b57cec5SDimitry Andric
Advance()67*0b57cec5SDimitry Andric void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
68*0b57cec5SDimitry Andric
TakeBack()69*0b57cec5SDimitry Andric void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
70*0b57cec5SDimitry Andric
ConsumeToken(tok::TokenKind kind)71*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
72*0b57cec5SDimitry Andric if (!HasMoreTokens())
73*0b57cec5SDimitry Andric return false;
74*0b57cec5SDimitry Andric
75*0b57cec5SDimitry Andric if (!Peek().is(kind))
76*0b57cec5SDimitry Andric return false;
77*0b57cec5SDimitry Andric
78*0b57cec5SDimitry Andric Advance();
79*0b57cec5SDimitry Andric return true;
80*0b57cec5SDimitry Andric }
81*0b57cec5SDimitry Andric
ConsumeToken(Ts...kinds)82*0b57cec5SDimitry Andric template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
83*0b57cec5SDimitry Andric if (!HasMoreTokens())
84*0b57cec5SDimitry Andric return false;
85*0b57cec5SDimitry Andric
86*0b57cec5SDimitry Andric if (!Peek().isOneOf(kinds...))
87*0b57cec5SDimitry Andric return false;
88*0b57cec5SDimitry Andric
89*0b57cec5SDimitry Andric Advance();
90*0b57cec5SDimitry Andric return true;
91*0b57cec5SDimitry Andric }
92*0b57cec5SDimitry Andric
SetBookmark()93*0b57cec5SDimitry Andric CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
94*0b57cec5SDimitry Andric return Bookmark(m_next_token_index);
95*0b57cec5SDimitry Andric }
96*0b57cec5SDimitry Andric
GetCurrentPosition()97*0b57cec5SDimitry Andric size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
98*0b57cec5SDimitry Andric
Peek()99*0b57cec5SDimitry Andric clang::Token &CPlusPlusNameParser::Peek() {
100*0b57cec5SDimitry Andric assert(HasMoreTokens());
101*0b57cec5SDimitry Andric return m_tokens[m_next_token_index];
102*0b57cec5SDimitry Andric }
103*0b57cec5SDimitry Andric
104*0b57cec5SDimitry Andric Optional<ParsedFunction>
ParseFunctionImpl(bool expect_return_type)105*0b57cec5SDimitry Andric CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
106*0b57cec5SDimitry Andric Bookmark start_position = SetBookmark();
107*0b57cec5SDimitry Andric if (expect_return_type) {
108*0b57cec5SDimitry Andric // Consume return type if it's expected.
109*0b57cec5SDimitry Andric if (!ConsumeTypename())
110*0b57cec5SDimitry Andric return None;
111*0b57cec5SDimitry Andric }
112*0b57cec5SDimitry Andric
113*0b57cec5SDimitry Andric auto maybe_name = ParseFullNameImpl();
114*0b57cec5SDimitry Andric if (!maybe_name) {
115*0b57cec5SDimitry Andric return None;
116*0b57cec5SDimitry Andric }
117*0b57cec5SDimitry Andric
118*0b57cec5SDimitry Andric size_t argument_start = GetCurrentPosition();
119*0b57cec5SDimitry Andric if (!ConsumeArguments()) {
120*0b57cec5SDimitry Andric return None;
121*0b57cec5SDimitry Andric }
122*0b57cec5SDimitry Andric
123*0b57cec5SDimitry Andric size_t qualifiers_start = GetCurrentPosition();
124*0b57cec5SDimitry Andric SkipFunctionQualifiers();
125*0b57cec5SDimitry Andric size_t end_position = GetCurrentPosition();
126*0b57cec5SDimitry Andric
127*0b57cec5SDimitry Andric ParsedFunction result;
128*0b57cec5SDimitry Andric result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
129*0b57cec5SDimitry Andric result.name.context = GetTextForRange(maybe_name.getValue().context_range);
130*0b57cec5SDimitry Andric result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
131*0b57cec5SDimitry Andric result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
132*0b57cec5SDimitry Andric start_position.Remove();
133*0b57cec5SDimitry Andric return result;
134*0b57cec5SDimitry Andric }
135*0b57cec5SDimitry Andric
136*0b57cec5SDimitry Andric Optional<ParsedFunction>
ParseFuncPtr(bool expect_return_type)137*0b57cec5SDimitry Andric CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
138*0b57cec5SDimitry Andric Bookmark start_position = SetBookmark();
139*0b57cec5SDimitry Andric if (expect_return_type) {
140*0b57cec5SDimitry Andric // Consume return type.
141*0b57cec5SDimitry Andric if (!ConsumeTypename())
142*0b57cec5SDimitry Andric return None;
143*0b57cec5SDimitry Andric }
144*0b57cec5SDimitry Andric
145*0b57cec5SDimitry Andric if (!ConsumeToken(tok::l_paren))
146*0b57cec5SDimitry Andric return None;
147*0b57cec5SDimitry Andric if (!ConsumePtrsAndRefs())
148*0b57cec5SDimitry Andric return None;
149*0b57cec5SDimitry Andric
150*0b57cec5SDimitry Andric {
151*0b57cec5SDimitry Andric Bookmark before_inner_function_pos = SetBookmark();
152*0b57cec5SDimitry Andric auto maybe_inner_function_name = ParseFunctionImpl(false);
153*0b57cec5SDimitry Andric if (maybe_inner_function_name)
154*0b57cec5SDimitry Andric if (ConsumeToken(tok::r_paren))
155*0b57cec5SDimitry Andric if (ConsumeArguments()) {
156*0b57cec5SDimitry Andric SkipFunctionQualifiers();
157*0b57cec5SDimitry Andric start_position.Remove();
158*0b57cec5SDimitry Andric before_inner_function_pos.Remove();
159*0b57cec5SDimitry Andric return maybe_inner_function_name;
160*0b57cec5SDimitry Andric }
161*0b57cec5SDimitry Andric }
162*0b57cec5SDimitry Andric
163*0b57cec5SDimitry Andric auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
164*0b57cec5SDimitry Andric if (maybe_inner_function_ptr_name)
165*0b57cec5SDimitry Andric if (ConsumeToken(tok::r_paren))
166*0b57cec5SDimitry Andric if (ConsumeArguments()) {
167*0b57cec5SDimitry Andric SkipFunctionQualifiers();
168*0b57cec5SDimitry Andric start_position.Remove();
169*0b57cec5SDimitry Andric return maybe_inner_function_ptr_name;
170*0b57cec5SDimitry Andric }
171*0b57cec5SDimitry Andric return None;
172*0b57cec5SDimitry Andric }
173*0b57cec5SDimitry Andric
ConsumeArguments()174*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeArguments() {
175*0b57cec5SDimitry Andric return ConsumeBrackets(tok::l_paren, tok::r_paren);
176*0b57cec5SDimitry Andric }
177*0b57cec5SDimitry Andric
ConsumeTemplateArgs()178*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeTemplateArgs() {
179*0b57cec5SDimitry Andric Bookmark start_position = SetBookmark();
180*0b57cec5SDimitry Andric if (!HasMoreTokens() || Peek().getKind() != tok::less)
181*0b57cec5SDimitry Andric return false;
182*0b57cec5SDimitry Andric Advance();
183*0b57cec5SDimitry Andric
184*0b57cec5SDimitry Andric // Consuming template arguments is a bit trickier than consuming function
185*0b57cec5SDimitry Andric // arguments, because '<' '>' brackets are not always trivially balanced. In
186*0b57cec5SDimitry Andric // some rare cases tokens '<' and '>' can appear inside template arguments as
187*0b57cec5SDimitry Andric // arithmetic or shift operators not as template brackets. Examples:
188*0b57cec5SDimitry Andric // std::enable_if<(10u)<(64), bool>
189*0b57cec5SDimitry Andric // f<A<operator<(X,Y)::Subclass>>
190*0b57cec5SDimitry Andric // Good thing that compiler makes sure that really ambiguous cases of '>'
191*0b57cec5SDimitry Andric // usage should be enclosed within '()' brackets.
192*0b57cec5SDimitry Andric int template_counter = 1;
193*0b57cec5SDimitry Andric bool can_open_template = false;
194*0b57cec5SDimitry Andric while (HasMoreTokens() && template_counter > 0) {
195*0b57cec5SDimitry Andric tok::TokenKind kind = Peek().getKind();
196*0b57cec5SDimitry Andric switch (kind) {
197*0b57cec5SDimitry Andric case tok::greatergreater:
198*0b57cec5SDimitry Andric template_counter -= 2;
199*0b57cec5SDimitry Andric can_open_template = false;
200*0b57cec5SDimitry Andric Advance();
201*0b57cec5SDimitry Andric break;
202*0b57cec5SDimitry Andric case tok::greater:
203*0b57cec5SDimitry Andric --template_counter;
204*0b57cec5SDimitry Andric can_open_template = false;
205*0b57cec5SDimitry Andric Advance();
206*0b57cec5SDimitry Andric break;
207*0b57cec5SDimitry Andric case tok::less:
208*0b57cec5SDimitry Andric // '<' is an attempt to open a subteamplte
209*0b57cec5SDimitry Andric // check if parser is at the point where it's actually possible,
210*0b57cec5SDimitry Andric // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
211*0b57cec5SDimitry Andric // need to do the same for '>' because compiler actually makes sure that
212*0b57cec5SDimitry Andric // '>' always surrounded by brackets to avoid ambiguity.
213*0b57cec5SDimitry Andric if (can_open_template)
214*0b57cec5SDimitry Andric ++template_counter;
215*0b57cec5SDimitry Andric can_open_template = false;
216*0b57cec5SDimitry Andric Advance();
217*0b57cec5SDimitry Andric break;
218*0b57cec5SDimitry Andric case tok::kw_operator: // C++ operator overloading.
219*0b57cec5SDimitry Andric if (!ConsumeOperator())
220*0b57cec5SDimitry Andric return false;
221*0b57cec5SDimitry Andric can_open_template = true;
222*0b57cec5SDimitry Andric break;
223*0b57cec5SDimitry Andric case tok::raw_identifier:
224*0b57cec5SDimitry Andric can_open_template = true;
225*0b57cec5SDimitry Andric Advance();
226*0b57cec5SDimitry Andric break;
227*0b57cec5SDimitry Andric case tok::l_square:
228*0b57cec5SDimitry Andric if (!ConsumeBrackets(tok::l_square, tok::r_square))
229*0b57cec5SDimitry Andric return false;
230*0b57cec5SDimitry Andric can_open_template = false;
231*0b57cec5SDimitry Andric break;
232*0b57cec5SDimitry Andric case tok::l_paren:
233*0b57cec5SDimitry Andric if (!ConsumeArguments())
234*0b57cec5SDimitry Andric return false;
235*0b57cec5SDimitry Andric can_open_template = false;
236*0b57cec5SDimitry Andric break;
237*0b57cec5SDimitry Andric default:
238*0b57cec5SDimitry Andric can_open_template = false;
239*0b57cec5SDimitry Andric Advance();
240*0b57cec5SDimitry Andric break;
241*0b57cec5SDimitry Andric }
242*0b57cec5SDimitry Andric }
243*0b57cec5SDimitry Andric
244*0b57cec5SDimitry Andric if (template_counter != 0) {
245*0b57cec5SDimitry Andric return false;
246*0b57cec5SDimitry Andric }
247*0b57cec5SDimitry Andric start_position.Remove();
248*0b57cec5SDimitry Andric return true;
249*0b57cec5SDimitry Andric }
250*0b57cec5SDimitry Andric
ConsumeAnonymousNamespace()251*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
252*0b57cec5SDimitry Andric Bookmark start_position = SetBookmark();
253*0b57cec5SDimitry Andric if (!ConsumeToken(tok::l_paren)) {
254*0b57cec5SDimitry Andric return false;
255*0b57cec5SDimitry Andric }
256*0b57cec5SDimitry Andric constexpr llvm::StringLiteral g_anonymous("anonymous");
257*0b57cec5SDimitry Andric if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
258*0b57cec5SDimitry Andric Peek().getRawIdentifier() == g_anonymous) {
259*0b57cec5SDimitry Andric Advance();
260*0b57cec5SDimitry Andric } else {
261*0b57cec5SDimitry Andric return false;
262*0b57cec5SDimitry Andric }
263*0b57cec5SDimitry Andric
264*0b57cec5SDimitry Andric if (!ConsumeToken(tok::kw_namespace)) {
265*0b57cec5SDimitry Andric return false;
266*0b57cec5SDimitry Andric }
267*0b57cec5SDimitry Andric
268*0b57cec5SDimitry Andric if (!ConsumeToken(tok::r_paren)) {
269*0b57cec5SDimitry Andric return false;
270*0b57cec5SDimitry Andric }
271*0b57cec5SDimitry Andric start_position.Remove();
272*0b57cec5SDimitry Andric return true;
273*0b57cec5SDimitry Andric }
274*0b57cec5SDimitry Andric
ConsumeLambda()275*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeLambda() {
276*0b57cec5SDimitry Andric Bookmark start_position = SetBookmark();
277*0b57cec5SDimitry Andric if (!ConsumeToken(tok::l_brace)) {
278*0b57cec5SDimitry Andric return false;
279*0b57cec5SDimitry Andric }
280*0b57cec5SDimitry Andric constexpr llvm::StringLiteral g_lambda("lambda");
281*0b57cec5SDimitry Andric if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
282*0b57cec5SDimitry Andric Peek().getRawIdentifier() == g_lambda) {
283*0b57cec5SDimitry Andric // Put the matched brace back so we can use ConsumeBrackets
284*0b57cec5SDimitry Andric TakeBack();
285*0b57cec5SDimitry Andric } else {
286*0b57cec5SDimitry Andric return false;
287*0b57cec5SDimitry Andric }
288*0b57cec5SDimitry Andric
289*0b57cec5SDimitry Andric if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
290*0b57cec5SDimitry Andric return false;
291*0b57cec5SDimitry Andric }
292*0b57cec5SDimitry Andric
293*0b57cec5SDimitry Andric start_position.Remove();
294*0b57cec5SDimitry Andric return true;
295*0b57cec5SDimitry Andric }
296*0b57cec5SDimitry Andric
ConsumeBrackets(tok::TokenKind left,tok::TokenKind right)297*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
298*0b57cec5SDimitry Andric tok::TokenKind right) {
299*0b57cec5SDimitry Andric Bookmark start_position = SetBookmark();
300*0b57cec5SDimitry Andric if (!HasMoreTokens() || Peek().getKind() != left)
301*0b57cec5SDimitry Andric return false;
302*0b57cec5SDimitry Andric Advance();
303*0b57cec5SDimitry Andric
304*0b57cec5SDimitry Andric int counter = 1;
305*0b57cec5SDimitry Andric while (HasMoreTokens() && counter > 0) {
306*0b57cec5SDimitry Andric tok::TokenKind kind = Peek().getKind();
307*0b57cec5SDimitry Andric if (kind == right)
308*0b57cec5SDimitry Andric --counter;
309*0b57cec5SDimitry Andric else if (kind == left)
310*0b57cec5SDimitry Andric ++counter;
311*0b57cec5SDimitry Andric Advance();
312*0b57cec5SDimitry Andric }
313*0b57cec5SDimitry Andric
314*0b57cec5SDimitry Andric assert(counter >= 0);
315*0b57cec5SDimitry Andric if (counter > 0) {
316*0b57cec5SDimitry Andric return false;
317*0b57cec5SDimitry Andric }
318*0b57cec5SDimitry Andric start_position.Remove();
319*0b57cec5SDimitry Andric return true;
320*0b57cec5SDimitry Andric }
321*0b57cec5SDimitry Andric
ConsumeOperator()322*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeOperator() {
323*0b57cec5SDimitry Andric Bookmark start_position = SetBookmark();
324*0b57cec5SDimitry Andric if (!ConsumeToken(tok::kw_operator))
325*0b57cec5SDimitry Andric return false;
326*0b57cec5SDimitry Andric
327*0b57cec5SDimitry Andric if (!HasMoreTokens()) {
328*0b57cec5SDimitry Andric return false;
329*0b57cec5SDimitry Andric }
330*0b57cec5SDimitry Andric
331*0b57cec5SDimitry Andric const auto &token = Peek();
332*0b57cec5SDimitry Andric
333*0b57cec5SDimitry Andric // When clang generates debug info it adds template parameters to names.
334*0b57cec5SDimitry Andric // Since clang doesn't add a space between the name and the template parameter
335*0b57cec5SDimitry Andric // in some cases we are not generating valid C++ names e.g.:
336*0b57cec5SDimitry Andric //
337*0b57cec5SDimitry Andric // operator<<A::B>
338*0b57cec5SDimitry Andric //
339*0b57cec5SDimitry Andric // In some of these cases we will not parse them correctly. This fixes the
340*0b57cec5SDimitry Andric // issue by detecting this case and inserting tok::less in place of
341*0b57cec5SDimitry Andric // tok::lessless and returning successfully that we consumed the operator.
342*0b57cec5SDimitry Andric if (token.getKind() == tok::lessless) {
343*0b57cec5SDimitry Andric // Make sure we have more tokens before attempting to look ahead one more.
344*0b57cec5SDimitry Andric if (m_next_token_index + 1 < m_tokens.size()) {
345*0b57cec5SDimitry Andric // Look ahead two tokens.
346*0b57cec5SDimitry Andric clang::Token n_token = m_tokens[m_next_token_index + 1];
347*0b57cec5SDimitry Andric // If we find ( or < then this is indeed operator<< no need for fix.
348*0b57cec5SDimitry Andric if (n_token.getKind() != tok::l_paren && n_token.getKind() != tok::less) {
349*0b57cec5SDimitry Andric clang::Token tmp_tok;
350*0b57cec5SDimitry Andric tmp_tok.startToken();
351*0b57cec5SDimitry Andric tmp_tok.setLength(1);
352*0b57cec5SDimitry Andric tmp_tok.setLocation(token.getLocation().getLocWithOffset(1));
353*0b57cec5SDimitry Andric tmp_tok.setKind(tok::less);
354*0b57cec5SDimitry Andric
355*0b57cec5SDimitry Andric m_tokens[m_next_token_index] = tmp_tok;
356*0b57cec5SDimitry Andric
357*0b57cec5SDimitry Andric start_position.Remove();
358*0b57cec5SDimitry Andric return true;
359*0b57cec5SDimitry Andric }
360*0b57cec5SDimitry Andric }
361*0b57cec5SDimitry Andric }
362*0b57cec5SDimitry Andric
363*0b57cec5SDimitry Andric switch (token.getKind()) {
364*0b57cec5SDimitry Andric case tok::kw_new:
365*0b57cec5SDimitry Andric case tok::kw_delete:
366*0b57cec5SDimitry Andric // This is 'new' or 'delete' operators.
367*0b57cec5SDimitry Andric Advance();
368*0b57cec5SDimitry Andric // Check for array new/delete.
369*0b57cec5SDimitry Andric if (HasMoreTokens() && Peek().is(tok::l_square)) {
370*0b57cec5SDimitry Andric // Consume the '[' and ']'.
371*0b57cec5SDimitry Andric if (!ConsumeBrackets(tok::l_square, tok::r_square))
372*0b57cec5SDimitry Andric return false;
373*0b57cec5SDimitry Andric }
374*0b57cec5SDimitry Andric break;
375*0b57cec5SDimitry Andric
376*0b57cec5SDimitry Andric #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \
377*0b57cec5SDimitry Andric case tok::Token: \
378*0b57cec5SDimitry Andric Advance(); \
379*0b57cec5SDimitry Andric break;
380*0b57cec5SDimitry Andric #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
381*0b57cec5SDimitry Andric #include "clang/Basic/OperatorKinds.def"
382*0b57cec5SDimitry Andric #undef OVERLOADED_OPERATOR
383*0b57cec5SDimitry Andric #undef OVERLOADED_OPERATOR_MULTI
384*0b57cec5SDimitry Andric
385*0b57cec5SDimitry Andric case tok::l_paren:
386*0b57cec5SDimitry Andric // Call operator consume '(' ... ')'.
387*0b57cec5SDimitry Andric if (ConsumeBrackets(tok::l_paren, tok::r_paren))
388*0b57cec5SDimitry Andric break;
389*0b57cec5SDimitry Andric return false;
390*0b57cec5SDimitry Andric
391*0b57cec5SDimitry Andric case tok::l_square:
392*0b57cec5SDimitry Andric // This is a [] operator.
393*0b57cec5SDimitry Andric // Consume the '[' and ']'.
394*0b57cec5SDimitry Andric if (ConsumeBrackets(tok::l_square, tok::r_square))
395*0b57cec5SDimitry Andric break;
396*0b57cec5SDimitry Andric return false;
397*0b57cec5SDimitry Andric
398*0b57cec5SDimitry Andric default:
399*0b57cec5SDimitry Andric // This might be a cast operator.
400*0b57cec5SDimitry Andric if (ConsumeTypename())
401*0b57cec5SDimitry Andric break;
402*0b57cec5SDimitry Andric return false;
403*0b57cec5SDimitry Andric }
404*0b57cec5SDimitry Andric start_position.Remove();
405*0b57cec5SDimitry Andric return true;
406*0b57cec5SDimitry Andric }
407*0b57cec5SDimitry Andric
SkipTypeQualifiers()408*0b57cec5SDimitry Andric void CPlusPlusNameParser::SkipTypeQualifiers() {
409*0b57cec5SDimitry Andric while (ConsumeToken(tok::kw_const, tok::kw_volatile))
410*0b57cec5SDimitry Andric ;
411*0b57cec5SDimitry Andric }
412*0b57cec5SDimitry Andric
SkipFunctionQualifiers()413*0b57cec5SDimitry Andric void CPlusPlusNameParser::SkipFunctionQualifiers() {
414*0b57cec5SDimitry Andric while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
415*0b57cec5SDimitry Andric ;
416*0b57cec5SDimitry Andric }
417*0b57cec5SDimitry Andric
ConsumeBuiltinType()418*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeBuiltinType() {
419*0b57cec5SDimitry Andric bool result = false;
420*0b57cec5SDimitry Andric bool continue_parsing = true;
421*0b57cec5SDimitry Andric // Built-in types can be made of a few keywords like 'unsigned long long
422*0b57cec5SDimitry Andric // int'. This function consumes all built-in type keywords without checking
423*0b57cec5SDimitry Andric // if they make sense like 'unsigned char void'.
424*0b57cec5SDimitry Andric while (continue_parsing && HasMoreTokens()) {
425*0b57cec5SDimitry Andric switch (Peek().getKind()) {
426*0b57cec5SDimitry Andric case tok::kw_short:
427*0b57cec5SDimitry Andric case tok::kw_long:
428*0b57cec5SDimitry Andric case tok::kw___int64:
429*0b57cec5SDimitry Andric case tok::kw___int128:
430*0b57cec5SDimitry Andric case tok::kw_signed:
431*0b57cec5SDimitry Andric case tok::kw_unsigned:
432*0b57cec5SDimitry Andric case tok::kw_void:
433*0b57cec5SDimitry Andric case tok::kw_char:
434*0b57cec5SDimitry Andric case tok::kw_int:
435*0b57cec5SDimitry Andric case tok::kw_half:
436*0b57cec5SDimitry Andric case tok::kw_float:
437*0b57cec5SDimitry Andric case tok::kw_double:
438*0b57cec5SDimitry Andric case tok::kw___float128:
439*0b57cec5SDimitry Andric case tok::kw_wchar_t:
440*0b57cec5SDimitry Andric case tok::kw_bool:
441*0b57cec5SDimitry Andric case tok::kw_char16_t:
442*0b57cec5SDimitry Andric case tok::kw_char32_t:
443*0b57cec5SDimitry Andric result = true;
444*0b57cec5SDimitry Andric Advance();
445*0b57cec5SDimitry Andric break;
446*0b57cec5SDimitry Andric default:
447*0b57cec5SDimitry Andric continue_parsing = false;
448*0b57cec5SDimitry Andric break;
449*0b57cec5SDimitry Andric }
450*0b57cec5SDimitry Andric }
451*0b57cec5SDimitry Andric return result;
452*0b57cec5SDimitry Andric }
453*0b57cec5SDimitry Andric
SkipPtrsAndRefs()454*0b57cec5SDimitry Andric void CPlusPlusNameParser::SkipPtrsAndRefs() {
455*0b57cec5SDimitry Andric // Ignoring result.
456*0b57cec5SDimitry Andric ConsumePtrsAndRefs();
457*0b57cec5SDimitry Andric }
458*0b57cec5SDimitry Andric
ConsumePtrsAndRefs()459*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
460*0b57cec5SDimitry Andric bool found = false;
461*0b57cec5SDimitry Andric SkipTypeQualifiers();
462*0b57cec5SDimitry Andric while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
463*0b57cec5SDimitry Andric tok::kw_volatile)) {
464*0b57cec5SDimitry Andric found = true;
465*0b57cec5SDimitry Andric SkipTypeQualifiers();
466*0b57cec5SDimitry Andric }
467*0b57cec5SDimitry Andric return found;
468*0b57cec5SDimitry Andric }
469*0b57cec5SDimitry Andric
ConsumeDecltype()470*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeDecltype() {
471*0b57cec5SDimitry Andric Bookmark start_position = SetBookmark();
472*0b57cec5SDimitry Andric if (!ConsumeToken(tok::kw_decltype))
473*0b57cec5SDimitry Andric return false;
474*0b57cec5SDimitry Andric
475*0b57cec5SDimitry Andric if (!ConsumeArguments())
476*0b57cec5SDimitry Andric return false;
477*0b57cec5SDimitry Andric
478*0b57cec5SDimitry Andric start_position.Remove();
479*0b57cec5SDimitry Andric return true;
480*0b57cec5SDimitry Andric }
481*0b57cec5SDimitry Andric
ConsumeTypename()482*0b57cec5SDimitry Andric bool CPlusPlusNameParser::ConsumeTypename() {
483*0b57cec5SDimitry Andric Bookmark start_position = SetBookmark();
484*0b57cec5SDimitry Andric SkipTypeQualifiers();
485*0b57cec5SDimitry Andric if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
486*0b57cec5SDimitry Andric if (!ParseFullNameImpl())
487*0b57cec5SDimitry Andric return false;
488*0b57cec5SDimitry Andric }
489*0b57cec5SDimitry Andric SkipPtrsAndRefs();
490*0b57cec5SDimitry Andric start_position.Remove();
491*0b57cec5SDimitry Andric return true;
492*0b57cec5SDimitry Andric }
493*0b57cec5SDimitry Andric
494*0b57cec5SDimitry Andric Optional<CPlusPlusNameParser::ParsedNameRanges>
ParseFullNameImpl()495*0b57cec5SDimitry Andric CPlusPlusNameParser::ParseFullNameImpl() {
496*0b57cec5SDimitry Andric // Name parsing state machine.
497*0b57cec5SDimitry Andric enum class State {
498*0b57cec5SDimitry Andric Beginning, // start of the name
499*0b57cec5SDimitry Andric AfterTwoColons, // right after ::
500*0b57cec5SDimitry Andric AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
501*0b57cec5SDimitry Andric AfterTemplate, // right after template brackets (<something>)
502*0b57cec5SDimitry Andric AfterOperator, // right after name of C++ operator
503*0b57cec5SDimitry Andric };
504*0b57cec5SDimitry Andric
505*0b57cec5SDimitry Andric Bookmark start_position = SetBookmark();
506*0b57cec5SDimitry Andric State state = State::Beginning;
507*0b57cec5SDimitry Andric bool continue_parsing = true;
508*0b57cec5SDimitry Andric Optional<size_t> last_coloncolon_position = None;
509*0b57cec5SDimitry Andric
510*0b57cec5SDimitry Andric while (continue_parsing && HasMoreTokens()) {
511*0b57cec5SDimitry Andric const auto &token = Peek();
512*0b57cec5SDimitry Andric switch (token.getKind()) {
513*0b57cec5SDimitry Andric case tok::raw_identifier: // Just a name.
514*0b57cec5SDimitry Andric if (state != State::Beginning && state != State::AfterTwoColons) {
515*0b57cec5SDimitry Andric continue_parsing = false;
516*0b57cec5SDimitry Andric break;
517*0b57cec5SDimitry Andric }
518*0b57cec5SDimitry Andric Advance();
519*0b57cec5SDimitry Andric state = State::AfterIdentifier;
520*0b57cec5SDimitry Andric break;
521*0b57cec5SDimitry Andric case tok::l_paren: {
522*0b57cec5SDimitry Andric if (state == State::Beginning || state == State::AfterTwoColons) {
523*0b57cec5SDimitry Andric // (anonymous namespace)
524*0b57cec5SDimitry Andric if (ConsumeAnonymousNamespace()) {
525*0b57cec5SDimitry Andric state = State::AfterIdentifier;
526*0b57cec5SDimitry Andric break;
527*0b57cec5SDimitry Andric }
528*0b57cec5SDimitry Andric }
529*0b57cec5SDimitry Andric
530*0b57cec5SDimitry Andric // Type declared inside a function 'func()::Type'
531*0b57cec5SDimitry Andric if (state != State::AfterIdentifier && state != State::AfterTemplate &&
532*0b57cec5SDimitry Andric state != State::AfterOperator) {
533*0b57cec5SDimitry Andric continue_parsing = false;
534*0b57cec5SDimitry Andric break;
535*0b57cec5SDimitry Andric }
536*0b57cec5SDimitry Andric Bookmark l_paren_position = SetBookmark();
537*0b57cec5SDimitry Andric // Consume the '(' ... ') [const]'.
538*0b57cec5SDimitry Andric if (!ConsumeArguments()) {
539*0b57cec5SDimitry Andric continue_parsing = false;
540*0b57cec5SDimitry Andric break;
541*0b57cec5SDimitry Andric }
542*0b57cec5SDimitry Andric SkipFunctionQualifiers();
543*0b57cec5SDimitry Andric
544*0b57cec5SDimitry Andric // Consume '::'
545*0b57cec5SDimitry Andric size_t coloncolon_position = GetCurrentPosition();
546*0b57cec5SDimitry Andric if (!ConsumeToken(tok::coloncolon)) {
547*0b57cec5SDimitry Andric continue_parsing = false;
548*0b57cec5SDimitry Andric break;
549*0b57cec5SDimitry Andric }
550*0b57cec5SDimitry Andric l_paren_position.Remove();
551*0b57cec5SDimitry Andric last_coloncolon_position = coloncolon_position;
552*0b57cec5SDimitry Andric state = State::AfterTwoColons;
553*0b57cec5SDimitry Andric break;
554*0b57cec5SDimitry Andric }
555*0b57cec5SDimitry Andric case tok::l_brace:
556*0b57cec5SDimitry Andric if (state == State::Beginning || state == State::AfterTwoColons) {
557*0b57cec5SDimitry Andric if (ConsumeLambda()) {
558*0b57cec5SDimitry Andric state = State::AfterIdentifier;
559*0b57cec5SDimitry Andric break;
560*0b57cec5SDimitry Andric }
561*0b57cec5SDimitry Andric }
562*0b57cec5SDimitry Andric continue_parsing = false;
563*0b57cec5SDimitry Andric break;
564*0b57cec5SDimitry Andric case tok::coloncolon: // Type nesting delimiter.
565*0b57cec5SDimitry Andric if (state != State::Beginning && state != State::AfterIdentifier &&
566*0b57cec5SDimitry Andric state != State::AfterTemplate) {
567*0b57cec5SDimitry Andric continue_parsing = false;
568*0b57cec5SDimitry Andric break;
569*0b57cec5SDimitry Andric }
570*0b57cec5SDimitry Andric last_coloncolon_position = GetCurrentPosition();
571*0b57cec5SDimitry Andric Advance();
572*0b57cec5SDimitry Andric state = State::AfterTwoColons;
573*0b57cec5SDimitry Andric break;
574*0b57cec5SDimitry Andric case tok::less: // Template brackets.
575*0b57cec5SDimitry Andric if (state != State::AfterIdentifier && state != State::AfterOperator) {
576*0b57cec5SDimitry Andric continue_parsing = false;
577*0b57cec5SDimitry Andric break;
578*0b57cec5SDimitry Andric }
579*0b57cec5SDimitry Andric if (!ConsumeTemplateArgs()) {
580*0b57cec5SDimitry Andric continue_parsing = false;
581*0b57cec5SDimitry Andric break;
582*0b57cec5SDimitry Andric }
583*0b57cec5SDimitry Andric state = State::AfterTemplate;
584*0b57cec5SDimitry Andric break;
585*0b57cec5SDimitry Andric case tok::kw_operator: // C++ operator overloading.
586*0b57cec5SDimitry Andric if (state != State::Beginning && state != State::AfterTwoColons) {
587*0b57cec5SDimitry Andric continue_parsing = false;
588*0b57cec5SDimitry Andric break;
589*0b57cec5SDimitry Andric }
590*0b57cec5SDimitry Andric if (!ConsumeOperator()) {
591*0b57cec5SDimitry Andric continue_parsing = false;
592*0b57cec5SDimitry Andric break;
593*0b57cec5SDimitry Andric }
594*0b57cec5SDimitry Andric state = State::AfterOperator;
595*0b57cec5SDimitry Andric break;
596*0b57cec5SDimitry Andric case tok::tilde: // Destructor.
597*0b57cec5SDimitry Andric if (state != State::Beginning && state != State::AfterTwoColons) {
598*0b57cec5SDimitry Andric continue_parsing = false;
599*0b57cec5SDimitry Andric break;
600*0b57cec5SDimitry Andric }
601*0b57cec5SDimitry Andric Advance();
602*0b57cec5SDimitry Andric if (ConsumeToken(tok::raw_identifier)) {
603*0b57cec5SDimitry Andric state = State::AfterIdentifier;
604*0b57cec5SDimitry Andric } else {
605*0b57cec5SDimitry Andric TakeBack();
606*0b57cec5SDimitry Andric continue_parsing = false;
607*0b57cec5SDimitry Andric }
608*0b57cec5SDimitry Andric break;
609*0b57cec5SDimitry Andric default:
610*0b57cec5SDimitry Andric continue_parsing = false;
611*0b57cec5SDimitry Andric break;
612*0b57cec5SDimitry Andric }
613*0b57cec5SDimitry Andric }
614*0b57cec5SDimitry Andric
615*0b57cec5SDimitry Andric if (state == State::AfterIdentifier || state == State::AfterOperator ||
616*0b57cec5SDimitry Andric state == State::AfterTemplate) {
617*0b57cec5SDimitry Andric ParsedNameRanges result;
618*0b57cec5SDimitry Andric if (last_coloncolon_position) {
619*0b57cec5SDimitry Andric result.context_range = Range(start_position.GetSavedPosition(),
620*0b57cec5SDimitry Andric last_coloncolon_position.getValue());
621*0b57cec5SDimitry Andric result.basename_range =
622*0b57cec5SDimitry Andric Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
623*0b57cec5SDimitry Andric } else {
624*0b57cec5SDimitry Andric result.basename_range =
625*0b57cec5SDimitry Andric Range(start_position.GetSavedPosition(), GetCurrentPosition());
626*0b57cec5SDimitry Andric }
627*0b57cec5SDimitry Andric start_position.Remove();
628*0b57cec5SDimitry Andric return result;
629*0b57cec5SDimitry Andric } else {
630*0b57cec5SDimitry Andric return None;
631*0b57cec5SDimitry Andric }
632*0b57cec5SDimitry Andric }
633*0b57cec5SDimitry Andric
GetTextForRange(const Range & range)634*0b57cec5SDimitry Andric llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
635*0b57cec5SDimitry Andric if (range.empty())
636*0b57cec5SDimitry Andric return llvm::StringRef();
637*0b57cec5SDimitry Andric assert(range.begin_index < range.end_index);
638*0b57cec5SDimitry Andric assert(range.begin_index < m_tokens.size());
639*0b57cec5SDimitry Andric assert(range.end_index <= m_tokens.size());
640*0b57cec5SDimitry Andric clang::Token &first_token = m_tokens[range.begin_index];
641*0b57cec5SDimitry Andric clang::Token &last_token = m_tokens[range.end_index - 1];
642*0b57cec5SDimitry Andric clang::SourceLocation start_loc = first_token.getLocation();
643*0b57cec5SDimitry Andric clang::SourceLocation end_loc = last_token.getLocation();
644*0b57cec5SDimitry Andric unsigned start_pos = start_loc.getRawEncoding();
645*0b57cec5SDimitry Andric unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
646*0b57cec5SDimitry Andric return m_text.take_front(end_pos).drop_front(start_pos);
647*0b57cec5SDimitry Andric }
648*0b57cec5SDimitry Andric
GetLangOptions()649*0b57cec5SDimitry Andric static const clang::LangOptions &GetLangOptions() {
650*0b57cec5SDimitry Andric static clang::LangOptions g_options;
651*0b57cec5SDimitry Andric static llvm::once_flag g_once_flag;
652*0b57cec5SDimitry Andric llvm::call_once(g_once_flag, []() {
653*0b57cec5SDimitry Andric g_options.LineComment = true;
654*0b57cec5SDimitry Andric g_options.C99 = true;
655*0b57cec5SDimitry Andric g_options.C11 = true;
656*0b57cec5SDimitry Andric g_options.CPlusPlus = true;
657*0b57cec5SDimitry Andric g_options.CPlusPlus11 = true;
658*0b57cec5SDimitry Andric g_options.CPlusPlus14 = true;
659*0b57cec5SDimitry Andric g_options.CPlusPlus17 = true;
660*0b57cec5SDimitry Andric });
661 return g_options;
662 }
663
GetKeywordsMap()664 static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
665 static llvm::StringMap<tok::TokenKind> g_map{
666 #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
667 #include "clang/Basic/TokenKinds.def"
668 #undef KEYWORD
669 };
670 return g_map;
671 }
672
ExtractTokens()673 void CPlusPlusNameParser::ExtractTokens() {
674 if (m_text.empty())
675 return;
676 clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
677 m_text.data(), m_text.data() + m_text.size());
678 const auto &kw_map = GetKeywordsMap();
679 clang::Token token;
680 for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
681 lexer.LexFromRawLexer(token)) {
682 if (token.is(clang::tok::raw_identifier)) {
683 auto it = kw_map.find(token.getRawIdentifier());
684 if (it != kw_map.end()) {
685 token.setKind(it->getValue());
686 }
687 }
688
689 m_tokens.push_back(token);
690 }
691 }
692