1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "TokenAnnotator.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/Support/Debug.h"
20 
21 #define DEBUG_TYPE "format-token-annotator"
22 
23 namespace clang {
24 namespace format {
25 
26 namespace {
27 
28 /// \brief A parser that gathers additional information about tokens.
29 ///
30 /// The \c TokenAnnotator tries to match parenthesis and square brakets and
31 /// store a parenthesis levels. It also tries to resolve matching "<" and ">"
32 /// into template parameter lists.
33 class AnnotatingParser {
34 public:
35   AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
36                    const AdditionalKeywords &Keywords)
37       : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
38         Keywords(Keywords) {
39     Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
40     resetTokenMetadata(CurrentToken);
41   }
42 
43 private:
44   bool parseAngle() {
45     if (!CurrentToken || !CurrentToken->Previous)
46       return false;
47     if (NonTemplateLess.count(CurrentToken->Previous))
48       return false;
49 
50     const FormatToken& Previous = *CurrentToken->Previous;
51     if (Previous.Previous) {
52       if (Previous.Previous->Tok.isLiteral())
53         return false;
54       if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
55           (!Previous.Previous->MatchingParen ||
56            !Previous.Previous->MatchingParen->is(TT_OverloadedOperatorLParen)))
57         return false;
58     }
59 
60     FormatToken *Left = CurrentToken->Previous;
61     Left->ParentBracket = Contexts.back().ContextKind;
62     ScopedContextCreator ContextCreator(*this, tok::less, 12);
63 
64     // If this angle is in the context of an expression, we need to be more
65     // hesitant to detect it as opening template parameters.
66     bool InExprContext = Contexts.back().IsExpression;
67 
68     Contexts.back().IsExpression = false;
69     // If there's a template keyword before the opening angle bracket, this is a
70     // template parameter, not an argument.
71     Contexts.back().InTemplateArgument =
72         Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);
73 
74     if (Style.Language == FormatStyle::LK_Java &&
75         CurrentToken->is(tok::question))
76       next();
77 
78     while (CurrentToken) {
79       if (CurrentToken->is(tok::greater)) {
80         Left->MatchingParen = CurrentToken;
81         CurrentToken->MatchingParen = Left;
82         CurrentToken->Type = TT_TemplateCloser;
83         next();
84         return true;
85       }
86       if (CurrentToken->is(tok::question) &&
87           Style.Language == FormatStyle::LK_Java) {
88         next();
89         continue;
90       }
91       if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
92           (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext))
93         return false;
94       // If a && or || is found and interpreted as a binary operator, this set
95       // of angles is likely part of something like "a < b && c > d". If the
96       // angles are inside an expression, the ||/&& might also be a binary
97       // operator that was misinterpreted because we are parsing template
98       // parameters.
99       // FIXME: This is getting out of hand, write a decent parser.
100       if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
101           CurrentToken->Previous->is(TT_BinaryOperator) &&
102           Contexts[Contexts.size() - 2].IsExpression &&
103           !Line.startsWith(tok::kw_template))
104         return false;
105       updateParameterCount(Left, CurrentToken);
106       if (!consumeToken())
107         return false;
108     }
109     return false;
110   }
111 
112   bool parseParens(bool LookForDecls = false) {
113     if (!CurrentToken)
114       return false;
115     FormatToken *Left = CurrentToken->Previous;
116     Left->ParentBracket = Contexts.back().ContextKind;
117     ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
118 
119     // FIXME: This is a bit of a hack. Do better.
120     Contexts.back().ColonIsForRangeExpr =
121         Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
122 
123     bool StartsObjCMethodExpr = false;
124     if (CurrentToken->is(tok::caret)) {
125       // (^ can start a block type.
126       Left->Type = TT_ObjCBlockLParen;
127     } else if (FormatToken *MaybeSel = Left->Previous) {
128       // @selector( starts a selector.
129       if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
130           MaybeSel->Previous->is(tok::at)) {
131         StartsObjCMethodExpr = true;
132       }
133     }
134 
135     if (Left->is(TT_OverloadedOperatorLParen)) {
136       Contexts.back().IsExpression = false;
137     } else if (Style.Language == FormatStyle::LK_JavaScript &&
138                Line.startsWith(Keywords.kw_type, tok::identifier)) {
139       // type X = (...);
140       Contexts.back().IsExpression = false;
141     } else if (Left->Previous &&
142         (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,
143                                  tok::kw_if, tok::kw_while, tok::l_paren,
144                                  tok::comma) ||
145          Left->Previous->is(TT_BinaryOperator))) {
146       // static_assert, if and while usually contain expressions.
147       Contexts.back().IsExpression = true;
148     } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
149                (Left->Previous->is(Keywords.kw_function) ||
150                 (Left->Previous->endsSequence(tok::identifier,
151                                               Keywords.kw_function)))) {
152       // function(...) or function f(...)
153       Contexts.back().IsExpression = false;
154     } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
155                Left->Previous->is(TT_JsTypeColon)) {
156       // let x: (SomeType);
157       Contexts.back().IsExpression = false;
158     } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
159                Left->Previous->MatchingParen &&
160                Left->Previous->MatchingParen->is(TT_LambdaLSquare)) {
161       // This is a parameter list of a lambda expression.
162       Contexts.back().IsExpression = false;
163     } else if (Line.InPPDirective &&
164                (!Left->Previous || !Left->Previous->is(tok::identifier))) {
165       Contexts.back().IsExpression = true;
166     } else if (Contexts[Contexts.size() - 2].CaretFound) {
167       // This is the parameter list of an ObjC block.
168       Contexts.back().IsExpression = false;
169     } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
170       Left->Type = TT_AttributeParen;
171     } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
172       // The first argument to a foreach macro is a declaration.
173       Contexts.back().IsForEachMacro = true;
174       Contexts.back().IsExpression = false;
175     } else if (Left->Previous && Left->Previous->MatchingParen &&
176                Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) {
177       Contexts.back().IsExpression = false;
178     } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
179       bool IsForOrCatch =
180           Left->Previous && Left->Previous->isOneOf(tok::kw_for, tok::kw_catch);
181       Contexts.back().IsExpression = !IsForOrCatch;
182     }
183 
184     if (StartsObjCMethodExpr) {
185       Contexts.back().ColonIsObjCMethodExpr = true;
186       Left->Type = TT_ObjCMethodExpr;
187     }
188 
189     bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
190     bool ProbablyFunctionType = CurrentToken->isOneOf(tok::star, tok::amp);
191     bool HasMultipleLines = false;
192     bool HasMultipleParametersOnALine = false;
193     bool MightBeObjCForRangeLoop =
194         Left->Previous && Left->Previous->is(tok::kw_for);
195     while (CurrentToken) {
196       // LookForDecls is set when "if (" has been seen. Check for
197       // 'identifier' '*' 'identifier' followed by not '=' -- this
198       // '*' has to be a binary operator but determineStarAmpUsage() will
199       // categorize it as an unary operator, so set the right type here.
200       if (LookForDecls && CurrentToken->Next) {
201         FormatToken *Prev = CurrentToken->getPreviousNonComment();
202         if (Prev) {
203           FormatToken *PrevPrev = Prev->getPreviousNonComment();
204           FormatToken *Next = CurrentToken->Next;
205           if (PrevPrev && PrevPrev->is(tok::identifier) &&
206               Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
207               CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
208             Prev->Type = TT_BinaryOperator;
209             LookForDecls = false;
210           }
211         }
212       }
213 
214       if (CurrentToken->Previous->is(TT_PointerOrReference) &&
215           CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
216                                                     tok::coloncolon))
217         ProbablyFunctionType = true;
218       if (CurrentToken->is(tok::comma))
219         MightBeFunctionType = false;
220       if (CurrentToken->Previous->is(TT_BinaryOperator))
221         Contexts.back().IsExpression = true;
222       if (CurrentToken->is(tok::r_paren)) {
223         if (MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next &&
224             (CurrentToken->Next->is(tok::l_paren) ||
225              (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration)))
226           Left->Type = TT_FunctionTypeLParen;
227         Left->MatchingParen = CurrentToken;
228         CurrentToken->MatchingParen = Left;
229 
230         if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
231             Left->Previous && Left->Previous->is(tok::l_paren)) {
232           // Detect the case where macros are used to generate lambdas or
233           // function bodies, e.g.:
234           //   auto my_lambda = MARCO((Type *type, int i) { .. body .. });
235           for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) {
236             if (Tok->is(TT_BinaryOperator) &&
237                 Tok->isOneOf(tok::star, tok::amp, tok::ampamp))
238               Tok->Type = TT_PointerOrReference;
239           }
240         }
241 
242         if (StartsObjCMethodExpr) {
243           CurrentToken->Type = TT_ObjCMethodExpr;
244           if (Contexts.back().FirstObjCSelectorName) {
245             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
246                 Contexts.back().LongestObjCSelectorName;
247           }
248         }
249 
250         if (Left->is(TT_AttributeParen))
251           CurrentToken->Type = TT_AttributeParen;
252         if (Left->Previous && Left->Previous->is(TT_JavaAnnotation))
253           CurrentToken->Type = TT_JavaAnnotation;
254         if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation))
255           CurrentToken->Type = TT_LeadingJavaAnnotation;
256 
257         if (!HasMultipleLines)
258           Left->PackingKind = PPK_Inconclusive;
259         else if (HasMultipleParametersOnALine)
260           Left->PackingKind = PPK_BinPacked;
261         else
262           Left->PackingKind = PPK_OnePerLine;
263 
264         next();
265         return true;
266       }
267       if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
268         return false;
269 
270       if (CurrentToken->is(tok::l_brace))
271         Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
272       if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
273           !CurrentToken->Next->HasUnescapedNewline &&
274           !CurrentToken->Next->isTrailingComment())
275         HasMultipleParametersOnALine = true;
276       if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
277            CurrentToken->Previous->isSimpleTypeSpecifier()) &&
278           !CurrentToken->is(tok::l_brace))
279         Contexts.back().IsExpression = false;
280       if (CurrentToken->isOneOf(tok::semi, tok::colon))
281         MightBeObjCForRangeLoop = false;
282       if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in))
283         CurrentToken->Type = TT_ObjCForIn;
284       // When we discover a 'new', we set CanBeExpression to 'false' in order to
285       // parse the type correctly. Reset that after a comma.
286       if (CurrentToken->is(tok::comma))
287         Contexts.back().CanBeExpression = true;
288 
289       FormatToken *Tok = CurrentToken;
290       if (!consumeToken())
291         return false;
292       updateParameterCount(Left, Tok);
293       if (CurrentToken && CurrentToken->HasUnescapedNewline)
294         HasMultipleLines = true;
295     }
296     return false;
297   }
298 
299   bool parseSquare() {
300     if (!CurrentToken)
301       return false;
302 
303     // A '[' could be an index subscript (after an identifier or after
304     // ')' or ']'), it could be the start of an Objective-C method
305     // expression, or it could the start of an Objective-C array literal.
306     FormatToken *Left = CurrentToken->Previous;
307     Left->ParentBracket = Contexts.back().ContextKind;
308     FormatToken *Parent = Left->getPreviousNonComment();
309 
310     // Cases where '>' is followed by '['.
311     // In C++, this can happen either in array of templates (foo<int>[10])
312     // or when array is a nested template type (unique_ptr<type1<type2>[]>).
313     bool CppArrayTemplates =
314         Style.Language == FormatStyle::LK_Cpp && Parent &&
315         Parent->is(TT_TemplateCloser) &&
316         (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
317          Contexts.back().InTemplateArgument);
318 
319     bool StartsObjCMethodExpr =
320         !CppArrayTemplates && Style.Language == FormatStyle::LK_Cpp &&
321         Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
322         CurrentToken->isNot(tok::l_brace) &&
323         (!Parent ||
324          Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
325                          tok::kw_return, tok::kw_throw) ||
326          Parent->isUnaryOperator() ||
327          Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
328          getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
329     bool ColonFound = false;
330 
331     unsigned BindingIncrease = 1;
332     if (Left->is(TT_Unknown)) {
333       if (StartsObjCMethodExpr) {
334         Left->Type = TT_ObjCMethodExpr;
335       } else if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
336                  Contexts.back().ContextKind == tok::l_brace &&
337                  Parent->isOneOf(tok::l_brace, tok::comma)) {
338         Left->Type = TT_JsComputedPropertyName;
339       } else if (Style.Language == FormatStyle::LK_Proto ||
340                  (!CppArrayTemplates && Parent &&
341                   Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
342                                   tok::comma, tok::l_paren, tok::l_square,
343                                   tok::question, tok::colon, tok::kw_return,
344                                   // Should only be relevant to JavaScript:
345                                   tok::kw_default))) {
346         Left->Type = TT_ArrayInitializerLSquare;
347       } else {
348         BindingIncrease = 10;
349         Left->Type = TT_ArraySubscriptLSquare;
350       }
351     }
352 
353     ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
354     Contexts.back().IsExpression = true;
355     Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
356 
357     while (CurrentToken) {
358       if (CurrentToken->is(tok::r_square)) {
359         if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
360             Left->is(TT_ObjCMethodExpr)) {
361           // An ObjC method call is rarely followed by an open parenthesis.
362           // FIXME: Do we incorrectly label ":" with this?
363           StartsObjCMethodExpr = false;
364           Left->Type = TT_Unknown;
365         }
366         if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
367           CurrentToken->Type = TT_ObjCMethodExpr;
368           // determineStarAmpUsage() thinks that '*' '[' is allocating an
369           // array of pointers, but if '[' starts a selector then '*' is a
370           // binary operator.
371           if (Parent && Parent->is(TT_PointerOrReference))
372             Parent->Type = TT_BinaryOperator;
373         }
374         Left->MatchingParen = CurrentToken;
375         CurrentToken->MatchingParen = Left;
376         if (Contexts.back().FirstObjCSelectorName) {
377           Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
378               Contexts.back().LongestObjCSelectorName;
379           if (Left->BlockParameterCount > 1)
380             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
381         }
382         next();
383         return true;
384       }
385       if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
386         return false;
387       if (CurrentToken->is(tok::colon)) {
388         if (Left->is(TT_ArraySubscriptLSquare)) {
389           Left->Type = TT_ObjCMethodExpr;
390           StartsObjCMethodExpr = true;
391           Contexts.back().ColonIsObjCMethodExpr = true;
392           if (Parent && Parent->is(tok::r_paren))
393             Parent->Type = TT_CastRParen;
394         }
395         ColonFound = true;
396       }
397       if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
398           !ColonFound)
399         Left->Type = TT_ArrayInitializerLSquare;
400       FormatToken *Tok = CurrentToken;
401       if (!consumeToken())
402         return false;
403       updateParameterCount(Left, Tok);
404     }
405     return false;
406   }
407 
408   bool parseBrace() {
409     if (CurrentToken) {
410       FormatToken *Left = CurrentToken->Previous;
411       Left->ParentBracket = Contexts.back().ContextKind;
412 
413       if (Contexts.back().CaretFound)
414         Left->Type = TT_ObjCBlockLBrace;
415       Contexts.back().CaretFound = false;
416 
417       ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
418       Contexts.back().ColonIsDictLiteral = true;
419       if (Left->BlockKind == BK_BracedInit)
420         Contexts.back().IsExpression = true;
421 
422       while (CurrentToken) {
423         if (CurrentToken->is(tok::r_brace)) {
424           Left->MatchingParen = CurrentToken;
425           CurrentToken->MatchingParen = Left;
426           next();
427           return true;
428         }
429         if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
430           return false;
431         updateParameterCount(Left, CurrentToken);
432         if (CurrentToken->isOneOf(tok::colon, tok::l_brace)) {
433           FormatToken *Previous = CurrentToken->getPreviousNonComment();
434           if (((CurrentToken->is(tok::colon) &&
435                 (!Contexts.back().ColonIsDictLiteral ||
436                  Style.Language != FormatStyle::LK_Cpp)) ||
437                Style.Language == FormatStyle::LK_Proto) &&
438               (Previous->Tok.getIdentifierInfo() ||
439                Previous->is(tok::string_literal)))
440             Previous->Type = TT_SelectorName;
441           if (CurrentToken->is(tok::colon) ||
442               Style.Language == FormatStyle::LK_JavaScript)
443             Left->Type = TT_DictLiteral;
444         }
445         if (CurrentToken->is(tok::comma) &&
446             Style.Language == FormatStyle::LK_JavaScript)
447           Left->Type = TT_DictLiteral;
448         if (!consumeToken())
449           return false;
450       }
451     }
452     return true;
453   }
454 
455   void updateParameterCount(FormatToken *Left, FormatToken *Current) {
456     if (Current->is(tok::l_brace) && Current->BlockKind == BK_Block)
457       ++Left->BlockParameterCount;
458     if (Current->is(tok::comma)) {
459       ++Left->ParameterCount;
460       if (!Left->Role)
461         Left->Role.reset(new CommaSeparatedList(Style));
462       Left->Role->CommaFound(Current);
463     } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
464       Left->ParameterCount = 1;
465     }
466   }
467 
468   bool parseConditional() {
469     while (CurrentToken) {
470       if (CurrentToken->is(tok::colon)) {
471         CurrentToken->Type = TT_ConditionalExpr;
472         next();
473         return true;
474       }
475       if (!consumeToken())
476         return false;
477     }
478     return false;
479   }
480 
481   bool parseTemplateDeclaration() {
482     if (CurrentToken && CurrentToken->is(tok::less)) {
483       CurrentToken->Type = TT_TemplateOpener;
484       next();
485       if (!parseAngle())
486         return false;
487       if (CurrentToken)
488         CurrentToken->Previous->ClosesTemplateDeclaration = true;
489       return true;
490     }
491     return false;
492   }
493 
494   bool consumeToken() {
495     FormatToken *Tok = CurrentToken;
496     next();
497     switch (Tok->Tok.getKind()) {
498     case tok::plus:
499     case tok::minus:
500       if (!Tok->Previous && Line.MustBeDeclaration)
501         Tok->Type = TT_ObjCMethodSpecifier;
502       break;
503     case tok::colon:
504       if (!Tok->Previous)
505         return false;
506       // Colons from ?: are handled in parseConditional().
507       if (Style.Language == FormatStyle::LK_JavaScript) {
508         if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
509             (Contexts.size() == 1 &&               // switch/case labels
510              !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
511             Contexts.back().ContextKind == tok::l_paren ||  // function params
512             Contexts.back().ContextKind == tok::l_square || // array type
513             (Contexts.size() == 1 &&
514              Line.MustBeDeclaration)) { // method/property declaration
515           Tok->Type = TT_JsTypeColon;
516           break;
517         }
518       }
519       if (Contexts.back().ColonIsDictLiteral ||
520           Style.Language == FormatStyle::LK_Proto) {
521         Tok->Type = TT_DictLiteral;
522       } else if (Contexts.back().ColonIsObjCMethodExpr ||
523                  Line.startsWith(TT_ObjCMethodSpecifier)) {
524         Tok->Type = TT_ObjCMethodExpr;
525         const FormatToken *BeforePrevious = Tok->Previous->Previous;
526         if (!BeforePrevious ||
527             !(BeforePrevious->is(TT_CastRParen) ||
528               (BeforePrevious->is(TT_ObjCMethodExpr) &&
529                BeforePrevious->is(tok::colon))) ||
530             BeforePrevious->is(tok::r_square) ||
531             Contexts.back().LongestObjCSelectorName == 0) {
532           Tok->Previous->Type = TT_SelectorName;
533           if (Tok->Previous->ColumnWidth >
534               Contexts.back().LongestObjCSelectorName)
535             Contexts.back().LongestObjCSelectorName =
536                 Tok->Previous->ColumnWidth;
537           if (!Contexts.back().FirstObjCSelectorName)
538             Contexts.back().FirstObjCSelectorName = Tok->Previous;
539         }
540       } else if (Contexts.back().ColonIsForRangeExpr) {
541         Tok->Type = TT_RangeBasedForLoopColon;
542       } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
543         Tok->Type = TT_BitFieldColon;
544       } else if (Contexts.size() == 1 &&
545                  !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
546         if (Tok->getPreviousNonComment()->isOneOf(tok::r_paren,
547                                                   tok::kw_noexcept))
548           Tok->Type = TT_CtorInitializerColon;
549         else
550           Tok->Type = TT_InheritanceColon;
551       } else if (Tok->Previous->is(tok::identifier) && Tok->Next &&
552                  Tok->Next->isOneOf(tok::r_paren, tok::comma)) {
553         // This handles a special macro in ObjC code where selectors including
554         // the colon are passed as macro arguments.
555         Tok->Type = TT_ObjCMethodExpr;
556       } else if (Contexts.back().ContextKind == tok::l_paren) {
557         Tok->Type = TT_InlineASMColon;
558       }
559       break;
560     case tok::pipe:
561     case tok::amp:
562       // | and & in declarations/type expressions represent union and
563       // intersection types, respectively.
564       if (Style.Language == FormatStyle::LK_JavaScript &&
565           !Contexts.back().IsExpression)
566         Tok->Type = TT_JsTypeOperator;
567       break;
568     case tok::kw_if:
569     case tok::kw_while:
570       if (CurrentToken && CurrentToken->is(tok::l_paren)) {
571         next();
572         if (!parseParens(/*LookForDecls=*/true))
573           return false;
574       }
575       break;
576     case tok::kw_for:
577       if (Style.Language == FormatStyle::LK_JavaScript && Tok->Previous &&
578           Tok->Previous->is(tok::period))
579         break;
580       Contexts.back().ColonIsForRangeExpr = true;
581       next();
582       if (!parseParens())
583         return false;
584       break;
585     case tok::l_paren:
586       // When faced with 'operator()()', the kw_operator handler incorrectly
587       // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
588       // the first two parens OverloadedOperators and the second l_paren an
589       // OverloadedOperatorLParen.
590       if (Tok->Previous &&
591           Tok->Previous->is(tok::r_paren) &&
592           Tok->Previous->MatchingParen &&
593           Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
594         Tok->Previous->Type = TT_OverloadedOperator;
595         Tok->Previous->MatchingParen->Type = TT_OverloadedOperator;
596         Tok->Type = TT_OverloadedOperatorLParen;
597       }
598 
599       if (!parseParens())
600         return false;
601       if (Line.MustBeDeclaration && Contexts.size() == 1 &&
602           !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
603           (!Tok->Previous ||
604            !Tok->Previous->isOneOf(tok::kw_decltype, tok::kw___attribute,
605                                    TT_LeadingJavaAnnotation)))
606         Line.MightBeFunctionDecl = true;
607       break;
608     case tok::l_square:
609       if (!parseSquare())
610         return false;
611       break;
612     case tok::l_brace:
613       if (!parseBrace())
614         return false;
615       break;
616     case tok::less:
617       if (parseAngle()) {
618         Tok->Type = TT_TemplateOpener;
619       } else {
620         Tok->Type = TT_BinaryOperator;
621         NonTemplateLess.insert(Tok);
622         CurrentToken = Tok;
623         next();
624       }
625       break;
626     case tok::r_paren:
627     case tok::r_square:
628       return false;
629     case tok::r_brace:
630       // Lines can start with '}'.
631       if (Tok->Previous)
632         return false;
633       break;
634     case tok::greater:
635       Tok->Type = TT_BinaryOperator;
636       break;
637     case tok::kw_operator:
638       while (CurrentToken &&
639              !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
640         if (CurrentToken->isOneOf(tok::star, tok::amp))
641           CurrentToken->Type = TT_PointerOrReference;
642         consumeToken();
643         if (CurrentToken &&
644             CurrentToken->Previous->isOneOf(TT_BinaryOperator, tok::comma))
645           CurrentToken->Previous->Type = TT_OverloadedOperator;
646       }
647       if (CurrentToken) {
648         CurrentToken->Type = TT_OverloadedOperatorLParen;
649         if (CurrentToken->Previous->is(TT_BinaryOperator))
650           CurrentToken->Previous->Type = TT_OverloadedOperator;
651       }
652       break;
653     case tok::question:
654       if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next &&
655           Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
656                              tok::r_brace)) {
657         // Question marks before semicolons, colons, etc. indicate optional
658         // types (fields, parameters), e.g.
659         //   function(x?: string, y?) {...}
660         //   class X { y?; }
661         Tok->Type = TT_JsTypeOptionalQuestion;
662         break;
663       }
664       // Declarations cannot be conditional expressions, this can only be part
665       // of a type declaration.
666       if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
667           Style.Language == FormatStyle::LK_JavaScript)
668         break;
669       parseConditional();
670       break;
671     case tok::kw_template:
672       parseTemplateDeclaration();
673       break;
674     case tok::comma:
675       if (Contexts.back().InCtorInitializer)
676         Tok->Type = TT_CtorInitializerComma;
677       else if (Contexts.back().FirstStartOfName &&
678                (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) {
679         Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
680         Line.IsMultiVariableDeclStmt = true;
681       }
682       if (Contexts.back().IsForEachMacro)
683         Contexts.back().IsExpression = true;
684       break;
685     default:
686       break;
687     }
688     return true;
689   }
690 
691   void parseIncludeDirective() {
692     if (CurrentToken && CurrentToken->is(tok::less)) {
693       next();
694       while (CurrentToken) {
695         if (CurrentToken->isNot(tok::comment) || CurrentToken->Next)
696           CurrentToken->Type = TT_ImplicitStringLiteral;
697         next();
698       }
699     }
700   }
701 
702   void parseWarningOrError() {
703     next();
704     // We still want to format the whitespace left of the first token of the
705     // warning or error.
706     next();
707     while (CurrentToken) {
708       CurrentToken->Type = TT_ImplicitStringLiteral;
709       next();
710     }
711   }
712 
713   void parsePragma() {
714     next(); // Consume "pragma".
715     if (CurrentToken &&
716         CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) {
717       bool IsMark = CurrentToken->is(Keywords.kw_mark);
718       next(); // Consume "mark".
719       next(); // Consume first token (so we fix leading whitespace).
720       while (CurrentToken) {
721         if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator))
722           CurrentToken->Type = TT_ImplicitStringLiteral;
723         next();
724       }
725     }
726   }
727 
728   LineType parsePreprocessorDirective() {
729     bool IsFirstToken = CurrentToken->IsFirst;
730     LineType Type = LT_PreprocessorDirective;
731     next();
732     if (!CurrentToken)
733       return Type;
734 
735     if (Style.Language == FormatStyle::LK_JavaScript && IsFirstToken) {
736       // JavaScript files can contain shebang lines of the form:
737       // #!/usr/bin/env node
738       // Treat these like C++ #include directives.
739       while (CurrentToken) {
740         // Tokens cannot be comments here.
741         CurrentToken->Type = TT_ImplicitStringLiteral;
742         next();
743       }
744       return LT_ImportStatement;
745     }
746 
747     if (CurrentToken->Tok.is(tok::numeric_constant)) {
748       CurrentToken->SpacesRequiredBefore = 1;
749       return Type;
750     }
751     // Hashes in the middle of a line can lead to any strange token
752     // sequence.
753     if (!CurrentToken->Tok.getIdentifierInfo())
754       return Type;
755     switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
756     case tok::pp_include:
757     case tok::pp_include_next:
758     case tok::pp_import:
759       next();
760       parseIncludeDirective();
761       Type = LT_ImportStatement;
762       break;
763     case tok::pp_error:
764     case tok::pp_warning:
765       parseWarningOrError();
766       break;
767     case tok::pp_pragma:
768       parsePragma();
769       break;
770     case tok::pp_if:
771     case tok::pp_elif:
772       Contexts.back().IsExpression = true;
773       parseLine();
774       break;
775     default:
776       break;
777     }
778     while (CurrentToken)
779       next();
780     return Type;
781   }
782 
783 public:
784   LineType parseLine() {
785     NonTemplateLess.clear();
786     if (CurrentToken->is(tok::hash))
787       return parsePreprocessorDirective();
788 
789     // Directly allow to 'import <string-literal>' to support protocol buffer
790     // definitions (code.google.com/p/protobuf) or missing "#" (either way we
791     // should not break the line).
792     IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
793     if ((Style.Language == FormatStyle::LK_Java &&
794          CurrentToken->is(Keywords.kw_package)) ||
795         (Info && Info->getPPKeywordID() == tok::pp_import &&
796          CurrentToken->Next &&
797          CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
798                                      tok::kw_static))) {
799       next();
800       parseIncludeDirective();
801       return LT_ImportStatement;
802     }
803 
804     // If this line starts and ends in '<' and '>', respectively, it is likely
805     // part of "#define <a/b.h>".
806     if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
807       parseIncludeDirective();
808       return LT_ImportStatement;
809     }
810 
811     // In .proto files, top-level options are very similar to import statements
812     // and should not be line-wrapped.
813     if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
814         CurrentToken->is(Keywords.kw_option)) {
815       next();
816       if (CurrentToken && CurrentToken->is(tok::identifier))
817         return LT_ImportStatement;
818     }
819 
820     bool KeywordVirtualFound = false;
821     bool ImportStatement = false;
822 
823     // import {...} from '...';
824     if (Style.Language == FormatStyle::LK_JavaScript &&
825         CurrentToken->is(Keywords.kw_import))
826       ImportStatement = true;
827 
828     while (CurrentToken) {
829       if (CurrentToken->is(tok::kw_virtual))
830         KeywordVirtualFound = true;
831       if (Style.Language == FormatStyle::LK_JavaScript) {
832         // export {...} from '...';
833         // An export followed by "from 'some string';" is a re-export from
834         // another module identified by a URI and is treated as a
835         // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
836         // Just "export {...};" or "export class ..." should not be treated as
837         // an import in this sense.
838         if (Line.First->is(tok::kw_export) &&
839             CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
840             CurrentToken->Next->isStringLiteral())
841           ImportStatement = true;
842         if (isClosureImportStatement(*CurrentToken))
843           ImportStatement = true;
844       }
845       if (!consumeToken())
846         return LT_Invalid;
847     }
848     if (KeywordVirtualFound)
849       return LT_VirtualFunctionDecl;
850     if (ImportStatement)
851       return LT_ImportStatement;
852 
853     if (Line.startsWith(TT_ObjCMethodSpecifier)) {
854       if (Contexts.back().FirstObjCSelectorName)
855         Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
856             Contexts.back().LongestObjCSelectorName;
857       return LT_ObjCMethodDecl;
858     }
859 
860     return LT_Other;
861   }
862 
863 private:
864   bool isClosureImportStatement(const FormatToken &Tok) {
865     // FIXME: Closure-library specific stuff should not be hard-coded but be
866     // configurable.
867     return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
868            Tok.Next->Next && (Tok.Next->Next->TokenText == "module" ||
869                               Tok.Next->Next->TokenText == "provide" ||
870                               Tok.Next->Next->TokenText == "require" ||
871                               Tok.Next->Next->TokenText == "setTestOnly" ||
872                               Tok.Next->Next->TokenText == "forwardDeclare") &&
873            Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
874   }
875 
876   void resetTokenMetadata(FormatToken *Token) {
877     if (!Token)
878       return;
879 
880     // Reset token type in case we have already looked at it and then
881     // recovered from an error (e.g. failure to find the matching >).
882     if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
883                                TT_FunctionLBrace, TT_ImplicitStringLiteral,
884                                TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
885                                TT_OverloadedOperator, TT_RegexLiteral,
886                                TT_TemplateString))
887       CurrentToken->Type = TT_Unknown;
888     CurrentToken->Role.reset();
889     CurrentToken->MatchingParen = nullptr;
890     CurrentToken->FakeLParens.clear();
891     CurrentToken->FakeRParens = 0;
892   }
893 
894   void next() {
895     if (CurrentToken) {
896       CurrentToken->NestingLevel = Contexts.size() - 1;
897       CurrentToken->BindingStrength = Contexts.back().BindingStrength;
898       modifyContext(*CurrentToken);
899       determineTokenType(*CurrentToken);
900       CurrentToken = CurrentToken->Next;
901     }
902 
903     resetTokenMetadata(CurrentToken);
904   }
905 
906   /// \brief A struct to hold information valid in a specific context, e.g.
907   /// a pair of parenthesis.
908   struct Context {
909     Context(tok::TokenKind ContextKind, unsigned BindingStrength,
910             bool IsExpression)
911         : ContextKind(ContextKind), BindingStrength(BindingStrength),
912           IsExpression(IsExpression) {}
913 
914     tok::TokenKind ContextKind;
915     unsigned BindingStrength;
916     bool IsExpression;
917     unsigned LongestObjCSelectorName = 0;
918     bool ColonIsForRangeExpr = false;
919     bool ColonIsDictLiteral = false;
920     bool ColonIsObjCMethodExpr = false;
921     FormatToken *FirstObjCSelectorName = nullptr;
922     FormatToken *FirstStartOfName = nullptr;
923     bool CanBeExpression = true;
924     bool InTemplateArgument = false;
925     bool InCtorInitializer = false;
926     bool CaretFound = false;
927     bool IsForEachMacro = false;
928   };
929 
930   /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
931   /// of each instance.
932   struct ScopedContextCreator {
933     AnnotatingParser &P;
934 
935     ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
936                          unsigned Increase)
937         : P(P) {
938       P.Contexts.push_back(Context(ContextKind,
939                                    P.Contexts.back().BindingStrength + Increase,
940                                    P.Contexts.back().IsExpression));
941     }
942 
943     ~ScopedContextCreator() { P.Contexts.pop_back(); }
944   };
945 
946   void modifyContext(const FormatToken &Current) {
947     if (Current.getPrecedence() == prec::Assignment &&
948         !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) &&
949         // Type aliases use `type X = ...;` in TypeScript.
950         !(Style.Language == FormatStyle::LK_JavaScript &&
951           Line.startsWith(Keywords.kw_type, tok::identifier)) &&
952         (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
953       Contexts.back().IsExpression = true;
954       if (!Line.startsWith(TT_UnaryOperator)) {
955         for (FormatToken *Previous = Current.Previous;
956              Previous && Previous->Previous &&
957              !Previous->Previous->isOneOf(tok::comma, tok::semi);
958              Previous = Previous->Previous) {
959           if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
960             Previous = Previous->MatchingParen;
961             if (!Previous)
962               break;
963           }
964           if (Previous->opensScope())
965             break;
966           if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
967               Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
968               Previous->Previous && Previous->Previous->isNot(tok::equal))
969             Previous->Type = TT_PointerOrReference;
970         }
971       }
972     } else if (Current.is(tok::lessless) &&
973                (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
974       Contexts.back().IsExpression = true;
975     } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
976       Contexts.back().IsExpression = true;
977     } else if (Current.is(TT_TrailingReturnArrow)) {
978       Contexts.back().IsExpression = false;
979     } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
980       Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
981     } else if (Current.Previous &&
982                Current.Previous->is(TT_CtorInitializerColon)) {
983       Contexts.back().IsExpression = true;
984       Contexts.back().InCtorInitializer = true;
985     } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
986       for (FormatToken *Previous = Current.Previous;
987            Previous && Previous->isOneOf(tok::star, tok::amp);
988            Previous = Previous->Previous)
989         Previous->Type = TT_PointerOrReference;
990       if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer)
991         Contexts.back().IsExpression = false;
992     } else if (Current.is(tok::kw_new)) {
993       Contexts.back().CanBeExpression = false;
994     } else if (Current.isOneOf(tok::semi, tok::exclaim)) {
995       // This should be the condition or increment in a for-loop.
996       Contexts.back().IsExpression = true;
997     }
998   }
999 
1000   void determineTokenType(FormatToken &Current) {
1001     if (!Current.is(TT_Unknown))
1002       // The token type is already known.
1003       return;
1004 
1005     // Line.MightBeFunctionDecl can only be true after the parentheses of a
1006     // function declaration have been found. In this case, 'Current' is a
1007     // trailing token of this declaration and thus cannot be a name.
1008     if (Current.is(Keywords.kw_instanceof)) {
1009       Current.Type = TT_BinaryOperator;
1010     } else if (isStartOfName(Current) &&
1011                (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
1012       Contexts.back().FirstStartOfName = &Current;
1013       Current.Type = TT_StartOfName;
1014     } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
1015       AutoFound = true;
1016     } else if (Current.is(tok::arrow) &&
1017                Style.Language == FormatStyle::LK_Java) {
1018       Current.Type = TT_LambdaArrow;
1019     } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration &&
1020                Current.NestingLevel == 0) {
1021       Current.Type = TT_TrailingReturnArrow;
1022     } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
1023       Current.Type =
1024           determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
1025                                              Contexts.back().IsExpression,
1026                                 Contexts.back().InTemplateArgument);
1027     } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
1028       Current.Type = determinePlusMinusCaretUsage(Current);
1029       if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
1030         Contexts.back().CaretFound = true;
1031     } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
1032       Current.Type = determineIncrementUsage(Current);
1033     } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
1034       Current.Type = TT_UnaryOperator;
1035     } else if (Current.is(tok::question)) {
1036       if (Style.Language == FormatStyle::LK_JavaScript &&
1037           Line.MustBeDeclaration && !Contexts.back().IsExpression) {
1038         // In JavaScript, `interface X { foo?(): bar; }` is an optional method
1039         // on the interface, not a ternary expression.
1040         Current.Type = TT_JsTypeOptionalQuestion;
1041       } else {
1042         Current.Type = TT_ConditionalExpr;
1043       }
1044     } else if (Current.isBinaryOperator() &&
1045                (!Current.Previous || Current.Previous->isNot(tok::l_square))) {
1046       Current.Type = TT_BinaryOperator;
1047     } else if (Current.is(tok::comment)) {
1048       if (Current.TokenText.startswith("/*")) {
1049         if (Current.TokenText.endswith("*/"))
1050           Current.Type = TT_BlockComment;
1051         else
1052           // The lexer has for some reason determined a comment here. But we
1053           // cannot really handle it, if it isn't properly terminated.
1054           Current.Tok.setKind(tok::unknown);
1055       } else {
1056         Current.Type = TT_LineComment;
1057       }
1058     } else if (Current.is(tok::r_paren)) {
1059       if (rParenEndsCast(Current))
1060         Current.Type = TT_CastRParen;
1061       if (Current.MatchingParen && Current.Next &&
1062           !Current.Next->isBinaryOperator() &&
1063           !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
1064                                  tok::period, tok::arrow, tok::coloncolon))
1065         if (FormatToken *AfterParen = Current.MatchingParen->Next) {
1066           // Make sure this isn't the return type of an Obj-C block declaration
1067           if (AfterParen->Tok.isNot(tok::caret)) {
1068             if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
1069               if (BeforeParen->is(tok::identifier) &&
1070                   BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
1071                   (!BeforeParen->Previous ||
1072                    BeforeParen->Previous->ClosesTemplateDeclaration))
1073                 Current.Type = TT_FunctionAnnotationRParen;
1074           }
1075         }
1076     } else if (Current.is(tok::at) && Current.Next) {
1077       if (Current.Next->isStringLiteral()) {
1078         Current.Type = TT_ObjCStringLiteral;
1079       } else {
1080         switch (Current.Next->Tok.getObjCKeywordID()) {
1081         case tok::objc_interface:
1082         case tok::objc_implementation:
1083         case tok::objc_protocol:
1084           Current.Type = TT_ObjCDecl;
1085           break;
1086         case tok::objc_property:
1087           Current.Type = TT_ObjCProperty;
1088           break;
1089         default:
1090           break;
1091         }
1092       }
1093     } else if (Current.is(tok::period)) {
1094       FormatToken *PreviousNoComment = Current.getPreviousNonComment();
1095       if (PreviousNoComment &&
1096           PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
1097         Current.Type = TT_DesignatedInitializerPeriod;
1098       else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
1099                Current.Previous->isOneOf(TT_JavaAnnotation,
1100                                          TT_LeadingJavaAnnotation)) {
1101         Current.Type = Current.Previous->Type;
1102       }
1103     } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
1104                Current.Previous &&
1105                !Current.Previous->isOneOf(tok::equal, tok::at) &&
1106                Line.MightBeFunctionDecl && Contexts.size() == 1) {
1107       // Line.MightBeFunctionDecl can only be true after the parentheses of a
1108       // function declaration have been found.
1109       Current.Type = TT_TrailingAnnotation;
1110     } else if ((Style.Language == FormatStyle::LK_Java ||
1111                 Style.Language == FormatStyle::LK_JavaScript) &&
1112                Current.Previous) {
1113       if (Current.Previous->is(tok::at) &&
1114           Current.isNot(Keywords.kw_interface)) {
1115         const FormatToken &AtToken = *Current.Previous;
1116         const FormatToken *Previous = AtToken.getPreviousNonComment();
1117         if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
1118           Current.Type = TT_LeadingJavaAnnotation;
1119         else
1120           Current.Type = TT_JavaAnnotation;
1121       } else if (Current.Previous->is(tok::period) &&
1122                  Current.Previous->isOneOf(TT_JavaAnnotation,
1123                                            TT_LeadingJavaAnnotation)) {
1124         Current.Type = Current.Previous->Type;
1125       }
1126     }
1127   }
1128 
1129   /// \brief Take a guess at whether \p Tok starts a name of a function or
1130   /// variable declaration.
1131   ///
1132   /// This is a heuristic based on whether \p Tok is an identifier following
1133   /// something that is likely a type.
1134   bool isStartOfName(const FormatToken &Tok) {
1135     if (Tok.isNot(tok::identifier) || !Tok.Previous)
1136       return false;
1137 
1138     if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof))
1139       return false;
1140     if (Style.Language == FormatStyle::LK_JavaScript &&
1141         Tok.Previous->is(Keywords.kw_in))
1142       return false;
1143 
1144     // Skip "const" as it does not have an influence on whether this is a name.
1145     FormatToken *PreviousNotConst = Tok.Previous;
1146     while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
1147       PreviousNotConst = PreviousNotConst->Previous;
1148 
1149     if (!PreviousNotConst)
1150       return false;
1151 
1152     bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
1153                        PreviousNotConst->Previous &&
1154                        PreviousNotConst->Previous->is(tok::hash);
1155 
1156     if (PreviousNotConst->is(TT_TemplateCloser))
1157       return PreviousNotConst && PreviousNotConst->MatchingParen &&
1158              PreviousNotConst->MatchingParen->Previous &&
1159              PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
1160              PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
1161 
1162     if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen &&
1163         PreviousNotConst->MatchingParen->Previous &&
1164         PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype))
1165       return true;
1166 
1167     return (!IsPPKeyword &&
1168             PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) ||
1169            PreviousNotConst->is(TT_PointerOrReference) ||
1170            PreviousNotConst->isSimpleTypeSpecifier();
1171   }
1172 
1173   /// \brief Determine whether ')' is ending a cast.
1174   bool rParenEndsCast(const FormatToken &Tok) {
1175     // C-style casts are only used in C++ and Java.
1176     if (Style.Language != FormatStyle::LK_Cpp &&
1177         Style.Language != FormatStyle::LK_Java)
1178       return false;
1179 
1180     // Empty parens aren't casts and there are no casts at the end of the line.
1181     if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
1182       return false;
1183 
1184     FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
1185     if (LeftOfParens) {
1186       // If there is a closing parenthesis left of the current parentheses,
1187       // look past it as these might be chained casts.
1188       if (LeftOfParens->is(tok::r_paren)) {
1189         if (!LeftOfParens->MatchingParen ||
1190             !LeftOfParens->MatchingParen->Previous)
1191           return false;
1192         LeftOfParens = LeftOfParens->MatchingParen->Previous;
1193       }
1194 
1195       // If there is an identifier (or with a few exceptions a keyword) right
1196       // before the parentheses, this is unlikely to be a cast.
1197       if (LeftOfParens->Tok.getIdentifierInfo() &&
1198           !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
1199                                  tok::kw_delete))
1200         return false;
1201 
1202       // Certain other tokens right before the parentheses are also signals that
1203       // this cannot be a cast.
1204       if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
1205                                 TT_TemplateCloser, tok::ellipsis))
1206         return false;
1207     }
1208 
1209     if (Tok.Next->is(tok::question))
1210       return false;
1211 
1212     // As Java has no function types, a "(" after the ")" likely means that this
1213     // is a cast.
1214     if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
1215       return true;
1216 
1217     // If a (non-string) literal follows, this is likely a cast.
1218     if (Tok.Next->isNot(tok::string_literal) &&
1219         (Tok.Next->Tok.isLiteral() ||
1220          Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
1221       return true;
1222 
1223     // Heuristically try to determine whether the parentheses contain a type.
1224     bool ParensAreType =
1225         !Tok.Previous ||
1226         Tok.Previous->isOneOf(TT_PointerOrReference, TT_TemplateCloser) ||
1227         Tok.Previous->isSimpleTypeSpecifier();
1228     bool ParensCouldEndDecl =
1229         Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
1230     if (ParensAreType && !ParensCouldEndDecl)
1231       return true;
1232 
1233     // At this point, we heuristically assume that there are no casts at the
1234     // start of the line. We assume that we have found most cases where there
1235     // are by the logic above, e.g. "(void)x;".
1236     if (!LeftOfParens)
1237       return false;
1238 
1239     // Certain token types inside the parentheses mean that this can't be a
1240     // cast.
1241     for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok;
1242          Token = Token->Next)
1243       if (Token->is(TT_BinaryOperator))
1244         return false;
1245 
1246     // If the following token is an identifier or 'this', this is a cast. All
1247     // cases where this can be something else are handled above.
1248     if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
1249       return true;
1250 
1251     if (!Tok.Next->Next)
1252       return false;
1253 
1254     // If the next token after the parenthesis is a unary operator, assume
1255     // that this is cast, unless there are unexpected tokens inside the
1256     // parenthesis.
1257     bool NextIsUnary =
1258         Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star);
1259     if (!NextIsUnary || Tok.Next->is(tok::plus) ||
1260         !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant))
1261       return false;
1262     // Search for unexpected tokens.
1263     for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
1264          Prev = Prev->Previous) {
1265       if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
1266         return false;
1267     }
1268     return true;
1269   }
1270 
1271   /// \brief Return the type of the given token assuming it is * or &.
1272   TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
1273                                   bool InTemplateArgument) {
1274     if (Style.Language == FormatStyle::LK_JavaScript)
1275       return TT_BinaryOperator;
1276 
1277     const FormatToken *PrevToken = Tok.getPreviousNonComment();
1278     if (!PrevToken)
1279       return TT_UnaryOperator;
1280 
1281     const FormatToken *NextToken = Tok.getNextNonComment();
1282     if (!NextToken ||
1283         NextToken->isOneOf(tok::arrow, Keywords.kw_final, tok::equal,
1284                            Keywords.kw_override) ||
1285         (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
1286       return TT_PointerOrReference;
1287 
1288     if (PrevToken->is(tok::coloncolon))
1289       return TT_PointerOrReference;
1290 
1291     if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
1292                            tok::comma, tok::semi, tok::kw_return, tok::colon,
1293                            tok::equal, tok::kw_delete, tok::kw_sizeof) ||
1294         PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr,
1295                            TT_UnaryOperator, TT_CastRParen))
1296       return TT_UnaryOperator;
1297 
1298     if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
1299       return TT_PointerOrReference;
1300     if (NextToken->is(tok::kw_operator) && !IsExpression)
1301       return TT_PointerOrReference;
1302     if (NextToken->isOneOf(tok::comma, tok::semi))
1303       return TT_PointerOrReference;
1304 
1305     if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&
1306         PrevToken->MatchingParen->Previous &&
1307         PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof,
1308                                                     tok::kw_decltype))
1309       return TT_PointerOrReference;
1310 
1311     if (PrevToken->Tok.isLiteral() ||
1312         PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
1313                            tok::kw_false, tok::r_brace) ||
1314         NextToken->Tok.isLiteral() ||
1315         NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
1316         NextToken->isUnaryOperator() ||
1317         // If we know we're in a template argument, there are no named
1318         // declarations. Thus, having an identifier on the right-hand side
1319         // indicates a binary operator.
1320         (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
1321       return TT_BinaryOperator;
1322 
1323     // "&&(" is quite unlikely to be two successive unary "&".
1324     if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren))
1325       return TT_BinaryOperator;
1326 
1327     // This catches some cases where evaluation order is used as control flow:
1328     //   aaa && aaa->f();
1329     const FormatToken *NextNextToken = NextToken->getNextNonComment();
1330     if (NextNextToken && NextNextToken->is(tok::arrow))
1331       return TT_BinaryOperator;
1332 
1333     // It is very unlikely that we are going to find a pointer or reference type
1334     // definition on the RHS of an assignment.
1335     if (IsExpression && !Contexts.back().CaretFound)
1336       return TT_BinaryOperator;
1337 
1338     return TT_PointerOrReference;
1339   }
1340 
1341   TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
1342     const FormatToken *PrevToken = Tok.getPreviousNonComment();
1343     if (!PrevToken)
1344       return TT_UnaryOperator;
1345 
1346     if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator) &&
1347         !PrevToken->is(tok::exclaim))
1348       // There aren't any trailing unary operators except for TypeScript's
1349       // non-null operator (!). Thus, this must be squence of leading operators.
1350       return TT_UnaryOperator;
1351 
1352     // Use heuristics to recognize unary operators.
1353     if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
1354                            tok::question, tok::colon, tok::kw_return,
1355                            tok::kw_case, tok::at, tok::l_brace))
1356       return TT_UnaryOperator;
1357 
1358     // There can't be two consecutive binary operators.
1359     if (PrevToken->is(TT_BinaryOperator))
1360       return TT_UnaryOperator;
1361 
1362     // Fall back to marking the token as binary operator.
1363     return TT_BinaryOperator;
1364   }
1365 
1366   /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
1367   TokenType determineIncrementUsage(const FormatToken &Tok) {
1368     const FormatToken *PrevToken = Tok.getPreviousNonComment();
1369     if (!PrevToken || PrevToken->is(TT_CastRParen))
1370       return TT_UnaryOperator;
1371     if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
1372       return TT_TrailingUnaryOperator;
1373 
1374     return TT_UnaryOperator;
1375   }
1376 
1377   SmallVector<Context, 8> Contexts;
1378 
1379   const FormatStyle &Style;
1380   AnnotatedLine &Line;
1381   FormatToken *CurrentToken;
1382   bool AutoFound;
1383   const AdditionalKeywords &Keywords;
1384 
1385   // Set of "<" tokens that do not open a template parameter list. If parseAngle
1386   // determines that a specific token can't be a template opener, it will make
1387   // same decision irrespective of the decisions for tokens leading up to it.
1388   // Store this information to prevent this from causing exponential runtime.
1389   llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
1390 };
1391 
1392 static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
1393 static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
1394 
1395 /// \brief Parses binary expressions by inserting fake parenthesis based on
1396 /// operator precedence.
1397 class ExpressionParser {
1398 public:
1399   ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
1400                    AnnotatedLine &Line)
1401       : Style(Style), Keywords(Keywords), Current(Line.First) {}
1402 
1403   /// \brief Parse expressions with the given operatore precedence.
1404   void parse(int Precedence = 0) {
1405     // Skip 'return' and ObjC selector colons as they are not part of a binary
1406     // expression.
1407     while (Current && (Current->is(tok::kw_return) ||
1408                        (Current->is(tok::colon) &&
1409                         Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))))
1410       next();
1411 
1412     if (!Current || Precedence > PrecedenceArrowAndPeriod)
1413       return;
1414 
1415     // Conditional expressions need to be parsed separately for proper nesting.
1416     if (Precedence == prec::Conditional) {
1417       parseConditionalExpr();
1418       return;
1419     }
1420 
1421     // Parse unary operators, which all have a higher precedence than binary
1422     // operators.
1423     if (Precedence == PrecedenceUnaryOperator) {
1424       parseUnaryOperator();
1425       return;
1426     }
1427 
1428     FormatToken *Start = Current;
1429     FormatToken *LatestOperator = nullptr;
1430     unsigned OperatorIndex = 0;
1431 
1432     while (Current) {
1433       // Consume operators with higher precedence.
1434       parse(Precedence + 1);
1435 
1436       int CurrentPrecedence = getCurrentPrecedence();
1437 
1438       if (Current && Current->is(TT_SelectorName) &&
1439           Precedence == CurrentPrecedence) {
1440         if (LatestOperator)
1441           addFakeParenthesis(Start, prec::Level(Precedence));
1442         Start = Current;
1443       }
1444 
1445       // At the end of the line or when an operator with higher precedence is
1446       // found, insert fake parenthesis and return.
1447       if (!Current || (Current->closesScope() && Current->MatchingParen) ||
1448           (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
1449           (CurrentPrecedence == prec::Conditional &&
1450            Precedence == prec::Assignment && Current->is(tok::colon))) {
1451         break;
1452       }
1453 
1454       // Consume scopes: (), [], <> and {}
1455       if (Current->opensScope()) {
1456         while (Current && !Current->closesScope()) {
1457           next();
1458           parse();
1459         }
1460         next();
1461       } else {
1462         // Operator found.
1463         if (CurrentPrecedence == Precedence) {
1464           if (LatestOperator)
1465             LatestOperator->NextOperator = Current;
1466           LatestOperator = Current;
1467           Current->OperatorIndex = OperatorIndex;
1468           ++OperatorIndex;
1469         }
1470         next(/*SkipPastLeadingComments=*/Precedence > 0);
1471       }
1472     }
1473 
1474     if (LatestOperator && (Current || Precedence > 0)) {
1475       // LatestOperator->LastOperator = true;
1476       if (Precedence == PrecedenceArrowAndPeriod) {
1477         // Call expressions don't have a binary operator precedence.
1478         addFakeParenthesis(Start, prec::Unknown);
1479       } else {
1480         addFakeParenthesis(Start, prec::Level(Precedence));
1481       }
1482     }
1483   }
1484 
1485 private:
1486   /// \brief Gets the precedence (+1) of the given token for binary operators
1487   /// and other tokens that we treat like binary operators.
1488   int getCurrentPrecedence() {
1489     if (Current) {
1490       const FormatToken *NextNonComment = Current->getNextNonComment();
1491       if (Current->is(TT_ConditionalExpr))
1492         return prec::Conditional;
1493       if (NextNonComment && NextNonComment->is(tok::colon) &&
1494           NextNonComment->is(TT_DictLiteral))
1495         return prec::Comma;
1496       if (Current->is(TT_LambdaArrow))
1497         return prec::Comma;
1498       if (Current->is(TT_JsFatArrow))
1499         return prec::Assignment;
1500       if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName,
1501                            TT_JsComputedPropertyName) ||
1502           (Current->is(tok::comment) && NextNonComment &&
1503            NextNonComment->is(TT_SelectorName)))
1504         return 0;
1505       if (Current->is(TT_RangeBasedForLoopColon))
1506         return prec::Comma;
1507       if ((Style.Language == FormatStyle::LK_Java ||
1508            Style.Language == FormatStyle::LK_JavaScript) &&
1509           Current->is(Keywords.kw_instanceof))
1510         return prec::Relational;
1511       if (Style.Language == FormatStyle::LK_JavaScript &&
1512           Current->is(Keywords.kw_in))
1513         return prec::Relational;
1514       if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
1515         return Current->getPrecedence();
1516       if (Current->isOneOf(tok::period, tok::arrow))
1517         return PrecedenceArrowAndPeriod;
1518       if ((Style.Language == FormatStyle::LK_Java ||
1519            Style.Language == FormatStyle::LK_JavaScript) &&
1520           Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
1521                            Keywords.kw_throws))
1522         return 0;
1523     }
1524     return -1;
1525   }
1526 
1527   void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
1528     Start->FakeLParens.push_back(Precedence);
1529     if (Precedence > prec::Unknown)
1530       Start->StartsBinaryExpression = true;
1531     if (Current) {
1532       FormatToken *Previous = Current->Previous;
1533       while (Previous->is(tok::comment) && Previous->Previous)
1534         Previous = Previous->Previous;
1535       ++Previous->FakeRParens;
1536       if (Precedence > prec::Unknown)
1537         Previous->EndsBinaryExpression = true;
1538     }
1539   }
1540 
1541   /// \brief Parse unary operator expressions and surround them with fake
1542   /// parentheses if appropriate.
1543   void parseUnaryOperator() {
1544     if (!Current || Current->isNot(TT_UnaryOperator)) {
1545       parse(PrecedenceArrowAndPeriod);
1546       return;
1547     }
1548 
1549     FormatToken *Start = Current;
1550     next();
1551     parseUnaryOperator();
1552 
1553     // The actual precedence doesn't matter.
1554     addFakeParenthesis(Start, prec::Unknown);
1555   }
1556 
1557   void parseConditionalExpr() {
1558     while (Current && Current->isTrailingComment()) {
1559       next();
1560     }
1561     FormatToken *Start = Current;
1562     parse(prec::LogicalOr);
1563     if (!Current || !Current->is(tok::question))
1564       return;
1565     next();
1566     parse(prec::Assignment);
1567     if (!Current || Current->isNot(TT_ConditionalExpr))
1568       return;
1569     next();
1570     parse(prec::Assignment);
1571     addFakeParenthesis(Start, prec::Conditional);
1572   }
1573 
1574   void next(bool SkipPastLeadingComments = true) {
1575     if (Current)
1576       Current = Current->Next;
1577     while (Current &&
1578            (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
1579            Current->isTrailingComment())
1580       Current = Current->Next;
1581   }
1582 
1583   const FormatStyle &Style;
1584   const AdditionalKeywords &Keywords;
1585   FormatToken *Current;
1586 };
1587 
1588 } // end anonymous namespace
1589 
1590 void TokenAnnotator::setCommentLineLevels(
1591     SmallVectorImpl<AnnotatedLine *> &Lines) {
1592   const AnnotatedLine *NextNonCommentLine = nullptr;
1593   for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
1594                                                           E = Lines.rend();
1595        I != E; ++I) {
1596     if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
1597         (*I)->First->Next == nullptr)
1598       (*I)->Level = NextNonCommentLine->Level;
1599     else
1600       NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
1601 
1602     setCommentLineLevels((*I)->Children);
1603   }
1604 }
1605 
1606 static unsigned maxNestingDepth(const AnnotatedLine &Line) {
1607   unsigned Result = 0;
1608   for (const auto* Tok = Line.First; Tok != nullptr; Tok = Tok->Next)
1609     Result = std::max(Result, Tok->NestingLevel);
1610   return Result;
1611 }
1612 
1613 void TokenAnnotator::annotate(AnnotatedLine &Line) {
1614   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1615                                                   E = Line.Children.end();
1616        I != E; ++I) {
1617     annotate(**I);
1618   }
1619   AnnotatingParser Parser(Style, Line, Keywords);
1620   Line.Type = Parser.parseLine();
1621 
1622   // With very deep nesting, ExpressionParser uses lots of stack and the
1623   // formatting algorithm is very slow. We're not going to do a good job here
1624   // anyway - it's probably generated code being formatted by mistake.
1625   // Just skip the whole line.
1626   if (maxNestingDepth(Line) > 50)
1627     Line.Type = LT_Invalid;
1628 
1629   if (Line.Type == LT_Invalid)
1630     return;
1631 
1632   ExpressionParser ExprParser(Style, Keywords, Line);
1633   ExprParser.parse();
1634 
1635   if (Line.startsWith(TT_ObjCMethodSpecifier))
1636     Line.Type = LT_ObjCMethodDecl;
1637   else if (Line.startsWith(TT_ObjCDecl))
1638     Line.Type = LT_ObjCDecl;
1639   else if (Line.startsWith(TT_ObjCProperty))
1640     Line.Type = LT_ObjCProperty;
1641 
1642   Line.First->SpacesRequiredBefore = 1;
1643   Line.First->CanBreakBefore = Line.First->MustBreakBefore;
1644 }
1645 
1646 // This function heuristically determines whether 'Current' starts the name of a
1647 // function declaration.
1648 static bool isFunctionDeclarationName(const FormatToken &Current,
1649                                       const AnnotatedLine &Line) {
1650   auto skipOperatorName = [](const FormatToken* Next) -> const FormatToken* {
1651     for (; Next; Next = Next->Next) {
1652       if (Next->is(TT_OverloadedOperatorLParen))
1653         return Next;
1654       if (Next->is(TT_OverloadedOperator))
1655         continue;
1656       if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
1657         // For 'new[]' and 'delete[]'.
1658         if (Next->Next && Next->Next->is(tok::l_square) &&
1659             Next->Next->Next && Next->Next->Next->is(tok::r_square))
1660           Next = Next->Next->Next;
1661         continue;
1662       }
1663 
1664       break;
1665     }
1666     return nullptr;
1667   };
1668 
1669   // Find parentheses of parameter list.
1670   const FormatToken *Next = Current.Next;
1671   if (Current.is(tok::kw_operator)) {
1672     if (Current.Previous && Current.Previous->is(tok::coloncolon))
1673       return false;
1674     Next = skipOperatorName(Next);
1675   } else {
1676     if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0)
1677       return false;
1678     for (; Next; Next = Next->Next) {
1679       if (Next->is(TT_TemplateOpener)) {
1680         Next = Next->MatchingParen;
1681       } else if (Next->is(tok::coloncolon)) {
1682         Next = Next->Next;
1683         if (!Next)
1684           return false;
1685         if (Next->is(tok::kw_operator)) {
1686           Next = skipOperatorName(Next->Next);
1687           break;
1688         }
1689         if (!Next->is(tok::identifier))
1690           return false;
1691       } else if (Next->is(tok::l_paren)) {
1692         break;
1693       } else {
1694         return false;
1695       }
1696     }
1697   }
1698 
1699   // Check whether parameter list can be long to a function declaration.
1700   if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen)
1701     return false;
1702   // If the lines ends with "{", this is likely an function definition.
1703   if (Line.Last->is(tok::l_brace))
1704     return true;
1705   if (Next->Next == Next->MatchingParen)
1706     return true; // Empty parentheses.
1707   // If there is an &/&& after the r_paren, this is likely a function.
1708   if (Next->MatchingParen->Next &&
1709       Next->MatchingParen->Next->is(TT_PointerOrReference))
1710     return true;
1711   for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
1712        Tok = Tok->Next) {
1713     if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
1714         Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis))
1715       return true;
1716     if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
1717         Tok->Tok.isLiteral())
1718       return false;
1719   }
1720   return false;
1721 }
1722 
1723 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
1724   assert(Line.MightBeFunctionDecl);
1725 
1726   if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
1727        Style.AlwaysBreakAfterReturnType ==
1728            FormatStyle::RTBS_TopLevelDefinitions) &&
1729       Line.Level > 0)
1730     return false;
1731 
1732   switch (Style.AlwaysBreakAfterReturnType) {
1733   case FormatStyle::RTBS_None:
1734     return false;
1735   case FormatStyle::RTBS_All:
1736   case FormatStyle::RTBS_TopLevel:
1737     return true;
1738   case FormatStyle::RTBS_AllDefinitions:
1739   case FormatStyle::RTBS_TopLevelDefinitions:
1740     return Line.mightBeFunctionDefinition();
1741   }
1742 
1743   return false;
1744 }
1745 
1746 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
1747   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1748                                                   E = Line.Children.end();
1749        I != E; ++I) {
1750     calculateFormattingInformation(**I);
1751   }
1752 
1753   Line.First->TotalLength =
1754       Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
1755   if (!Line.First->Next)
1756     return;
1757   FormatToken *Current = Line.First->Next;
1758   bool InFunctionDecl = Line.MightBeFunctionDecl;
1759   while (Current) {
1760     if (isFunctionDeclarationName(*Current, Line))
1761       Current->Type = TT_FunctionDeclarationName;
1762     if (Current->is(TT_LineComment)) {
1763       if (Current->Previous->BlockKind == BK_BracedInit &&
1764           Current->Previous->opensScope())
1765         Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
1766       else
1767         Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
1768 
1769       // If we find a trailing comment, iterate backwards to determine whether
1770       // it seems to relate to a specific parameter. If so, break before that
1771       // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
1772       // to the previous line in:
1773       //   SomeFunction(a,
1774       //                b, // comment
1775       //                c);
1776       if (!Current->HasUnescapedNewline) {
1777         for (FormatToken *Parameter = Current->Previous; Parameter;
1778              Parameter = Parameter->Previous) {
1779           if (Parameter->isOneOf(tok::comment, tok::r_brace))
1780             break;
1781           if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
1782             if (!Parameter->Previous->is(TT_CtorInitializerComma) &&
1783                 Parameter->HasUnescapedNewline)
1784               Parameter->MustBreakBefore = true;
1785             break;
1786           }
1787         }
1788       }
1789     } else if (Current->SpacesRequiredBefore == 0 &&
1790                spaceRequiredBefore(Line, *Current)) {
1791       Current->SpacesRequiredBefore = 1;
1792     }
1793 
1794     Current->MustBreakBefore =
1795         Current->MustBreakBefore || mustBreakBefore(Line, *Current);
1796 
1797     if (!Current->MustBreakBefore && InFunctionDecl &&
1798         Current->is(TT_FunctionDeclarationName))
1799       Current->MustBreakBefore = mustBreakForReturnType(Line);
1800 
1801     Current->CanBreakBefore =
1802         Current->MustBreakBefore || canBreakBefore(Line, *Current);
1803     unsigned ChildSize = 0;
1804     if (Current->Previous->Children.size() == 1) {
1805       FormatToken &LastOfChild = *Current->Previous->Children[0]->Last;
1806       ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
1807                                                   : LastOfChild.TotalLength + 1;
1808     }
1809     const FormatToken *Prev = Current->Previous;
1810     if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
1811         (Prev->Children.size() == 1 &&
1812          Prev->Children[0]->First->MustBreakBefore) ||
1813         Current->IsMultiline)
1814       Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
1815     else
1816       Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
1817                              ChildSize + Current->SpacesRequiredBefore;
1818 
1819     if (Current->is(TT_CtorInitializerColon))
1820       InFunctionDecl = false;
1821 
1822     // FIXME: Only calculate this if CanBreakBefore is true once static
1823     // initializers etc. are sorted out.
1824     // FIXME: Move magic numbers to a better place.
1825     Current->SplitPenalty = 20 * Current->BindingStrength +
1826                             splitPenalty(Line, *Current, InFunctionDecl);
1827 
1828     Current = Current->Next;
1829   }
1830 
1831   calculateUnbreakableTailLengths(Line);
1832   for (Current = Line.First; Current != nullptr; Current = Current->Next) {
1833     if (Current->Role)
1834       Current->Role->precomputeFormattingInfos(Current);
1835   }
1836 
1837   DEBUG({ printDebugInfo(Line); });
1838 }
1839 
1840 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
1841   unsigned UnbreakableTailLength = 0;
1842   FormatToken *Current = Line.Last;
1843   while (Current) {
1844     Current->UnbreakableTailLength = UnbreakableTailLength;
1845     if (Current->CanBreakBefore ||
1846         Current->isOneOf(tok::comment, tok::string_literal)) {
1847       UnbreakableTailLength = 0;
1848     } else {
1849       UnbreakableTailLength +=
1850           Current->ColumnWidth + Current->SpacesRequiredBefore;
1851     }
1852     Current = Current->Previous;
1853   }
1854 }
1855 
1856 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
1857                                       const FormatToken &Tok,
1858                                       bool InFunctionDecl) {
1859   const FormatToken &Left = *Tok.Previous;
1860   const FormatToken &Right = Tok;
1861 
1862   if (Left.is(tok::semi))
1863     return 0;
1864 
1865   if (Style.Language == FormatStyle::LK_Java) {
1866     if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
1867       return 1;
1868     if (Right.is(Keywords.kw_implements))
1869       return 2;
1870     if (Left.is(tok::comma) && Left.NestingLevel == 0)
1871       return 3;
1872   } else if (Style.Language == FormatStyle::LK_JavaScript) {
1873     if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
1874       return 100;
1875     if (Left.is(TT_JsTypeColon))
1876       return 35;
1877     if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
1878         (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
1879       return 100;
1880   }
1881 
1882   if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next &&
1883                               Right.Next->is(TT_DictLiteral)))
1884     return 1;
1885   if (Right.is(tok::l_square)) {
1886     if (Style.Language == FormatStyle::LK_Proto)
1887       return 1;
1888     if (Left.is(tok::r_square))
1889       return 200;
1890     // Slightly prefer formatting local lambda definitions like functions.
1891     if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
1892       return 35;
1893     if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
1894                        TT_ArrayInitializerLSquare))
1895       return 500;
1896   }
1897 
1898   if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
1899       Right.is(tok::kw_operator)) {
1900     if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
1901       return 3;
1902     if (Left.is(TT_StartOfName))
1903       return 110;
1904     if (InFunctionDecl && Right.NestingLevel == 0)
1905       return Style.PenaltyReturnTypeOnItsOwnLine;
1906     return 200;
1907   }
1908   if (Right.is(TT_PointerOrReference))
1909     return 190;
1910   if (Right.is(TT_LambdaArrow))
1911     return 110;
1912   if (Left.is(tok::equal) && Right.is(tok::l_brace))
1913     return 150;
1914   if (Left.is(TT_CastRParen))
1915     return 100;
1916   if (Left.is(tok::coloncolon) ||
1917       (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
1918     return 500;
1919   if (Left.isOneOf(tok::kw_class, tok::kw_struct))
1920     return 5000;
1921   if (Left.is(tok::comment))
1922     return 1000;
1923 
1924   if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon))
1925     return 2;
1926 
1927   if (Right.isMemberAccess()) {
1928     // Breaking before the "./->" of a chained call/member access is reasonably
1929     // cheap, as formatting those with one call per line is generally
1930     // desirable. In particular, it should be cheaper to break before the call
1931     // than it is to break inside a call's parameters, which could lead to weird
1932     // "hanging" indents. The exception is the very last "./->" to support this
1933     // frequent pattern:
1934     //
1935     //   aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
1936     //       dddddddd);
1937     //
1938     // which might otherwise be blown up onto many lines. Here, clang-format
1939     // won't produce "hanging" indents anyway as there is no other trailing
1940     // call.
1941     //
1942     // Also apply higher penalty is not a call as that might lead to a wrapping
1943     // like:
1944     //
1945     //   aaaaaaa
1946     //       .aaaaaaaaa.bbbbbbbb(cccccccc);
1947     return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
1948                ? 150
1949                : 35;
1950   }
1951 
1952   if (Right.is(TT_TrailingAnnotation) &&
1953       (!Right.Next || Right.Next->isNot(tok::l_paren))) {
1954     // Moving trailing annotations to the next line is fine for ObjC method
1955     // declarations.
1956     if (Line.startsWith(TT_ObjCMethodSpecifier))
1957       return 10;
1958     // Generally, breaking before a trailing annotation is bad unless it is
1959     // function-like. It seems to be especially preferable to keep standard
1960     // annotations (i.e. "const", "final" and "override") on the same line.
1961     // Use a slightly higher penalty after ")" so that annotations like
1962     // "const override" are kept together.
1963     bool is_short_annotation = Right.TokenText.size() < 10;
1964     return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
1965   }
1966 
1967   // In for-loops, prefer breaking at ',' and ';'.
1968   if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
1969     return 4;
1970 
1971   // In Objective-C method expressions, prefer breaking before "param:" over
1972   // breaking after it.
1973   if (Right.is(TT_SelectorName))
1974     return 0;
1975   if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
1976     return Line.MightBeFunctionDecl ? 50 : 500;
1977 
1978   if (Left.is(tok::l_paren) && InFunctionDecl &&
1979       Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)
1980     return 100;
1981   if (Left.is(tok::l_paren) && Left.Previous &&
1982       Left.Previous->isOneOf(tok::kw_if, tok::kw_for))
1983     return 1000;
1984   if (Left.is(tok::equal) && InFunctionDecl)
1985     return 110;
1986   if (Right.is(tok::r_brace))
1987     return 1;
1988   if (Left.is(TT_TemplateOpener))
1989     return 100;
1990   if (Left.opensScope()) {
1991     if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
1992       return 0;
1993     return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
1994                                    : 19;
1995   }
1996   if (Left.is(TT_JavaAnnotation))
1997     return 50;
1998 
1999   if (Right.is(tok::lessless)) {
2000     if (Left.is(tok::string_literal) &&
2001         (Right.NextOperator || Right.OperatorIndex != 1)) {
2002       StringRef Content = Left.TokenText;
2003       if (Content.startswith("\""))
2004         Content = Content.drop_front(1);
2005       if (Content.endswith("\""))
2006         Content = Content.drop_back(1);
2007       Content = Content.trim();
2008       if (Content.size() > 1 &&
2009           (Content.back() == ':' || Content.back() == '='))
2010         return 25;
2011     }
2012     return 1; // Breaking at a << is really cheap.
2013   }
2014   if (Left.is(TT_ConditionalExpr))
2015     return prec::Conditional;
2016   prec::Level Level = Left.getPrecedence();
2017   if (Level != prec::Unknown)
2018     return Level;
2019   Level = Right.getPrecedence();
2020   if (Level != prec::Unknown)
2021     return Level;
2022 
2023   return 3;
2024 }
2025 
2026 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
2027                                           const FormatToken &Left,
2028                                           const FormatToken &Right) {
2029   if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
2030     return true;
2031   if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
2032       Left.Tok.getObjCKeywordID() == tok::objc_property)
2033     return true;
2034   if (Right.is(tok::hashhash))
2035     return Left.is(tok::hash);
2036   if (Left.isOneOf(tok::hashhash, tok::hash))
2037     return Right.is(tok::hash);
2038   if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
2039     return Style.SpaceInEmptyParentheses;
2040   if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
2041     return (Right.is(TT_CastRParen) ||
2042             (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
2043                ? Style.SpacesInCStyleCastParentheses
2044                : Style.SpacesInParentheses;
2045   if (Right.isOneOf(tok::semi, tok::comma))
2046     return false;
2047   if (Right.is(tok::less) &&
2048       Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)
2049     return true;
2050   if (Right.is(tok::less) && Left.is(tok::kw_template))
2051     return Style.SpaceAfterTemplateKeyword;
2052   if (Left.isOneOf(tok::exclaim, tok::tilde))
2053     return false;
2054   if (Left.is(tok::at) &&
2055       Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
2056                     tok::numeric_constant, tok::l_paren, tok::l_brace,
2057                     tok::kw_true, tok::kw_false))
2058     return false;
2059   if (Left.is(tok::colon))
2060     return !Left.is(TT_ObjCMethodExpr);
2061   if (Left.is(tok::coloncolon))
2062     return false;
2063   if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
2064     return false;
2065   if (Right.is(tok::ellipsis))
2066     return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
2067                                     Left.Previous->is(tok::kw_case));
2068   if (Left.is(tok::l_square) && Right.is(tok::amp))
2069     return false;
2070   if (Right.is(TT_PointerOrReference))
2071     return (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) ||
2072            (Left.Tok.isLiteral() || (Left.is(tok::kw_const) && Left.Previous &&
2073                                      Left.Previous->is(tok::r_paren)) ||
2074             (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
2075              (Style.PointerAlignment != FormatStyle::PAS_Left ||
2076               (Line.IsMultiVariableDeclStmt &&
2077                (Left.NestingLevel == 0 ||
2078                 (Left.NestingLevel == 1 && Line.First->is(tok::kw_for)))))));
2079   if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
2080       (!Left.is(TT_PointerOrReference) ||
2081        (Style.PointerAlignment != FormatStyle::PAS_Right &&
2082         !Line.IsMultiVariableDeclStmt)))
2083     return true;
2084   if (Left.is(TT_PointerOrReference))
2085     return Right.Tok.isLiteral() ||
2086            Right.isOneOf(TT_BlockComment, Keywords.kw_final,
2087                          Keywords.kw_override) ||
2088            (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) ||
2089            (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
2090                            tok::l_paren) &&
2091             (Style.PointerAlignment != FormatStyle::PAS_Right &&
2092              !Line.IsMultiVariableDeclStmt) &&
2093             Left.Previous &&
2094             !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
2095   if (Right.is(tok::star) && Left.is(tok::l_paren))
2096     return false;
2097   if (Left.is(tok::l_square))
2098     return (Left.is(TT_ArrayInitializerLSquare) &&
2099             Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) ||
2100            (Left.is(TT_ArraySubscriptLSquare) && Style.SpacesInSquareBrackets &&
2101             Right.isNot(tok::r_square));
2102   if (Right.is(tok::r_square))
2103     return Right.MatchingParen &&
2104            ((Style.SpacesInContainerLiterals &&
2105              Right.MatchingParen->is(TT_ArrayInitializerLSquare)) ||
2106             (Style.SpacesInSquareBrackets &&
2107              Right.MatchingParen->is(TT_ArraySubscriptLSquare)));
2108   if (Right.is(tok::l_square) &&
2109       !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare) &&
2110       !Left.isOneOf(tok::numeric_constant, TT_DictLiteral))
2111     return false;
2112   if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
2113     return !Left.Children.empty(); // No spaces in "{}".
2114   if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
2115       (Right.is(tok::r_brace) && Right.MatchingParen &&
2116        Right.MatchingParen->BlockKind != BK_Block))
2117     return !Style.Cpp11BracedListStyle;
2118   if (Left.is(TT_BlockComment))
2119     return !Left.TokenText.endswith("=*/");
2120   if (Right.is(tok::l_paren)) {
2121     if (Left.is(tok::r_paren) && Left.is(TT_AttributeParen))
2122       return true;
2123     return Line.Type == LT_ObjCDecl || Left.is(tok::semi) ||
2124            (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
2125             (Left.isOneOf(tok::kw_if, tok::pp_elif, tok::kw_for, tok::kw_while,
2126                           tok::kw_switch, tok::kw_case, TT_ForEachMacro,
2127                           TT_ObjCForIn) ||
2128              (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch,
2129                            tok::kw_new, tok::kw_delete) &&
2130               (!Left.Previous || Left.Previous->isNot(tok::period))))) ||
2131            (Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
2132             (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() ||
2133              Left.is(tok::r_paren)) &&
2134             Line.Type != LT_PreprocessorDirective);
2135   }
2136   if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
2137     return false;
2138   if (Right.is(TT_UnaryOperator))
2139     return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
2140            (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
2141   if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
2142                     tok::r_paren) ||
2143        Left.isSimpleTypeSpecifier()) &&
2144       Right.is(tok::l_brace) && Right.getNextNonComment() &&
2145       Right.BlockKind != BK_Block)
2146     return false;
2147   if (Left.is(tok::period) || Right.is(tok::period))
2148     return false;
2149   if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
2150     return false;
2151   if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
2152       Left.MatchingParen->Previous &&
2153       Left.MatchingParen->Previous->is(tok::period))
2154     // A.<B>DoSomething();
2155     return false;
2156   if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
2157     return false;
2158   return true;
2159 }
2160 
2161 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
2162                                          const FormatToken &Right) {
2163   const FormatToken &Left = *Right.Previous;
2164   if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
2165     return true; // Never ever merge two identifiers.
2166   if (Style.Language == FormatStyle::LK_Cpp) {
2167     if (Left.is(tok::kw_operator))
2168       return Right.is(tok::coloncolon);
2169   } else if (Style.Language == FormatStyle::LK_Proto) {
2170     if (Right.is(tok::period) &&
2171         Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
2172                      Keywords.kw_repeated, Keywords.kw_extend))
2173       return true;
2174     if (Right.is(tok::l_paren) &&
2175         Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
2176       return true;
2177   } else if (Style.Language == FormatStyle::LK_JavaScript) {
2178     if (Left.is(TT_JsFatArrow))
2179       return true;
2180     if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
2181         (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
2182       return false;
2183     if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
2184       return false;
2185     if (Right.is(tok::star) &&
2186         Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
2187       return false;
2188     if (Right.isOneOf(tok::l_brace, tok::l_square) &&
2189         Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
2190       return true;
2191     // JS methods can use some keywords as names (e.g. `delete()`).
2192     if (Right.is(tok::l_paren) && Line.MustBeDeclaration &&
2193         Left.Tok.getIdentifierInfo())
2194       return false;
2195     if (Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
2196                      Keywords.kw_of, tok::kw_const) &&
2197         (!Left.Previous || !Left.Previous->is(tok::period)))
2198       return true;
2199     if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
2200         Left.Previous->is(tok::period) && Right.is(tok::l_paren))
2201       return false;
2202     if (Left.is(Keywords.kw_as) &&
2203         Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren))
2204       return true;
2205     if (Left.is(tok::kw_default) && Left.Previous &&
2206         Left.Previous->is(tok::kw_export))
2207       return true;
2208     if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
2209       return true;
2210     if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
2211       return false;
2212     if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
2213       return false;
2214     if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
2215         Line.First->isOneOf(Keywords.kw_import, tok::kw_export))
2216       return false;
2217     if (Left.is(tok::ellipsis))
2218       return false;
2219     if (Left.is(TT_TemplateCloser) &&
2220         !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
2221                        Keywords.kw_implements, Keywords.kw_extends))
2222       // Type assertions ('<type>expr') are not followed by whitespace. Other
2223       // locations that should have whitespace following are identified by the
2224       // above set of follower tokens.
2225       return false;
2226     // Postfix non-null assertion operator, as in `foo!.bar()`.
2227     if (Right.is(tok::exclaim) && (Left.isOneOf(tok::identifier, tok::r_paren,
2228                                                 tok::r_square, tok::r_brace) ||
2229                                    Left.Tok.isLiteral()))
2230       return false;
2231     if (Left.is(tok::exclaim) && Right.is(Keywords.kw_as))
2232       return true; // "x! as string"
2233   } else if (Style.Language == FormatStyle::LK_Java) {
2234     if (Left.is(tok::r_square) && Right.is(tok::l_brace))
2235       return true;
2236     if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren))
2237       return Style.SpaceBeforeParens != FormatStyle::SBPO_Never;
2238     if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
2239                       tok::kw_protected) ||
2240          Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
2241                       Keywords.kw_native)) &&
2242         Right.is(TT_TemplateOpener))
2243       return true;
2244   }
2245   if (Left.is(TT_ImplicitStringLiteral))
2246     return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
2247   if (Line.Type == LT_ObjCMethodDecl) {
2248     if (Left.is(TT_ObjCMethodSpecifier))
2249       return true;
2250     if (Left.is(tok::r_paren) && Right.is(tok::identifier))
2251       // Don't space between ')' and <id>
2252       return false;
2253   }
2254   if (Line.Type == LT_ObjCProperty &&
2255       (Right.is(tok::equal) || Left.is(tok::equal)))
2256     return false;
2257 
2258   if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
2259       Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow))
2260     return true;
2261   if (Right.is(TT_OverloadedOperatorLParen))
2262     return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
2263   if (Left.is(tok::comma))
2264     return true;
2265   if (Right.is(tok::comma))
2266     return false;
2267   if (Right.isOneOf(TT_CtorInitializerColon, TT_ObjCBlockLParen))
2268     return true;
2269   if (Right.is(tok::colon)) {
2270     if (Line.First->isOneOf(tok::kw_case, tok::kw_default) ||
2271         !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi))
2272       return false;
2273     if (Right.is(TT_ObjCMethodExpr))
2274       return false;
2275     if (Left.is(tok::question))
2276       return false;
2277     if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
2278       return false;
2279     if (Right.is(TT_DictLiteral))
2280       return Style.SpacesInContainerLiterals;
2281     return true;
2282   }
2283   if (Left.is(TT_UnaryOperator))
2284     return Right.is(TT_BinaryOperator);
2285 
2286   // If the next token is a binary operator or a selector name, we have
2287   // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
2288   if (Left.is(TT_CastRParen))
2289     return Style.SpaceAfterCStyleCast ||
2290            Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
2291 
2292   if (Left.is(tok::greater) && Right.is(tok::greater))
2293     return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
2294            (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
2295   if (Right.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
2296       Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar))
2297     return false;
2298   if (!Style.SpaceBeforeAssignmentOperators &&
2299       Right.getPrecedence() == prec::Assignment)
2300     return false;
2301   if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment))
2302     return (Left.is(TT_TemplateOpener) &&
2303             Style.Standard == FormatStyle::LS_Cpp03) ||
2304            !(Left.isOneOf(tok::identifier, tok::l_paren, tok::r_paren,
2305                           tok::l_square) ||
2306              Left.isOneOf(TT_TemplateCloser, TT_TemplateOpener));
2307   if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
2308     return Style.SpacesInAngles;
2309   if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
2310       (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
2311        !Right.is(tok::r_paren)))
2312     return true;
2313   if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) &&
2314       Right.isNot(TT_FunctionTypeLParen))
2315     return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
2316   if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
2317       Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen))
2318     return false;
2319   if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
2320       Line.startsWith(tok::hash))
2321     return true;
2322   if (Right.is(TT_TrailingUnaryOperator))
2323     return false;
2324   if (Left.is(TT_RegexLiteral))
2325     return false;
2326   return spaceRequiredBetween(Line, Left, Right);
2327 }
2328 
2329 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
2330 static bool isAllmanBrace(const FormatToken &Tok) {
2331   return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block &&
2332          !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
2333 }
2334 
2335 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
2336                                      const FormatToken &Right) {
2337   const FormatToken &Left = *Right.Previous;
2338   if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
2339     return true;
2340 
2341   if (Style.Language == FormatStyle::LK_JavaScript) {
2342     // FIXME: This might apply to other languages and token kinds.
2343     if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
2344         Left.Previous->is(tok::string_literal))
2345       return true;
2346     if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
2347         Left.Previous && Left.Previous->is(tok::equal) &&
2348         Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
2349                             tok::kw_const) &&
2350         // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
2351         // above.
2352         !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let))
2353       // Object literals on the top level of a file are treated as "enum-style".
2354       // Each key/value pair is put on a separate line, instead of bin-packing.
2355       return true;
2356     if (Left.is(tok::l_brace) && Line.Level == 0 &&
2357         (Line.startsWith(tok::kw_enum) ||
2358          Line.startsWith(tok::kw_export, tok::kw_enum)))
2359       // JavaScript top-level enum key/value pairs are put on separate lines
2360       // instead of bin-packing.
2361       return true;
2362     if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
2363         !Left.Children.empty())
2364       // Support AllowShortFunctionsOnASingleLine for JavaScript.
2365       return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
2366              Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
2367              (Left.NestingLevel == 0 && Line.Level == 0 &&
2368               Style.AllowShortFunctionsOnASingleLine ==
2369                   FormatStyle::SFS_Inline);
2370   } else if (Style.Language == FormatStyle::LK_Java) {
2371     if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
2372         Right.Next->is(tok::string_literal))
2373       return true;
2374   }
2375 
2376   // If the last token before a '}' is a comma or a trailing comment, the
2377   // intention is to insert a line break after it in order to make shuffling
2378   // around entries easier.
2379   const FormatToken *BeforeClosingBrace = nullptr;
2380   if (Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) &&
2381       Left.BlockKind != BK_Block && Left.MatchingParen)
2382     BeforeClosingBrace = Left.MatchingParen->Previous;
2383   else if (Right.MatchingParen &&
2384            Right.MatchingParen->isOneOf(tok::l_brace,
2385                                         TT_ArrayInitializerLSquare))
2386     BeforeClosingBrace = &Left;
2387   if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
2388                              BeforeClosingBrace->isTrailingComment()))
2389     return true;
2390 
2391   if (Right.is(tok::comment))
2392     return Left.BlockKind != BK_BracedInit &&
2393            Left.isNot(TT_CtorInitializerColon) &&
2394            (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
2395   if (Left.isTrailingComment())
2396     return true;
2397   if (Left.isStringLiteral() &&
2398       (Right.isStringLiteral() || Right.is(TT_ObjCStringLiteral)))
2399     return true;
2400   if (Right.Previous->IsUnterminatedLiteral)
2401     return true;
2402   if (Right.is(tok::lessless) && Right.Next &&
2403       Right.Previous->is(tok::string_literal) &&
2404       Right.Next->is(tok::string_literal))
2405     return true;
2406   if (Right.Previous->ClosesTemplateDeclaration &&
2407       Right.Previous->MatchingParen &&
2408       Right.Previous->MatchingParen->NestingLevel == 0 &&
2409       Style.AlwaysBreakTemplateDeclarations)
2410     return true;
2411   if ((Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) &&
2412       Style.BreakConstructorInitializersBeforeComma &&
2413       !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
2414     return true;
2415   if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\""))
2416     // Raw string literals are special wrt. line breaks. The author has made a
2417     // deliberate choice and might have aligned the contents of the string
2418     // literal accordingly. Thus, we try keep existing line breaks.
2419     return Right.NewlinesBefore > 0;
2420   if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 &&
2421       Style.Language == FormatStyle::LK_Proto)
2422     // Don't put enums onto single lines in protocol buffers.
2423     return true;
2424   if (Right.is(TT_InlineASMBrace))
2425     return Right.HasUnescapedNewline;
2426   if (isAllmanBrace(Left) || isAllmanBrace(Right))
2427     return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) ||
2428            (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
2429            (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
2430   if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine)
2431     return true;
2432 
2433   if ((Style.Language == FormatStyle::LK_Java ||
2434        Style.Language == FormatStyle::LK_JavaScript) &&
2435       Left.is(TT_LeadingJavaAnnotation) &&
2436       Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
2437       (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations))
2438     return true;
2439 
2440   return false;
2441 }
2442 
2443 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
2444                                     const FormatToken &Right) {
2445   const FormatToken &Left = *Right.Previous;
2446 
2447   // Language-specific stuff.
2448   if (Style.Language == FormatStyle::LK_Java) {
2449     if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
2450                      Keywords.kw_implements))
2451       return false;
2452     if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
2453                       Keywords.kw_implements))
2454       return true;
2455   } else if (Style.Language == FormatStyle::LK_JavaScript) {
2456     const FormatToken *NonComment = Right.getPreviousNonComment();
2457     if (Left.isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break,
2458                      tok::kw_throw) ||
2459         (NonComment &&
2460          NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break,
2461                              tok::kw_throw)))
2462       return false; // Otherwise a semicolon is inserted.
2463     if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
2464       return false;
2465     if (Left.is(TT_JsTypeColon))
2466       return true;
2467     if (Right.NestingLevel == 0 && Right.is(Keywords.kw_is))
2468       return false;
2469     if (Left.is(Keywords.kw_in))
2470       return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
2471     if (Right.is(Keywords.kw_in))
2472       return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
2473     if (Right.is(Keywords.kw_as))
2474       return false; // must not break before as in 'x as type' casts
2475     if (Left.is(Keywords.kw_declare) &&
2476         Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
2477                       Keywords.kw_function, tok::kw_class, tok::kw_enum,
2478                       Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
2479                       Keywords.kw_let, tok::kw_const))
2480       // See grammar for 'declare' statements at:
2481       // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10
2482       return false;
2483     if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
2484         Right.isOneOf(tok::identifier, tok::string_literal)) {
2485       return false; // must not break in "module foo { ...}"
2486     }
2487   }
2488 
2489   if (Left.is(tok::at))
2490     return false;
2491   if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
2492     return false;
2493   if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
2494     return !Right.is(tok::l_paren);
2495   if (Right.is(TT_PointerOrReference))
2496     return Line.IsMultiVariableDeclStmt ||
2497            (Style.PointerAlignment == FormatStyle::PAS_Right &&
2498             (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
2499   if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
2500       Right.is(tok::kw_operator))
2501     return true;
2502   if (Left.is(TT_PointerOrReference))
2503     return false;
2504   if (Right.isTrailingComment())
2505     // We rely on MustBreakBefore being set correctly here as we should not
2506     // change the "binding" behavior of a comment.
2507     // The first comment in a braced lists is always interpreted as belonging to
2508     // the first list element. Otherwise, it should be placed outside of the
2509     // list.
2510     return Left.BlockKind == BK_BracedInit;
2511   if (Left.is(tok::question) && Right.is(tok::colon))
2512     return false;
2513   if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
2514     return Style.BreakBeforeTernaryOperators;
2515   if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
2516     return !Style.BreakBeforeTernaryOperators;
2517   if (Right.is(TT_InheritanceColon))
2518     return true;
2519   if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) &&
2520       Left.isNot(TT_SelectorName))
2521     return true;
2522   if (Right.is(tok::colon) &&
2523       !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon))
2524     return false;
2525   if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr))
2526     return true;
2527   if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
2528                                     Right.Next->is(TT_ObjCMethodExpr)))
2529     return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
2530   if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
2531     return true;
2532   if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
2533     return true;
2534   if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
2535                     TT_OverloadedOperator))
2536     return false;
2537   if (Left.is(TT_RangeBasedForLoopColon))
2538     return true;
2539   if (Right.is(TT_RangeBasedForLoopColon))
2540     return false;
2541   if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
2542     return true;
2543   if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
2544       Left.is(tok::kw_operator))
2545     return false;
2546   if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
2547       Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0)
2548     return false;
2549   if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen))
2550     return false;
2551   if (Left.is(tok::l_paren) && Left.Previous &&
2552       (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen)))
2553     return false;
2554   if (Right.is(TT_ImplicitStringLiteral))
2555     return false;
2556 
2557   if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser))
2558     return false;
2559   if (Right.is(tok::r_square) && Right.MatchingParen &&
2560       Right.MatchingParen->is(TT_LambdaLSquare))
2561     return false;
2562 
2563   // We only break before r_brace if there was a corresponding break before
2564   // the l_brace, which is tracked by BreakBeforeClosingBrace.
2565   if (Right.is(tok::r_brace))
2566     return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
2567 
2568   // Allow breaking after a trailing annotation, e.g. after a method
2569   // declaration.
2570   if (Left.is(TT_TrailingAnnotation))
2571     return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
2572                           tok::less, tok::coloncolon);
2573 
2574   if (Right.is(tok::kw___attribute))
2575     return true;
2576 
2577   if (Left.is(tok::identifier) && Right.is(tok::string_literal))
2578     return true;
2579 
2580   if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
2581     return true;
2582 
2583   if (Left.is(TT_CtorInitializerComma) &&
2584       Style.BreakConstructorInitializersBeforeComma)
2585     return false;
2586   if (Right.is(TT_CtorInitializerComma) &&
2587       Style.BreakConstructorInitializersBeforeComma)
2588     return true;
2589   if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
2590       (Left.is(tok::less) && Right.is(tok::less)))
2591     return false;
2592   if (Right.is(TT_BinaryOperator) &&
2593       Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
2594       (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
2595        Right.getPrecedence() != prec::Assignment))
2596     return true;
2597   if (Left.is(TT_ArrayInitializerLSquare))
2598     return true;
2599   if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
2600     return true;
2601   if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
2602       !Left.isOneOf(tok::arrowstar, tok::lessless) &&
2603       Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
2604       (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
2605        Left.getPrecedence() == prec::Assignment))
2606     return true;
2607   return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
2608                       tok::kw_class, tok::kw_struct, tok::comment) ||
2609          Right.isMemberAccess() ||
2610          Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
2611                        tok::colon, tok::l_square, tok::at) ||
2612          (Left.is(tok::r_paren) &&
2613           Right.isOneOf(tok::identifier, tok::kw_const)) ||
2614          (Left.is(tok::l_paren) && !Right.is(tok::r_paren));
2615 }
2616 
2617 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
2618   llvm::errs() << "AnnotatedTokens:\n";
2619   const FormatToken *Tok = Line.First;
2620   while (Tok) {
2621     llvm::errs() << " M=" << Tok->MustBreakBefore
2622                  << " C=" << Tok->CanBreakBefore
2623                  << " T=" << getTokenTypeName(Tok->Type)
2624                  << " S=" << Tok->SpacesRequiredBefore
2625                  << " B=" << Tok->BlockParameterCount
2626                  << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()
2627                  << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind
2628                  << " FakeLParens=";
2629     for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
2630       llvm::errs() << Tok->FakeLParens[i] << "/";
2631     llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n";
2632     if (!Tok->Next)
2633       assert(Tok == Line.Last);
2634     Tok = Tok->Next;
2635   }
2636   llvm::errs() << "----\n";
2637 }
2638 
2639 } // namespace format
2640 } // namespace clang
2641