1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "TokenAnnotator.h"
16 #include "FormatToken.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/Support/Debug.h"
20 
21 #define DEBUG_TYPE "format-token-annotator"
22 
23 namespace clang {
24 namespace format {
25 
26 namespace {
27 
28 /// Returns \c true if the token can be used as an identifier in
29 /// an Objective-C \c @selector, \c false otherwise.
30 ///
31 /// Because getFormattingLangOpts() always lexes source code as
32 /// Objective-C++, C++ keywords like \c new and \c delete are
33 /// lexed as tok::kw_*, not tok::identifier, even for Objective-C.
34 ///
35 /// For Objective-C and Objective-C++, both identifiers and keywords
36 /// are valid inside @selector(...) (or a macro which
37 /// invokes @selector(...)). So, we allow treat any identifier or
38 /// keyword as a potential Objective-C selector component.
39 static bool canBeObjCSelectorComponent(const FormatToken &Tok) {
40   return Tok.Tok.getIdentifierInfo() != nullptr;
41 }
42 
43 /// With `Left` being '(', check if we're at either `[...](` or
44 /// `[...]<...>(`, where the [ opens a lambda capture list.
45 static bool isLambdaParameterList(const FormatToken *Left) {
46   // Skip <...> if present.
47   if (Left->Previous && Left->Previous->is(tok::greater) &&
48       Left->Previous->MatchingParen &&
49       Left->Previous->MatchingParen->is(TT_TemplateOpener))
50     Left = Left->Previous->MatchingParen;
51 
52   // Check for `[...]`.
53   return Left->Previous && Left->Previous->is(tok::r_square) &&
54          Left->Previous->MatchingParen &&
55          Left->Previous->MatchingParen->is(TT_LambdaLSquare);
56 }
57 
58 /// A parser that gathers additional information about tokens.
59 ///
/// The \c TokenAnnotator tries to match parentheses and square brackets and
/// store parenthesis levels. It also tries to resolve matching "<" and ">"
62 /// into template parameter lists.
63 class AnnotatingParser {
64 public:
65   AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
66                    const AdditionalKeywords &Keywords)
67       : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
68         Keywords(Keywords) {
69     Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
70     resetTokenMetadata(CurrentToken);
71   }
72 
73 private:
74   bool parseAngle() {
75     if (!CurrentToken || !CurrentToken->Previous)
76       return false;
77     if (NonTemplateLess.count(CurrentToken->Previous))
78       return false;
79 
80     const FormatToken &Previous = *CurrentToken->Previous; // The '<'.
81     if (Previous.Previous) {
82       if (Previous.Previous->Tok.isLiteral())
83         return false;
84       if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
85           (!Previous.Previous->MatchingParen ||
86            !Previous.Previous->MatchingParen->is(TT_OverloadedOperatorLParen)))
87         return false;
88     }
89 
90     FormatToken *Left = CurrentToken->Previous;
91     Left->ParentBracket = Contexts.back().ContextKind;
92     ScopedContextCreator ContextCreator(*this, tok::less, 12);
93 
94     // If this angle is in the context of an expression, we need to be more
95     // hesitant to detect it as opening template parameters.
96     bool InExprContext = Contexts.back().IsExpression;
97 
98     Contexts.back().IsExpression = false;
99     // If there's a template keyword before the opening angle bracket, this is a
100     // template parameter, not an argument.
101     Contexts.back().InTemplateArgument =
102         Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);
103 
104     if (Style.Language == FormatStyle::LK_Java &&
105         CurrentToken->is(tok::question))
106       next();
107 
108     while (CurrentToken) {
109       if (CurrentToken->is(tok::greater)) {
110         Left->MatchingParen = CurrentToken;
111         CurrentToken->MatchingParen = Left;
112         // In TT_Proto, we must distignuish between:
113         //   map<key, value>
114         //   msg < item: data >
115         //   msg: < item: data >
116         // In TT_TextProto, map<key, value> does not occur.
117         if (Style.Language == FormatStyle::LK_TextProto ||
118             (Style.Language == FormatStyle::LK_Proto && Left->Previous &&
119              Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral)))
120           CurrentToken->Type = TT_DictLiteral;
121         else
122           CurrentToken->Type = TT_TemplateCloser;
123         next();
124         return true;
125       }
126       if (CurrentToken->is(tok::question) &&
127           Style.Language == FormatStyle::LK_Java) {
128         next();
129         continue;
130       }
131       if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
132           (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext &&
133            Style.Language != FormatStyle::LK_Proto &&
134            Style.Language != FormatStyle::LK_TextProto))
135         return false;
136       // If a && or || is found and interpreted as a binary operator, this set
137       // of angles is likely part of something like "a < b && c > d". If the
138       // angles are inside an expression, the ||/&& might also be a binary
139       // operator that was misinterpreted because we are parsing template
140       // parameters.
141       // FIXME: This is getting out of hand, write a decent parser.
142       if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
143           CurrentToken->Previous->is(TT_BinaryOperator) &&
144           Contexts[Contexts.size() - 2].IsExpression &&
145           !Line.startsWith(tok::kw_template))
146         return false;
147       updateParameterCount(Left, CurrentToken);
148       if (Style.Language == FormatStyle::LK_Proto) {
149         if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
150           if (CurrentToken->is(tok::colon) ||
151               (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
152                Previous->isNot(tok::colon)))
153             Previous->Type = TT_SelectorName;
154         }
155       }
156       if (!consumeToken())
157         return false;
158     }
159     return false;
160   }
161 
  /// Parses a parenthesized group. Expects \c CurrentToken to be the token
  /// right after the '(' (so the '(' itself is \c CurrentToken->Previous).
  ///
  /// While scanning up to the matching ')', this decides whether the contents
  /// are treated as an expression, detects function pointer/reference and
  /// ObjC block types, `@selector(` expressions and ObjC `for (... in ...)`
  /// loops, and records on the '(' how its arguments were packed in the input.
  ///
  /// \param LookForDecls set when "if (" has been seen; enables the
  /// `identifier * identifier` binary-operator fix-up in the loop below.
  /// \returns true if the matching ')' was found (both parens are then linked
  /// through \c MatchingParen); false if there is no current token, a stray
  /// ']' or '}' is hit, consumeToken() fails, or the tokens run out.
  bool parseParens(bool LookForDecls = false) {
    if (!CurrentToken)
      return false;
    FormatToken *Left = CurrentToken->Previous;
    // Remember the enclosing bracket kind before the paren context is set up.
    Left->ParentBracket = Contexts.back().ContextKind;
    ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);

    // FIXME: This is a bit of a hack. Do better.
    Contexts.back().ColonIsForRangeExpr =
        Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;

    bool StartsObjCMethodExpr = false;
    if (FormatToken *MaybeSel = Left->Previous) {
      // @selector( starts a selector.
      if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
          MaybeSel->Previous->is(tok::at)) {
        StartsObjCMethodExpr = true;
      }
    }

    // Decide whether the contents of the parens are an expression, mostly
    // from what precedes the '('. The first matching rule wins.
    if (Left->is(TT_OverloadedOperatorLParen)) {
      Contexts.back().IsExpression = false;
    } else if (Style.Language == FormatStyle::LK_JavaScript &&
               (Line.startsWith(Keywords.kw_type, tok::identifier) ||
                Line.startsWith(tok::kw_export, Keywords.kw_type,
                                tok::identifier))) {
      // type X = (...);
      // export type X = (...);
      Contexts.back().IsExpression = false;
    } else if (Left->Previous &&
               (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,
                                        tok::kw_while, tok::l_paren,
                                        tok::comma) ||
                Left->Previous->isIf() ||
                Left->Previous->is(TT_BinaryOperator))) {
      // static_assert, if and while usually contain expressions.
      Contexts.back().IsExpression = true;
    } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
               (Left->Previous->is(Keywords.kw_function) ||
                (Left->Previous->endsSequence(tok::identifier,
                                              Keywords.kw_function)))) {
      // function(...) or function f(...)
      Contexts.back().IsExpression = false;
    } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
               Left->Previous->is(TT_JsTypeColon)) {
      // let x: (SomeType);
      Contexts.back().IsExpression = false;
    } else if (isLambdaParameterList(Left)) {
      // This is a parameter list of a lambda expression.
      Contexts.back().IsExpression = false;
    } else if (Line.InPPDirective &&
               (!Left->Previous || !Left->Previous->is(tok::identifier))) {
      // In a preprocessor directive, parens not preceded by an identifier are
      // treated as an expression.
      Contexts.back().IsExpression = true;
    } else if (Contexts[Contexts.size() - 2].CaretFound) {
      // This is the parameter list of an ObjC block.
      Contexts.back().IsExpression = false;
    } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
      // __attribute__( ... ): tag the paren; the ')' is tagged further below.
      Left->Type = TT_AttributeParen;
    } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
      // The first argument to a foreach macro is a declaration.
      Contexts.back().IsForEachMacro = true;
      Contexts.back().IsExpression = false;
    } else if (Left->Previous && Left->Previous->MatchingParen &&
               Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) {
      // Parens directly following a `(^...)` group.
      Contexts.back().IsExpression = false;
    } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
      // Otherwise default to an expression, except in the head of a 'for' or
      // 'catch'.
      bool IsForOrCatch =
          Left->Previous && Left->Previous->isOneOf(tok::kw_for, tok::kw_catch);
      Contexts.back().IsExpression = !IsForOrCatch;
    }

    if (StartsObjCMethodExpr) {
      Contexts.back().ColonIsObjCMethodExpr = true;
      Left->Type = TT_ObjCMethodExpr;
    }

    // MightBeFunctionType and ProbablyFunctionType are used for
    // function pointer and reference types as well as Objective-C
    // block types:
    //
    // void (*FunctionPointer)(void);
    // void (&FunctionReference)(void);
    // void (^ObjCBlock)(void);
    bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
    bool ProbablyFunctionType =
        CurrentToken->isOneOf(tok::star, tok::amp, tok::caret);
    // Layout of the arguments as found in the input; feeds PackingKind below.
    bool HasMultipleLines = false;
    bool HasMultipleParametersOnALine = false;
    bool MightBeObjCForRangeLoop =
        Left->Previous && Left->Previous->is(tok::kw_for);
    // Tentatively annotated 'in' of an ObjC for-in loop, if any.
    FormatToken *PossibleObjCForInToken = nullptr;
    while (CurrentToken) {
      // LookForDecls is set when "if (" has been seen. Check for
      // 'identifier' '*' 'identifier' followed by not '=' -- this
      // '*' has to be a binary operator but determineStarAmpUsage() will
      // categorize it as an unary operator, so set the right type here.
      if (LookForDecls && CurrentToken->Next) {
        FormatToken *Prev = CurrentToken->getPreviousNonComment();
        if (Prev) {
          FormatToken *PrevPrev = Prev->getPreviousNonComment();
          FormatToken *Next = CurrentToken->Next;
          if (PrevPrev && PrevPrev->is(tok::identifier) &&
              Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
              CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
            Prev->Type = TT_BinaryOperator;
            LookForDecls = false;
          }
        }
      }

      // A pointer/reference token right after '(' or '::' hints at a function
      // type such as `(*name)` or `(Class::*name)`.
      if (CurrentToken->Previous->is(TT_PointerOrReference) &&
          CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
                                                    tok::coloncolon))
        ProbablyFunctionType = true;
      if (CurrentToken->is(tok::comma))
        MightBeFunctionType = false;
      if (CurrentToken->Previous->is(TT_BinaryOperator))
        Contexts.back().IsExpression = true;
      if (CurrentToken->is(tok::r_paren)) {
        // `(...)` followed by '(' (or by '[' in a declaration) is the name
        // part of a function type or ObjC block type.
        if (MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next &&
            (CurrentToken->Next->is(tok::l_paren) ||
             (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration)))
          Left->Type = Left->Next->is(tok::caret) ? TT_ObjCBlockLParen
                                                  : TT_FunctionTypeLParen;
        Left->MatchingParen = CurrentToken;
        CurrentToken->MatchingParen = Left;

        if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
            Left->Previous && Left->Previous->is(tok::l_paren)) {
          // Detect the case where macros are used to generate lambdas or
          // function bodies, e.g.:
          //   auto my_lambda = MACRO((Type *type, int i) { .. body .. });
          for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) {
            if (Tok->is(TT_BinaryOperator) &&
                Tok->isOneOf(tok::star, tok::amp, tok::ampamp))
              Tok->Type = TT_PointerOrReference;
          }
        }

        if (StartsObjCMethodExpr) {
          CurrentToken->Type = TT_ObjCMethodExpr;
          if (Contexts.back().FirstObjCSelectorName) {
            Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
                Contexts.back().LongestObjCSelectorName;
          }
        }

        // Give the ')' the same annotation as the construct it belongs to.
        if (Left->is(TT_AttributeParen))
          CurrentToken->Type = TT_AttributeParen;
        if (Left->Previous && Left->Previous->is(TT_JavaAnnotation))
          CurrentToken->Type = TT_JavaAnnotation;
        if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation))
          CurrentToken->Type = TT_LeadingJavaAnnotation;
        if (Left->Previous && Left->Previous->is(TT_AttributeSquare))
          CurrentToken->Type = TT_AttributeSquare;

        // Record how the arguments were laid out in the input: all on one
        // line (inconclusive), several on at least one line (bin-packed), or
        // otherwise one per line.
        if (!HasMultipleLines)
          Left->PackingKind = PPK_Inconclusive;
        else if (HasMultipleParametersOnALine)
          Left->PackingKind = PPK_BinPacked;
        else
          Left->PackingKind = PPK_OnePerLine;

        next();
        return true;
      }
      // A closing bracket of another kind means the parens are unbalanced.
      if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
        return false;

      if (CurrentToken->is(tok::l_brace))
        Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
      // A comma whose following token stays on the same line (and is not a
      // trailing comment) means several parameters share a line.
      if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
          !CurrentToken->Next->HasUnescapedNewline &&
          !CurrentToken->Next->isTrailingComment())
        HasMultipleParametersOnALine = true;
      // After `const`, `auto` or a simple type specifier we are looking at a
      // declaration rather than an expression (unless a '{' follows).
      if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
           CurrentToken->Previous->isSimpleTypeSpecifier()) &&
          !CurrentToken->is(tok::l_brace))
        Contexts.back().IsExpression = false;
      // A ';' or ':' rules out an ObjC `for (x in y)` loop; undo a tentative
      // TT_ObjCForIn annotation.
      if (CurrentToken->isOneOf(tok::semi, tok::colon)) {
        MightBeObjCForRangeLoop = false;
        if (PossibleObjCForInToken) {
          PossibleObjCForInToken->Type = TT_Unknown;
          PossibleObjCForInToken = nullptr;
        }
      }
      if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) {
        PossibleObjCForInToken = CurrentToken;
        PossibleObjCForInToken->Type = TT_ObjCForIn;
      }
      // When we discover a 'new', we set CanBeExpression to 'false' in order to
      // parse the type correctly. Reset that after a comma.
      if (CurrentToken->is(tok::comma))
        Contexts.back().CanBeExpression = true;

      // consumeToken() advances CurrentToken, so keep the token being consumed
      // around for the parameter count update.
      FormatToken *Tok = CurrentToken;
      if (!consumeToken())
        return false;
      updateParameterCount(Left, Tok);
      if (CurrentToken && CurrentToken->HasUnescapedNewline)
        HasMultipleLines = true;
    }
    return false;
  }
366 
367   bool isCSharpAttributeSpecifier(const FormatToken &Tok) {
368     if (!Style.isCSharp())
369       return false;
370 
371     const FormatToken *AttrTok = Tok.Next;
372     if (!AttrTok)
373       return false;
374 
375     // Just an empty declaration e.g. string [].
376     if (AttrTok->is(tok::r_square))
377       return false;
378 
379     // Move along the tokens inbetween the '[' and ']' e.g. [STAThread].
380     while (AttrTok && AttrTok->isNot(tok::r_square)) {
381       AttrTok = AttrTok->Next;
382     }
383 
384     if (!AttrTok)
385       return false;
386 
387     // Move past the end of ']'.
388     AttrTok = AttrTok->Next;
389     if (!AttrTok)
390       return false;
391 
392     // Limit this to being an access modifier that follows.
393     if (AttrTok->isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
394                          tok::kw_class, tok::kw_static, tok::l_square,
395                          Keywords.kw_internal)) {
396       return true;
397     }
398     return false;
399   }
400 
401   bool isCpp11AttributeSpecifier(const FormatToken &Tok) {
402     if (!Style.isCpp() || !Tok.startsSequence(tok::l_square, tok::l_square))
403       return false;
404     // The first square bracket is part of an ObjC array literal
405     if (Tok.Previous && Tok.Previous->is(tok::at)) {
406       return false;
407     }
408     const FormatToken *AttrTok = Tok.Next->Next;
409     if (!AttrTok)
410       return false;
411     // C++17 '[[using ns: foo, bar(baz, blech)]]'
412     // We assume nobody will name an ObjC variable 'using'.
413     if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon))
414       return true;
415     if (AttrTok->isNot(tok::identifier))
416       return false;
417     while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) {
418       // ObjC message send. We assume nobody will use : in a C++11 attribute
419       // specifier parameter, although this is technically valid:
420       // [[foo(:)]].
421       if (AttrTok->is(tok::colon) ||
422           AttrTok->startsSequence(tok::identifier, tok::identifier) ||
423           AttrTok->startsSequence(tok::r_paren, tok::identifier))
424         return false;
425       if (AttrTok->is(tok::ellipsis))
426         return true;
427       AttrTok = AttrTok->Next;
428     }
429     return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square);
430   }
431 
  /// Parses a square-bracket group. Expects \c CurrentToken to be the token
  /// right after the '[' (so the '[' itself is \c CurrentToken->Previous).
  ///
  /// Classifies the '[' (ObjC method expression, attribute, array
  /// initializer, array subscript, designated initializer, structured
  /// binding, ...) if it has no type yet, then scans up to the matching ']'
  /// while refining that classification.
  ///
  /// \returns true if the matching ']' was found (both brackets are then
  /// linked through \c MatchingParen); false if there is no current token, a
  /// stray ')' or '}' is hit, consumeToken() fails, or the tokens run out.
  bool parseSquare() {
    if (!CurrentToken)
      return false;

    // A '[' could be an index subscript (after an identifier or after
    // ')' or ']'), it could be the start of an Objective-C method
    // expression, it could be the start of an Objective-C array literal,
    // or it could be a C++ attribute specifier [[foo::bar]].
    FormatToken *Left = CurrentToken->Previous;
    Left->ParentBracket = Contexts.back().ContextKind;
    FormatToken *Parent = Left->getPreviousNonComment();

    // Cases where '>' is followed by '['.
    // In C++, this can happen either in array of templates (foo<int>[10])
    // or when array is a nested template type (unique_ptr<type1<type2>[]>).
    bool CppArrayTemplates =
        Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) &&
        (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
         Contexts.back().InTemplateArgument);

    // Either this '[' starts an attribute, or the enclosing context is
    // already inside one.
    bool IsCpp11AttributeSpecifier = isCpp11AttributeSpecifier(*Left) ||
                                     Contexts.back().InCpp11AttributeSpecifier;

    // Treat C# Attributes [STAThread] much like C++ attributes [[...]].
    // NOTE(review): despite the "11" in its name, this flag holds the C#
    // attribute result.
    bool IsCSharp11AttributeSpecifier =
        isCSharpAttributeSpecifier(*Left) ||
        Contexts.back().InCSharpAttributeSpecifier;

    bool InsideInlineASM = Line.startsWith(tok::kw_asm);
    bool IsCppStructuredBinding = Left->isCppStructuredBinding(Style);
    // An ObjC message send is only considered in C++-family input when no
    // other interpretation applies and the preceding token (if any) allows an
    // expression to start here.
    bool StartsObjCMethodExpr =
        !IsCppStructuredBinding && !InsideInlineASM && !CppArrayTemplates &&
        Style.isCpp() && !IsCpp11AttributeSpecifier &&
        Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
        !CurrentToken->isOneOf(tok::l_brace, tok::r_square) &&
        (!Parent ||
         Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
                         tok::kw_return, tok::kw_throw) ||
         Parent->isUnaryOperator() ||
         // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
         Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
         (getBinOpPrecedence(Parent->Tok.getKind(), true, true) >
          prec::Unknown));
    bool ColonFound = false;

    // Passed to the context created for the bracket contents below.
    unsigned BindingIncrease = 1;
    if (IsCppStructuredBinding) {
      Left->Type = TT_StructuredBindingLSquare;
    } else if (Left->is(TT_Unknown)) {
      // Only classify the '[' if it has not been given a type yet. The first
      // matching rule wins.
      if (StartsObjCMethodExpr) {
        Left->Type = TT_ObjCMethodExpr;
      } else if (IsCpp11AttributeSpecifier) {
        Left->Type = TT_AttributeSquare;
      } else if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
                 Contexts.back().ContextKind == tok::l_brace &&
                 Parent->isOneOf(tok::l_brace, tok::comma)) {
        Left->Type = TT_JsComputedPropertyName;
      } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace &&
                 Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
        Left->Type = TT_DesignatedInitializerLSquare;
      } else if (CurrentToken->is(tok::r_square) && Parent &&
                 Parent->is(TT_TemplateCloser)) {
        // Empty brackets directly after a template closer, e.g. `T<U>[]`.
        Left->Type = TT_ArraySubscriptLSquare;
      } else if (Style.Language == FormatStyle::LK_Proto ||
                 Style.Language == FormatStyle::LK_TextProto) {
        // Square braces in LK_Proto can either be message field attributes:
        //
        // optional Aaa aaa = 1 [
        //   (aaa) = aaa
        // ];
        //
        // extensions 123 [
        //   (aaa) = aaa
        // ];
        //
        // or text proto extensions (in options):
        //
        // option (Aaa.options) = {
        //   [type.type/type] {
        //     key: value
        //   }
        // }
        //
        // or repeated fields (in options):
        //
        // option (Aaa.options) = {
        //   keys: [ 1, 2, 3 ]
        // }
        //
        // In the first and the third case we want to spread the contents inside
        // the square braces; in the second we want to keep them inline.
        Left->Type = TT_ArrayInitializerLSquare;
        if (!Left->endsSequence(tok::l_square, tok::numeric_constant,
                                tok::equal) &&
            !Left->endsSequence(tok::l_square, tok::numeric_constant,
                                tok::identifier) &&
            !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) {
          Left->Type = TT_ProtoExtensionLSquare;
          BindingIncrease = 10;
        }
      } else if (!CppArrayTemplates && Parent &&
                 Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
                                 tok::comma, tok::l_paren, tok::l_square,
                                 tok::question, tok::colon, tok::kw_return,
                                 // Should only be relevant to JavaScript:
                                 tok::kw_default)) {
        Left->Type = TT_ArrayInitializerLSquare;
      } else if (IsCSharp11AttributeSpecifier) {
        Left->Type = TT_AttributeSquare;
      } else {
        BindingIncrease = 10;
        Left->Type = TT_ArraySubscriptLSquare;
      }
    }

    ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
    // Bracket contents are an expression, except for a JavaScript type
    // annotation.
    Contexts.back().IsExpression = true;
    if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
        Parent->is(TT_JsTypeColon))
      Contexts.back().IsExpression = false;

    Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
    Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier;
    Contexts.back().InCSharpAttributeSpecifier = IsCSharp11AttributeSpecifier;

    while (CurrentToken) {
      if (CurrentToken->is(tok::r_square)) {
        // NOTE(review): the `else if` below pairs with the C# check only, so
        // it is also evaluated when IsCpp11AttributeSpecifier is true.
        if (IsCpp11AttributeSpecifier)
          CurrentToken->Type = TT_AttributeSquare;
        if (IsCSharp11AttributeSpecifier)
          CurrentToken->Type = TT_AttributeSquare;
        else if (((CurrentToken->Next &&
                   CurrentToken->Next->is(tok::l_paren)) ||
                  (CurrentToken->Previous &&
                   CurrentToken->Previous->Previous == Left)) &&
                 Left->is(TT_ObjCMethodExpr)) {
          // An ObjC method call is rarely followed by an open parenthesis. It
          // also can't be composed of just one token, unless it's a macro that
          // will be expanded to more tokens.
          // FIXME: Do we incorrectly label ":" with this?
          StartsObjCMethodExpr = false;
          Left->Type = TT_Unknown;
        }
        if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
          CurrentToken->Type = TT_ObjCMethodExpr;
          // If we haven't seen a colon yet, make sure the last identifier
          // before the r_square is tagged as a selector name component.
          if (!ColonFound && CurrentToken->Previous &&
              CurrentToken->Previous->is(TT_Unknown) &&
              canBeObjCSelectorComponent(*CurrentToken->Previous))
            CurrentToken->Previous->Type = TT_SelectorName;
          // determineStarAmpUsage() thinks that '*' '[' is allocating an
          // array of pointers, but if '[' starts a selector then '*' is a
          // binary operator.
          if (Parent && Parent->is(TT_PointerOrReference))
            Parent->Type = TT_BinaryOperator;
        }
        // An arrow after an ObjC method expression is not a lambda arrow.
        if (CurrentToken->Type == TT_ObjCMethodExpr && CurrentToken->Next &&
            CurrentToken->Next->is(TT_LambdaArrow))
          CurrentToken->Next->Type = TT_Unknown;
        Left->MatchingParen = CurrentToken;
        CurrentToken->MatchingParen = Left;
        // FirstObjCSelectorName is set when a colon is found. This does
        // not work, however, when the method has no parameters.
        // Here, we set FirstObjCSelectorName when the end of the method call is
        // reached, in case it was not set already.
        if (!Contexts.back().FirstObjCSelectorName) {
          FormatToken *Previous = CurrentToken->getPreviousNonComment();
          if (Previous && Previous->is(TT_SelectorName)) {
            Previous->ObjCSelectorNameParts = 1;
            Contexts.back().FirstObjCSelectorName = Previous;
          }
        } else {
          // For a method expression with selector parts, the parameter count
          // of the '[' is the number of selector name parts.
          Left->ParameterCount =
              Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
        }
        if (Contexts.back().FirstObjCSelectorName) {
          Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
              Contexts.back().LongestObjCSelectorName;
          // With more than one block argument, the longest selector name is
          // reset to 0.
          if (Left->BlockParameterCount > 1)
            Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
        }
        next();
        return true;
      }
      // A closing bracket of another kind means the squares are unbalanced.
      if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
        return false;
      if (CurrentToken->is(tok::colon)) {
        if (IsCpp11AttributeSpecifier &&
            CurrentToken->endsSequence(tok::colon, tok::identifier,
                                       tok::kw_using)) {
          // Remember that this is a [[using ns: foo]] C++ attribute, so we
          // don't add a space before the colon (unlike other colons).
          CurrentToken->Type = TT_AttributeColon;
        } else if (Left->isOneOf(TT_ArraySubscriptLSquare,
                                 TT_DesignatedInitializerLSquare)) {
          // A colon inside what was taken for a subscript or designated
          // initializer: reclassify the brackets as an ObjC method expression.
          Left->Type = TT_ObjCMethodExpr;
          StartsObjCMethodExpr = true;
          Contexts.back().ColonIsObjCMethodExpr = true;
          if (Parent && Parent->is(tok::r_paren))
            // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
            Parent->Type = TT_CastRParen;
        }
        ColonFound = true;
      }
      // A comma before any colon means this is not a message send after all.
      if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
          !ColonFound)
        Left->Type = TT_ArrayInitializerLSquare;
      // consumeToken() advances CurrentToken, so keep the token being consumed
      // around for the parameter count update.
      FormatToken *Tok = CurrentToken;
      if (!consumeToken())
        return false;
      updateParameterCount(Left, Tok);
    }
    return false;
  }
648 
649   bool parseBrace() {
650     if (CurrentToken) {
651       FormatToken *Left = CurrentToken->Previous;
652       Left->ParentBracket = Contexts.back().ContextKind;
653 
654       if (Contexts.back().CaretFound)
655         Left->Type = TT_ObjCBlockLBrace;
656       Contexts.back().CaretFound = false;
657 
658       ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
659       Contexts.back().ColonIsDictLiteral = true;
660       if (Left->BlockKind == BK_BracedInit)
661         Contexts.back().IsExpression = true;
662       if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
663           Left->Previous->is(TT_JsTypeColon))
664         Contexts.back().IsExpression = false;
665 
666       while (CurrentToken) {
667         if (CurrentToken->is(tok::r_brace)) {
668           Left->MatchingParen = CurrentToken;
669           CurrentToken->MatchingParen = Left;
670           next();
671           return true;
672         }
673         if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
674           return false;
675         updateParameterCount(Left, CurrentToken);
676         if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
677           FormatToken *Previous = CurrentToken->getPreviousNonComment();
678           if (Previous->is(TT_JsTypeOptionalQuestion))
679             Previous = Previous->getPreviousNonComment();
680           if ((CurrentToken->is(tok::colon) &&
681                (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
682               Style.Language == FormatStyle::LK_Proto ||
683               Style.Language == FormatStyle::LK_TextProto) {
684             Left->Type = TT_DictLiteral;
685             if (Previous->Tok.getIdentifierInfo() ||
686                 Previous->is(tok::string_literal))
687               Previous->Type = TT_SelectorName;
688           }
689           if (CurrentToken->is(tok::colon) ||
690               Style.Language == FormatStyle::LK_JavaScript)
691             Left->Type = TT_DictLiteral;
692         }
693         if (CurrentToken->is(tok::comma) &&
694             Style.Language == FormatStyle::LK_JavaScript)
695           Left->Type = TT_DictLiteral;
696         if (!consumeToken())
697           return false;
698       }
699     }
700     return true;
701   }
702 
703   void updateParameterCount(FormatToken *Left, FormatToken *Current) {
704     // For ObjC methods, the number of parameters is calculated differently as
705     // method declarations have a different structure (the parameters are not
706     // inside a bracket scope).
707     if (Current->is(tok::l_brace) && Current->BlockKind == BK_Block)
708       ++Left->BlockParameterCount;
709     if (Current->is(tok::comma)) {
710       ++Left->ParameterCount;
711       if (!Left->Role)
712         Left->Role.reset(new CommaSeparatedList(Style));
713       Left->Role->CommaFound(Current);
714     } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
715       Left->ParameterCount = 1;
716     }
717   }
718 
719   bool parseConditional() {
720     while (CurrentToken) {
721       if (CurrentToken->is(tok::colon)) {
722         CurrentToken->Type = TT_ConditionalExpr;
723         next();
724         return true;
725       }
726       if (!consumeToken())
727         return false;
728     }
729     return false;
730   }
731 
732   bool parseTemplateDeclaration() {
733     if (CurrentToken && CurrentToken->is(tok::less)) {
734       CurrentToken->Type = TT_TemplateOpener;
735       next();
736       if (!parseAngle())
737         return false;
738       if (CurrentToken)
739         CurrentToken->Previous->ClosesTemplateDeclaration = true;
740       return true;
741     }
742     return false;
743   }
744 
745   bool consumeToken() {
746     FormatToken *Tok = CurrentToken;
747     next();
748     switch (Tok->Tok.getKind()) {
749     case tok::plus:
750     case tok::minus:
751       if (!Tok->Previous && Line.MustBeDeclaration)
752         Tok->Type = TT_ObjCMethodSpecifier;
753       break;
754     case tok::colon:
755       if (!Tok->Previous)
756         return false;
757       // Colons from ?: are handled in parseConditional().
758       if (Style.Language == FormatStyle::LK_JavaScript) {
759         if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
760             (Contexts.size() == 1 &&               // switch/case labels
761              !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
762             Contexts.back().ContextKind == tok::l_paren ||  // function params
763             Contexts.back().ContextKind == tok::l_square || // array type
764             (!Contexts.back().IsExpression &&
765              Contexts.back().ContextKind == tok::l_brace) || // object type
766             (Contexts.size() == 1 &&
767              Line.MustBeDeclaration)) { // method/property declaration
768           Contexts.back().IsExpression = false;
769           Tok->Type = TT_JsTypeColon;
770           break;
771         }
772       }
773       if (Contexts.back().ColonIsDictLiteral ||
774           Style.Language == FormatStyle::LK_Proto ||
775           Style.Language == FormatStyle::LK_TextProto) {
776         Tok->Type = TT_DictLiteral;
777         if (Style.Language == FormatStyle::LK_TextProto) {
778           if (FormatToken *Previous = Tok->getPreviousNonComment())
779             Previous->Type = TT_SelectorName;
780         }
781       } else if (Contexts.back().ColonIsObjCMethodExpr ||
782                  Line.startsWith(TT_ObjCMethodSpecifier)) {
783         Tok->Type = TT_ObjCMethodExpr;
784         const FormatToken *BeforePrevious = Tok->Previous->Previous;
785         // Ensure we tag all identifiers in method declarations as
786         // TT_SelectorName.
787         bool UnknownIdentifierInMethodDeclaration =
788             Line.startsWith(TT_ObjCMethodSpecifier) &&
789             Tok->Previous->is(tok::identifier) && Tok->Previous->is(TT_Unknown);
790         if (!BeforePrevious ||
791             // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
792             !(BeforePrevious->is(TT_CastRParen) ||
793               (BeforePrevious->is(TT_ObjCMethodExpr) &&
794                BeforePrevious->is(tok::colon))) ||
795             BeforePrevious->is(tok::r_square) ||
796             Contexts.back().LongestObjCSelectorName == 0 ||
797             UnknownIdentifierInMethodDeclaration) {
798           Tok->Previous->Type = TT_SelectorName;
799           if (!Contexts.back().FirstObjCSelectorName)
800             Contexts.back().FirstObjCSelectorName = Tok->Previous;
801           else if (Tok->Previous->ColumnWidth >
802                    Contexts.back().LongestObjCSelectorName)
803             Contexts.back().LongestObjCSelectorName =
804                 Tok->Previous->ColumnWidth;
805           Tok->Previous->ParameterIndex =
806               Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
807           ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
808         }
809       } else if (Contexts.back().ColonIsForRangeExpr) {
810         Tok->Type = TT_RangeBasedForLoopColon;
811       } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
812         Tok->Type = TT_BitFieldColon;
813       } else if (Contexts.size() == 1 &&
814                  !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
815         if (Tok->getPreviousNonComment()->isOneOf(tok::r_paren,
816                                                   tok::kw_noexcept))
817           Tok->Type = TT_CtorInitializerColon;
818         else
819           Tok->Type = TT_InheritanceColon;
820       } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next &&
821                  (Tok->Next->isOneOf(tok::r_paren, tok::comma) ||
822                   (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next &&
823                    Tok->Next->Next->is(tok::colon)))) {
824         // This handles a special macro in ObjC code where selectors including
825         // the colon are passed as macro arguments.
826         Tok->Type = TT_ObjCMethodExpr;
827       } else if (Contexts.back().ContextKind == tok::l_paren) {
828         Tok->Type = TT_InlineASMColon;
829       }
830       break;
831     case tok::pipe:
832     case tok::amp:
833       // | and & in declarations/type expressions represent union and
834       // intersection types, respectively.
835       if (Style.Language == FormatStyle::LK_JavaScript &&
836           !Contexts.back().IsExpression)
837         Tok->Type = TT_JsTypeOperator;
838       break;
839     case tok::kw_if:
840     case tok::kw_while:
841       assert(!Line.startsWith(tok::hash));
842       if (Tok->is(tok::kw_if) && CurrentToken &&
843           CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier))
844         next();
845       if (CurrentToken && CurrentToken->is(tok::l_paren)) {
846         next();
847         if (!parseParens(/*LookForDecls=*/true))
848           return false;
849       }
850       break;
851     case tok::kw_for:
852       if (Style.Language == FormatStyle::LK_JavaScript) {
853         // x.for and {for: ...}
854         if ((Tok->Previous && Tok->Previous->is(tok::period)) ||
855             (Tok->Next && Tok->Next->is(tok::colon)))
856           break;
857         // JS' for await ( ...
858         if (CurrentToken && CurrentToken->is(Keywords.kw_await))
859           next();
860       }
861       Contexts.back().ColonIsForRangeExpr = true;
862       next();
863       if (!parseParens())
864         return false;
865       break;
866     case tok::l_paren:
867       // When faced with 'operator()()', the kw_operator handler incorrectly
868       // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
869       // the first two parens OverloadedOperators and the second l_paren an
870       // OverloadedOperatorLParen.
871       if (Tok->Previous && Tok->Previous->is(tok::r_paren) &&
872           Tok->Previous->MatchingParen &&
873           Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
874         Tok->Previous->Type = TT_OverloadedOperator;
875         Tok->Previous->MatchingParen->Type = TT_OverloadedOperator;
876         Tok->Type = TT_OverloadedOperatorLParen;
877       }
878 
879       if (!parseParens())
880         return false;
881       if (Line.MustBeDeclaration && Contexts.size() == 1 &&
882           !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
883           (!Tok->Previous ||
884            !Tok->Previous->isOneOf(tok::kw_decltype, tok::kw___attribute,
885                                    TT_LeadingJavaAnnotation)))
886         Line.MightBeFunctionDecl = true;
887       break;
888     case tok::l_square:
889       if (!parseSquare())
890         return false;
891       break;
892     case tok::l_brace:
893       if (Style.Language == FormatStyle::LK_TextProto) {
894         FormatToken *Previous = Tok->getPreviousNonComment();
895         if (Previous && Previous->Type != TT_DictLiteral)
896           Previous->Type = TT_SelectorName;
897       }
898       if (!parseBrace())
899         return false;
900       break;
901     case tok::less:
902       if (parseAngle()) {
903         Tok->Type = TT_TemplateOpener;
904         // In TT_Proto, we must distignuish between:
905         //   map<key, value>
906         //   msg < item: data >
907         //   msg: < item: data >
908         // In TT_TextProto, map<key, value> does not occur.
909         if (Style.Language == FormatStyle::LK_TextProto ||
910             (Style.Language == FormatStyle::LK_Proto && Tok->Previous &&
911              Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
912           Tok->Type = TT_DictLiteral;
913           FormatToken *Previous = Tok->getPreviousNonComment();
914           if (Previous && Previous->Type != TT_DictLiteral)
915             Previous->Type = TT_SelectorName;
916         }
917       } else {
918         Tok->Type = TT_BinaryOperator;
919         NonTemplateLess.insert(Tok);
920         CurrentToken = Tok;
921         next();
922       }
923       break;
924     case tok::r_paren:
925     case tok::r_square:
926       return false;
927     case tok::r_brace:
928       // Lines can start with '}'.
929       if (Tok->Previous)
930         return false;
931       break;
932     case tok::greater:
933       if (Style.Language != FormatStyle::LK_TextProto)
934         Tok->Type = TT_BinaryOperator;
935       if (Tok->Previous && Tok->Previous->is(TT_TemplateCloser))
936         Tok->SpacesRequiredBefore = 1;
937       break;
938     case tok::kw_operator:
939       if (Style.Language == FormatStyle::LK_TextProto ||
940           Style.Language == FormatStyle::LK_Proto)
941         break;
942       while (CurrentToken &&
943              !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
944         if (CurrentToken->isOneOf(tok::star, tok::amp))
945           CurrentToken->Type = TT_PointerOrReference;
946         consumeToken();
947         if (CurrentToken &&
948             CurrentToken->Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator,
949                                             tok::comma))
950           CurrentToken->Previous->Type = TT_OverloadedOperator;
951       }
952       if (CurrentToken) {
953         CurrentToken->Type = TT_OverloadedOperatorLParen;
954         if (CurrentToken->Previous->is(TT_BinaryOperator))
955           CurrentToken->Previous->Type = TT_OverloadedOperator;
956       }
957       break;
958     case tok::question:
959       if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next &&
960           Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
961                              tok::r_brace)) {
962         // Question marks before semicolons, colons, etc. indicate optional
963         // types (fields, parameters), e.g.
964         //   function(x?: string, y?) {...}
965         //   class X { y?; }
966         Tok->Type = TT_JsTypeOptionalQuestion;
967         break;
968       }
969       // Declarations cannot be conditional expressions, this can only be part
970       // of a type declaration.
971       if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
972           Style.Language == FormatStyle::LK_JavaScript)
973         break;
974       parseConditional();
975       break;
976     case tok::kw_template:
977       parseTemplateDeclaration();
978       break;
979     case tok::comma:
980       if (Contexts.back().InCtorInitializer)
981         Tok->Type = TT_CtorInitializerComma;
982       else if (Contexts.back().InInheritanceList)
983         Tok->Type = TT_InheritanceComma;
984       else if (Contexts.back().FirstStartOfName &&
985                (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) {
986         Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
987         Line.IsMultiVariableDeclStmt = true;
988       }
989       if (Contexts.back().IsForEachMacro)
990         Contexts.back().IsExpression = true;
991       break;
992     case tok::identifier:
993       if (Tok->isOneOf(Keywords.kw___has_include,
994                        Keywords.kw___has_include_next)) {
995         parseHasInclude();
996       }
997       break;
998     default:
999       break;
1000     }
1001     return true;
1002   }
1003 
1004   void parseIncludeDirective() {
1005     if (CurrentToken && CurrentToken->is(tok::less)) {
1006       next();
1007       while (CurrentToken) {
1008         // Mark tokens up to the trailing line comments as implicit string
1009         // literals.
1010         if (CurrentToken->isNot(tok::comment) &&
1011             !CurrentToken->TokenText.startswith("//"))
1012           CurrentToken->Type = TT_ImplicitStringLiteral;
1013         next();
1014       }
1015     }
1016   }
1017 
1018   void parseWarningOrError() {
1019     next();
1020     // We still want to format the whitespace left of the first token of the
1021     // warning or error.
1022     next();
1023     while (CurrentToken) {
1024       CurrentToken->Type = TT_ImplicitStringLiteral;
1025       next();
1026     }
1027   }
1028 
1029   void parsePragma() {
1030     next(); // Consume "pragma".
1031     if (CurrentToken &&
1032         CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) {
1033       bool IsMark = CurrentToken->is(Keywords.kw_mark);
1034       next(); // Consume "mark".
1035       next(); // Consume first token (so we fix leading whitespace).
1036       while (CurrentToken) {
1037         if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator))
1038           CurrentToken->Type = TT_ImplicitStringLiteral;
1039         next();
1040       }
1041     }
1042   }
1043 
1044   void parseHasInclude() {
1045     if (!CurrentToken || !CurrentToken->is(tok::l_paren))
1046       return;
1047     next(); // '('
1048     parseIncludeDirective();
1049     next(); // ')'
1050   }
1051 
1052   LineType parsePreprocessorDirective() {
1053     bool IsFirstToken = CurrentToken->IsFirst;
1054     LineType Type = LT_PreprocessorDirective;
1055     next();
1056     if (!CurrentToken)
1057       return Type;
1058 
1059     if (Style.Language == FormatStyle::LK_JavaScript && IsFirstToken) {
1060       // JavaScript files can contain shebang lines of the form:
1061       // #!/usr/bin/env node
1062       // Treat these like C++ #include directives.
1063       while (CurrentToken) {
1064         // Tokens cannot be comments here.
1065         CurrentToken->Type = TT_ImplicitStringLiteral;
1066         next();
1067       }
1068       return LT_ImportStatement;
1069     }
1070 
1071     if (CurrentToken->Tok.is(tok::numeric_constant)) {
1072       CurrentToken->SpacesRequiredBefore = 1;
1073       return Type;
1074     }
1075     // Hashes in the middle of a line can lead to any strange token
1076     // sequence.
1077     if (!CurrentToken->Tok.getIdentifierInfo())
1078       return Type;
1079     switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
1080     case tok::pp_include:
1081     case tok::pp_include_next:
1082     case tok::pp_import:
1083       next();
1084       parseIncludeDirective();
1085       Type = LT_ImportStatement;
1086       break;
1087     case tok::pp_error:
1088     case tok::pp_warning:
1089       parseWarningOrError();
1090       break;
1091     case tok::pp_pragma:
1092       parsePragma();
1093       break;
1094     case tok::pp_if:
1095     case tok::pp_elif:
1096       Contexts.back().IsExpression = true;
1097       next();
1098       parseLine();
1099       break;
1100     default:
1101       break;
1102     }
1103     while (CurrentToken) {
1104       FormatToken *Tok = CurrentToken;
1105       next();
1106       if (Tok->is(tok::l_paren))
1107         parseParens();
1108       else if (Tok->isOneOf(Keywords.kw___has_include,
1109                             Keywords.kw___has_include_next))
1110         parseHasInclude();
1111     }
1112     return Type;
1113   }
1114 
1115 public:
1116   LineType parseLine() {
1117     if (!CurrentToken)
1118       return LT_Invalid;
1119     NonTemplateLess.clear();
1120     if (CurrentToken->is(tok::hash))
1121       return parsePreprocessorDirective();
1122 
1123     // Directly allow to 'import <string-literal>' to support protocol buffer
1124     // definitions (github.com/google/protobuf) or missing "#" (either way we
1125     // should not break the line).
1126     IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
1127     if ((Style.Language == FormatStyle::LK_Java &&
1128          CurrentToken->is(Keywords.kw_package)) ||
1129         (Info && Info->getPPKeywordID() == tok::pp_import &&
1130          CurrentToken->Next &&
1131          CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
1132                                      tok::kw_static))) {
1133       next();
1134       parseIncludeDirective();
1135       return LT_ImportStatement;
1136     }
1137 
1138     // If this line starts and ends in '<' and '>', respectively, it is likely
1139     // part of "#define <a/b.h>".
1140     if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
1141       parseIncludeDirective();
1142       return LT_ImportStatement;
1143     }
1144 
1145     // In .proto files, top-level options and package statements are very
1146     // similar to import statements and should not be line-wrapped.
1147     if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
1148         CurrentToken->isOneOf(Keywords.kw_option, Keywords.kw_package)) {
1149       next();
1150       if (CurrentToken && CurrentToken->is(tok::identifier)) {
1151         while (CurrentToken)
1152           next();
1153         return LT_ImportStatement;
1154       }
1155     }
1156 
1157     bool KeywordVirtualFound = false;
1158     bool ImportStatement = false;
1159 
1160     // import {...} from '...';
1161     if (Style.Language == FormatStyle::LK_JavaScript &&
1162         CurrentToken->is(Keywords.kw_import))
1163       ImportStatement = true;
1164 
1165     while (CurrentToken) {
1166       if (CurrentToken->is(tok::kw_virtual))
1167         KeywordVirtualFound = true;
1168       if (Style.Language == FormatStyle::LK_JavaScript) {
1169         // export {...} from '...';
1170         // An export followed by "from 'some string';" is a re-export from
1171         // another module identified by a URI and is treated as a
1172         // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
1173         // Just "export {...};" or "export class ..." should not be treated as
1174         // an import in this sense.
1175         if (Line.First->is(tok::kw_export) &&
1176             CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
1177             CurrentToken->Next->isStringLiteral())
1178           ImportStatement = true;
1179         if (isClosureImportStatement(*CurrentToken))
1180           ImportStatement = true;
1181       }
1182       if (!consumeToken())
1183         return LT_Invalid;
1184     }
1185     if (KeywordVirtualFound)
1186       return LT_VirtualFunctionDecl;
1187     if (ImportStatement)
1188       return LT_ImportStatement;
1189 
1190     if (Line.startsWith(TT_ObjCMethodSpecifier)) {
1191       if (Contexts.back().FirstObjCSelectorName)
1192         Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
1193             Contexts.back().LongestObjCSelectorName;
1194       return LT_ObjCMethodDecl;
1195     }
1196 
1197     return LT_Other;
1198   }
1199 
1200 private:
1201   bool isClosureImportStatement(const FormatToken &Tok) {
1202     // FIXME: Closure-library specific stuff should not be hard-coded but be
1203     // configurable.
1204     return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
1205            Tok.Next->Next &&
1206            (Tok.Next->Next->TokenText == "module" ||
1207             Tok.Next->Next->TokenText == "provide" ||
1208             Tok.Next->Next->TokenText == "require" ||
1209             Tok.Next->Next->TokenText == "requireType" ||
1210             Tok.Next->Next->TokenText == "forwardDeclare") &&
1211            Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
1212   }
1213 
1214   void resetTokenMetadata(FormatToken *Token) {
1215     if (!Token)
1216       return;
1217 
1218     // Reset token type in case we have already looked at it and then
1219     // recovered from an error (e.g. failure to find the matching >).
1220     if (!CurrentToken->isOneOf(
1221             TT_LambdaLSquare, TT_LambdaLBrace, TT_ForEachMacro,
1222             TT_TypenameMacro, TT_FunctionLBrace, TT_ImplicitStringLiteral,
1223             TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow, TT_NamespaceMacro,
1224             TT_OverloadedOperator, TT_RegexLiteral, TT_TemplateString,
1225             TT_ObjCStringLiteral))
1226       CurrentToken->Type = TT_Unknown;
1227     CurrentToken->Role.reset();
1228     CurrentToken->MatchingParen = nullptr;
1229     CurrentToken->FakeLParens.clear();
1230     CurrentToken->FakeRParens = 0;
1231   }
1232 
1233   void next() {
1234     if (CurrentToken) {
1235       CurrentToken->NestingLevel = Contexts.size() - 1;
1236       CurrentToken->BindingStrength = Contexts.back().BindingStrength;
1237       modifyContext(*CurrentToken);
1238       determineTokenType(*CurrentToken);
1239       CurrentToken = CurrentToken->Next;
1240     }
1241 
1242     resetTokenMetadata(CurrentToken);
1243   }
1244 
  /// A struct to hold information valid in a specific context, e.g.
  /// a pair of parentheses.
  ///
  /// Contexts are kept on the \c Contexts stack; \c Contexts.back() is the
  /// innermost one.
  struct Context {
    Context(tok::TokenKind ContextKind, unsigned BindingStrength,
            bool IsExpression)
        : ContextKind(ContextKind), BindingStrength(BindingStrength),
          IsExpression(IsExpression) {}

    /// Token kind this context was created for (parseBrace() creates its
    /// context with \c tok::l_brace).
    tok::TokenKind ContextKind;
    /// Copied by next() onto every token consumed in this context.
    /// ScopedContextCreator derives it from the enclosing context's value
    /// plus an increment.
    unsigned BindingStrength;
    /// Whether tokens in this context are currently treated as part of an
    /// expression. Initialized from the enclosing context and updated by
    /// modifyContext().
    bool IsExpression;
    /// Updated by consumeToken() with the \c ColumnWidth of selector-name
    /// tokens that follow \c FirstObjCSelectorName, keeping the maximum.
    unsigned LongestObjCSelectorName = 0;
    /// Set by consumeToken() when a 'for' is seen; while set, consumeToken()
    /// may type a ':' as \c TT_RangeBasedForLoopColon.
    bool ColonIsForRangeExpr = false;
    /// Set by parseBrace(); while set, consumeToken() types a ':' as
    /// \c TT_DictLiteral.
    bool ColonIsDictLiteral = false;
    /// While set, consumeToken() types a ':' as \c TT_ObjCMethodExpr.
    /// NOTE(review): the code that sets this flag is not near this struct;
    /// presumably the square-bracket parser - confirm.
    bool ColonIsObjCMethodExpr = false;
    /// First token tagged \c TT_SelectorName by the ObjC colon handling in
    /// consumeToken(); its \c ObjCSelectorNameParts counts the parts seen.
    FormatToken *FirstObjCSelectorName = nullptr;
    /// Token most recently typed \c TT_StartOfName by determineTokenType();
    /// cleared again at a ';'. consumeToken() uses it on ',' to flag
    /// multi-variable declaration statements.
    FormatToken *FirstStartOfName = nullptr;
    /// Cleared by modifyContext() on 'new'; combined with \c IsExpression
    /// when classifying '*', '&' and '&&'.
    bool CanBeExpression = true;
    /// Forwarded to determineStarAmpUsage().
    /// NOTE(review): set outside the code near this struct - confirm where.
    bool InTemplateArgument = false;
    /// Set by modifyContext() for the token after a
    /// \c TT_CtorInitializerColon; commas are then typed
    /// \c TT_CtorInitializerComma.
    bool InCtorInitializer = false;
    /// Set by modifyContext() for the token after a \c TT_InheritanceColon;
    /// commas are then typed \c TT_InheritanceComma.
    bool InInheritanceList = false;
    /// Set by determineTokenType() when a unary '^' is seen; parseBrace()
    /// then types the next '{' as \c TT_ObjCBlockLBrace and clears the flag.
    bool CaretFound = false;
    /// While set, a ',' switches the context to expression mode (see
    /// consumeToken()).
    /// NOTE(review): set outside the code near this struct - confirm where.
    bool IsForEachMacro = false;
    /// NOTE(review): not referenced in the code near this struct; presumably
    /// true while inside a C++11 `[[...]]` attribute specifier - confirm.
    bool InCpp11AttributeSpecifier = false;
    /// NOTE(review): not referenced in the code near this struct; presumably
    /// true while inside a C# `[...]` attribute specifier - confirm.
    bool InCSharpAttributeSpecifier = false;
  };
1271 
1272   /// Puts a new \c Context onto the stack \c Contexts for the lifetime
1273   /// of each instance.
1274   struct ScopedContextCreator {
1275     AnnotatingParser &P;
1276 
1277     ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
1278                          unsigned Increase)
1279         : P(P) {
1280       P.Contexts.push_back(Context(ContextKind,
1281                                    P.Contexts.back().BindingStrength + Increase,
1282                                    P.Contexts.back().IsExpression));
1283     }
1284 
1285     ~ScopedContextCreator() { P.Contexts.pop_back(); }
1286   };
1287 
  /// Updates the innermost context based on \p Current. next() calls this for
  /// each token before determining the token's type.
  ///
  /// Mostly this toggles \c IsExpression; some branches also retype earlier
  /// '*', '&' and '&&' tokens as \c TT_PointerOrReference or set the
  /// constructor-initializer / inheritance-list flags. The branches are
  /// mutually exclusive and checked in order, so the first match wins.
  void modifyContext(const FormatToken &Current) {
    if (Current.getPrecedence() == prec::Assignment &&
        !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) &&
        // Type aliases use `type X = ...;` in TypeScript and can be exported
        // using `export type ...`.
        !(Style.Language == FormatStyle::LK_JavaScript &&
          (Line.startsWith(Keywords.kw_type, tok::identifier) ||
           Line.startsWith(tok::kw_export, Keywords.kw_type,
                           tok::identifier))) &&
        (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
      // An assignment (other than 'operator=' and the excluded line kinds
      // above) makes what follows an expression.
      Contexts.back().IsExpression = true;
      if (!Line.startsWith(TT_UnaryOperator)) {
        // Walk backwards over the left-hand side, stopping before a token
        // that directly follows a ',' or ';', and retype operator-typed
        // '*', '&', '&&' as pointer/reference.
        for (FormatToken *Previous = Current.Previous;
             Previous && Previous->Previous &&
             !Previous->Previous->isOneOf(tok::comma, tok::semi);
             Previous = Previous->Previous) {
          // Jump over a closed (...) or [...] group in one step.
          if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
            Previous = Previous->MatchingParen;
            if (!Previous)
              break;
          }
          // Do not leave the scope the assignment is in.
          if (Previous->opensScope())
            break;
          if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
              Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
              Previous->Previous && Previous->Previous->isNot(tok::equal))
            Previous->Type = TT_PointerOrReference;
        }
      }
    } else if (Current.is(tok::lessless) &&
               (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
      // '<<' that is not part of 'operator<<'.
      Contexts.back().IsExpression = true;
    } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
      Contexts.back().IsExpression = true;
    } else if (Current.is(TT_TrailingReturnArrow)) {
      Contexts.back().IsExpression = false;
    } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
      // Expression mode is switched on for Java and off for everything else.
      Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
    } else if (Current.Previous &&
               Current.Previous->is(TT_CtorInitializerColon)) {
      // First token of a constructor initializer list.
      Contexts.back().IsExpression = true;
      Contexts.back().InCtorInitializer = true;
    } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {
      // First token of an inheritance list.
      Contexts.back().InInheritanceList = true;
    } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
      // '*' and '&' directly in front of ')', '>' or ',' are typed as
      // pointer/reference.
      for (FormatToken *Previous = Current.Previous;
           Previous && Previous->isOneOf(tok::star, tok::amp);
           Previous = Previous->Previous)
        Previous->Type = TT_PointerOrReference;
      // Note: the constructor-initializer flag is read from the outermost
      // context (Contexts.front()), not the innermost one.
      if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer)
        Contexts.back().IsExpression = false;
    } else if (Current.is(tok::kw_new)) {
      Contexts.back().CanBeExpression = false;
    } else if (Current.isOneOf(tok::semi, tok::exclaim)) {
      // This should be the condition or increment in a for-loop.
      Contexts.back().IsExpression = true;
    }
  }
1346 
1347   void determineTokenType(FormatToken &Current) {
1348     if (!Current.is(TT_Unknown))
1349       // The token type is already known.
1350       return;
1351 
1352     if (Style.Language == FormatStyle::LK_JavaScript) {
1353       if (Current.is(tok::exclaim)) {
1354         if (Current.Previous &&
1355             (Current.Previous->isOneOf(tok::identifier, tok::kw_namespace,
1356                                        tok::r_paren, tok::r_square,
1357                                        tok::r_brace) ||
1358              Current.Previous->Tok.isLiteral())) {
1359           Current.Type = TT_JsNonNullAssertion;
1360           return;
1361         }
1362         if (Current.Next &&
1363             Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
1364           Current.Type = TT_JsNonNullAssertion;
1365           return;
1366         }
1367       }
1368     }
1369 
1370     // Line.MightBeFunctionDecl can only be true after the parentheses of a
1371     // function declaration have been found. In this case, 'Current' is a
1372     // trailing token of this declaration and thus cannot be a name.
1373     if (Current.is(Keywords.kw_instanceof)) {
1374       Current.Type = TT_BinaryOperator;
1375     } else if (isStartOfName(Current) &&
1376                (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
1377       Contexts.back().FirstStartOfName = &Current;
1378       Current.Type = TT_StartOfName;
1379     } else if (Current.is(tok::semi)) {
1380       // Reset FirstStartOfName after finding a semicolon so that a for loop
1381       // with multiple increment statements is not confused with a for loop
1382       // having multiple variable declarations.
1383       Contexts.back().FirstStartOfName = nullptr;
1384     } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
1385       AutoFound = true;
1386     } else if (Current.is(tok::arrow) &&
1387                Style.Language == FormatStyle::LK_Java) {
1388       Current.Type = TT_LambdaArrow;
1389     } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration &&
1390                Current.NestingLevel == 0) {
1391       Current.Type = TT_TrailingReturnArrow;
1392     } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
1393       Current.Type = determineStarAmpUsage(Current,
1394                                            Contexts.back().CanBeExpression &&
1395                                                Contexts.back().IsExpression,
1396                                            Contexts.back().InTemplateArgument);
1397     } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
1398       Current.Type = determinePlusMinusCaretUsage(Current);
1399       if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
1400         Contexts.back().CaretFound = true;
1401     } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
1402       Current.Type = determineIncrementUsage(Current);
1403     } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
1404       Current.Type = TT_UnaryOperator;
1405     } else if (Current.is(tok::question)) {
1406       if (Style.Language == FormatStyle::LK_JavaScript &&
1407           Line.MustBeDeclaration && !Contexts.back().IsExpression) {
1408         // In JavaScript, `interface X { foo?(): bar; }` is an optional method
1409         // on the interface, not a ternary expression.
1410         Current.Type = TT_JsTypeOptionalQuestion;
1411       } else {
1412         Current.Type = TT_ConditionalExpr;
1413       }
1414     } else if (Current.isBinaryOperator() &&
1415                (!Current.Previous || Current.Previous->isNot(tok::l_square)) &&
1416                (!Current.is(tok::greater) &&
1417                 Style.Language != FormatStyle::LK_TextProto)) {
1418       Current.Type = TT_BinaryOperator;
1419     } else if (Current.is(tok::comment)) {
1420       if (Current.TokenText.startswith("/*")) {
1421         if (Current.TokenText.endswith("*/"))
1422           Current.Type = TT_BlockComment;
1423         else
1424           // The lexer has for some reason determined a comment here. But we
1425           // cannot really handle it, if it isn't properly terminated.
1426           Current.Tok.setKind(tok::unknown);
1427       } else {
1428         Current.Type = TT_LineComment;
1429       }
1430     } else if (Current.is(tok::r_paren)) {
1431       if (rParenEndsCast(Current))
1432         Current.Type = TT_CastRParen;
1433       if (Current.MatchingParen && Current.Next &&
1434           !Current.Next->isBinaryOperator() &&
1435           !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
1436                                  tok::comma, tok::period, tok::arrow,
1437                                  tok::coloncolon))
1438         if (FormatToken *AfterParen = Current.MatchingParen->Next) {
1439           // Make sure this isn't the return type of an Obj-C block declaration
1440           if (AfterParen->Tok.isNot(tok::caret)) {
1441             if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
1442               if (BeforeParen->is(tok::identifier) &&
1443                   !BeforeParen->is(TT_TypenameMacro) &&
1444                   BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
1445                   (!BeforeParen->Previous ||
1446                    BeforeParen->Previous->ClosesTemplateDeclaration))
1447                 Current.Type = TT_FunctionAnnotationRParen;
1448           }
1449         }
1450     } else if (Current.is(tok::at) && Current.Next &&
1451                Style.Language != FormatStyle::LK_JavaScript &&
1452                Style.Language != FormatStyle::LK_Java) {
1453       // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
1454       // marks declarations and properties that need special formatting.
1455       switch (Current.Next->Tok.getObjCKeywordID()) {
1456       case tok::objc_interface:
1457       case tok::objc_implementation:
1458       case tok::objc_protocol:
1459         Current.Type = TT_ObjCDecl;
1460         break;
1461       case tok::objc_property:
1462         Current.Type = TT_ObjCProperty;
1463         break;
1464       default:
1465         break;
1466       }
1467     } else if (Current.is(tok::period)) {
1468       FormatToken *PreviousNoComment = Current.getPreviousNonComment();
1469       if (PreviousNoComment &&
1470           PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
1471         Current.Type = TT_DesignatedInitializerPeriod;
1472       else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
1473                Current.Previous->isOneOf(TT_JavaAnnotation,
1474                                          TT_LeadingJavaAnnotation)) {
1475         Current.Type = Current.Previous->Type;
1476       }
1477     } else if (canBeObjCSelectorComponent(Current) &&
1478                // FIXME(bug 36976): ObjC return types shouldn't use
1479                // TT_CastRParen.
1480                Current.Previous && Current.Previous->is(TT_CastRParen) &&
1481                Current.Previous->MatchingParen &&
1482                Current.Previous->MatchingParen->Previous &&
1483                Current.Previous->MatchingParen->Previous->is(
1484                    TT_ObjCMethodSpecifier)) {
1485       // This is the first part of an Objective-C selector name. (If there's no
1486       // colon after this, this is the only place which annotates the identifier
1487       // as a selector.)
1488       Current.Type = TT_SelectorName;
1489     } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
1490                Current.Previous &&
1491                !Current.Previous->isOneOf(tok::equal, tok::at) &&
1492                Line.MightBeFunctionDecl && Contexts.size() == 1) {
1493       // Line.MightBeFunctionDecl can only be true after the parentheses of a
1494       // function declaration have been found.
1495       Current.Type = TT_TrailingAnnotation;
1496     } else if ((Style.Language == FormatStyle::LK_Java ||
1497                 Style.Language == FormatStyle::LK_JavaScript) &&
1498                Current.Previous) {
1499       if (Current.Previous->is(tok::at) &&
1500           Current.isNot(Keywords.kw_interface)) {
1501         const FormatToken &AtToken = *Current.Previous;
1502         const FormatToken *Previous = AtToken.getPreviousNonComment();
1503         if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
1504           Current.Type = TT_LeadingJavaAnnotation;
1505         else
1506           Current.Type = TT_JavaAnnotation;
1507       } else if (Current.Previous->is(tok::period) &&
1508                  Current.Previous->isOneOf(TT_JavaAnnotation,
1509                                            TT_LeadingJavaAnnotation)) {
1510         Current.Type = Current.Previous->Type;
1511       }
1512     }
1513   }
1514 
1515   /// Take a guess at whether \p Tok starts a name of a function or
1516   /// variable declaration.
1517   ///
1518   /// This is a heuristic based on whether \p Tok is an identifier following
1519   /// something that is likely a type.
1520   bool isStartOfName(const FormatToken &Tok) {
1521     if (Tok.isNot(tok::identifier) || !Tok.Previous)
1522       return false;
1523 
1524     if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
1525                               Keywords.kw_as))
1526       return false;
1527     if (Style.Language == FormatStyle::LK_JavaScript &&
1528         Tok.Previous->is(Keywords.kw_in))
1529       return false;
1530 
1531     // Skip "const" as it does not have an influence on whether this is a name.
1532     FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
1533     while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
1534       PreviousNotConst = PreviousNotConst->getPreviousNonComment();
1535 
1536     if (!PreviousNotConst)
1537       return false;
1538 
1539     bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
1540                        PreviousNotConst->Previous &&
1541                        PreviousNotConst->Previous->is(tok::hash);
1542 
1543     if (PreviousNotConst->is(TT_TemplateCloser))
1544       return PreviousNotConst && PreviousNotConst->MatchingParen &&
1545              PreviousNotConst->MatchingParen->Previous &&
1546              PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
1547              PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
1548 
1549     if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen &&
1550         PreviousNotConst->MatchingParen->Previous &&
1551         PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype))
1552       return true;
1553 
1554     return (!IsPPKeyword &&
1555             PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) ||
1556            PreviousNotConst->is(TT_PointerOrReference) ||
1557            PreviousNotConst->isSimpleTypeSpecifier();
1558   }
1559 
1560   /// Determine whether ')' is ending a cast.
  bool rParenEndsCast(const FormatToken &Tok) {
    // \p Tok is the ')' under inspection. The checks below are ordered
    // heuristics: the first one that reaches a verdict decides the result.

    // C-style casts are only used in C++ and Java.
    if (!Style.isCpp() && Style.Language != FormatStyle::LK_Java)
      return false;

    // Empty parens aren't casts and there are no casts at the end of the line.
    if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
      return false;

    // First non-comment token in front of the matching '(' (may be null).
    FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
    if (LeftOfParens) {
      // If there is a closing parenthesis left of the current parentheses,
      // look past it as these might be chained casts.
      if (LeftOfParens->is(tok::r_paren)) {
        if (!LeftOfParens->MatchingParen ||
            !LeftOfParens->MatchingParen->Previous)
          return false;
        LeftOfParens = LeftOfParens->MatchingParen->Previous;
      }

      // If there is an identifier (or with a few exceptions a keyword) right
      // before the parentheses, this is unlikely to be a cast.
      if (LeftOfParens->Tok.getIdentifierInfo() &&
          !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
                                 tok::kw_delete))
        return false;

      // Certain other tokens right before the parentheses are also signals that
      // this cannot be a cast.
      if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
                                TT_TemplateCloser, tok::ellipsis))
        return false;
    }

    // A '?' right after the ')' rules out a cast.
    if (Tok.Next->is(tok::question))
      return false;

    // As Java has no function types, a "(" after the ")" likely means that this
    // is a cast.
    if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
      return true;

    // If a (non-string) literal follows, this is likely a cast.
    // `sizeof` and `alignof` after the ')' are handled the same way.
    if (Tok.Next->isNot(tok::string_literal) &&
        (Tok.Next->Tok.isLiteral() ||
         Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
      return true;

    // Heuristically try to determine whether the parentheses contain a type.
    // Only the last token inside the parentheses is inspected.
    bool ParensAreType =
        !Tok.Previous ||
        Tok.Previous->isOneOf(TT_PointerOrReference, TT_TemplateCloser) ||
        Tok.Previous->isSimpleTypeSpecifier();
    // '=', ';', '{' or '>' after the ')' suggest the parentheses close a
    // declaration instead of a cast.
    bool ParensCouldEndDecl =
        Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
    if (ParensAreType && !ParensCouldEndDecl)
      return true;

    // At this point, we heuristically assume that there are no casts at the
    // start of the line. We assume that we have found most cases where there
    // are by the logic above, e.g. "(void)x;".
    if (!LeftOfParens)
      return false;

    // Certain token types inside the parentheses mean that this can't be a
    // cast.
    for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok;
         Token = Token->Next)
      if (Token->is(TT_BinaryOperator))
        return false;

    // If the following token is an identifier or 'this', this is a cast. All
    // cases where this can be something else are handled above.
    if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
      return true;

    // The remaining check needs two tokens after the ')'.
    if (!Tok.Next->Next)
      return false;

    // If the next token after the parenthesis is a unary operator, assume
    // that this is cast, unless there are unexpected tokens inside the
    // parenthesis.
    bool NextIsUnary =
        Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star);
    // '+' is explicitly excluded, and the operand after the operator must be
    // an identifier or a numeric constant.
    if (!NextIsUnary || Tok.Next->is(tok::plus) ||
        !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant))
      return false;
    // Search for unexpected tokens.
    // Walking backwards from the ')', only `const`, identifiers and '::' are
    // accepted between the parentheses.
    for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
         Prev = Prev->Previous) {
      if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
        return false;
    }
    return true;
  }
1656 
1657   /// Return the type of the given token assuming it is * or &.
  TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
                                  bool InTemplateArgument) {
    // Returns TT_UnaryOperator, TT_BinaryOperator or TT_PointerOrReference.
    // The checks are ordered heuristics on the non-comment neighbours of
    // \p Tok; the first one that matches decides the result.

    // In JavaScript the token is always classified as a binary operator.
    if (Style.Language == FormatStyle::LK_JavaScript)
      return TT_BinaryOperator;

    // Nothing in front of the token: it can only be a prefix operator.
    const FormatToken *PrevToken = Tok.getPreviousNonComment();
    if (!PrevToken)
      return TT_UnaryOperator;

    // Nothing after the token, or '->', '=', `const`, or a '{' that is the
    // last non-comment token: pointer or reference.
    const FormatToken *NextToken = Tok.getNextNonComment();
    if (!NextToken ||
        NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_const) ||
        (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
      return TT_PointerOrReference;

    // Directly after '::'.
    if (PrevToken->is(tok::coloncolon))
      return TT_PointerOrReference;

    // Previous tokens after which an operand is expected, so the token is a
    // prefix operator.
    if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
                           tok::comma, tok::semi, tok::kw_return, tok::colon,
                           tok::equal, tok::kw_delete, tok::kw_sizeof,
                           tok::kw_throw) ||
        PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr,
                           TT_UnaryOperator, TT_CastRParen))
      return TT_UnaryOperator;

    // Followed by a '[' that does not start a lambda, by `operator` outside of
    // an expression, or by ',' / ';'.
    if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
      return TT_PointerOrReference;
    if (NextToken->is(tok::kw_operator) && !IsExpression)
      return TT_PointerOrReference;
    if (NextToken->isOneOf(tok::comma, tok::semi))
      return TT_PointerOrReference;

    // After the closing parenthesis of typeof(...), decltype(...) or a
    // typename macro invocation.
    if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen) {
      FormatToken *TokenBeforeMatchingParen =
          PrevToken->MatchingParen->getPreviousNonComment();
      if (TokenBeforeMatchingParen &&
          TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype,
                                            TT_TypenameMacro))
        return TT_PointerOrReference;
    }

    // Literals, `true`/`false`, closing brackets on the left, or literals,
    // `true`/`false` or a unary operator on the right: binary operator.
    if (PrevToken->Tok.isLiteral() ||
        PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
                           tok::kw_false, tok::r_brace) ||
        NextToken->Tok.isLiteral() ||
        NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
        NextToken->isUnaryOperator() ||
        // If we know we're in a template argument, there are no named
        // declarations. Thus, having an identifier on the right-hand side
        // indicates a binary operator.
        (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
      return TT_BinaryOperator;

    // "&&(" is quite unlikely to be two successive unary "&".
    if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren))
      return TT_BinaryOperator;

    // This catches some cases where evaluation order is used as control flow:
    //   aaa && aaa->f();
    const FormatToken *NextNextToken = NextToken->getNextNonComment();
    if (NextNextToken && NextNextToken->is(tok::arrow))
      return TT_BinaryOperator;

    // It is very unlikely that we are going to find a pointer or reference type
    // definition on the RHS of an assignment.
    // The exception is a context in which a unary '^' was recorded
    // (Contexts.back().CaretFound).
    if (IsExpression && !Contexts.back().CaretFound)
      return TT_BinaryOperator;

    // Default when no heuristic matched.
    return TT_PointerOrReference;
  }
1729 
1730   TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
1731     const FormatToken *PrevToken = Tok.getPreviousNonComment();
1732     if (!PrevToken)
1733       return TT_UnaryOperator;
1734 
1735     if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator))
1736       // This must be a sequence of leading unary operators.
1737       return TT_UnaryOperator;
1738 
1739     // Use heuristics to recognize unary operators.
1740     if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
1741                            tok::question, tok::colon, tok::kw_return,
1742                            tok::kw_case, tok::at, tok::l_brace))
1743       return TT_UnaryOperator;
1744 
1745     // There can't be two consecutive binary operators.
1746     if (PrevToken->is(TT_BinaryOperator))
1747       return TT_UnaryOperator;
1748 
1749     // Fall back to marking the token as binary operator.
1750     return TT_BinaryOperator;
1751   }
1752 
1753   /// Determine whether ++/-- are pre- or post-increments/-decrements.
1754   TokenType determineIncrementUsage(const FormatToken &Tok) {
1755     const FormatToken *PrevToken = Tok.getPreviousNonComment();
1756     if (!PrevToken || PrevToken->is(TT_CastRParen))
1757       return TT_UnaryOperator;
1758     if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
1759       return TT_TrailingUnaryOperator;
1760 
1761     return TT_UnaryOperator;
1762   }
1763 
1764   SmallVector<Context, 8> Contexts;
1765 
1766   const FormatStyle &Style;
1767   AnnotatedLine &Line;
1768   FormatToken *CurrentToken;
1769   bool AutoFound;
1770   const AdditionalKeywords &Keywords;
1771 
  // Set of "<" tokens that do not open a template parameter list. If parseAngle
  // determines that a specific token can't be a template opener, it will make
  // the same decision irrespective of the decisions for tokens leading up to
  // it. Store this information to prevent this from causing exponential
  // runtime.
1776   llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
1777 };
1778 
// Pseudo precedence levels numerically above prec::PointerToMember, used by
// ExpressionParser: parse() hands off to parseUnaryOperator() at
// PrecedenceUnaryOperator, getCurrentPrecedence() reports
// PrecedenceArrowAndPeriod for '.' and '->', and parse() does nothing for any
// level greater than PrecedenceArrowAndPeriod.
static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
1781 
1782 /// Parses binary expressions by inserting fake parenthesis based on
1783 /// operator precedence.
class ExpressionParser {
public:
  /// Creates a parser positioned at the first token of \p Line.
  ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
                   AnnotatedLine &Line)
      : Style(Style), Keywords(Keywords), Current(Line.First) {}

  /// Parse expressions with the given operator precedence.
  ///
  /// Recurses with \p Precedence + 1 to consume tighter-binding operators
  /// first, links the operators found at exactly \p Precedence through
  /// NextOperator / OperatorIndex, and finally wraps the parsed range in fake
  /// parentheses.
  void parse(int Precedence = 0) {
    // Skip 'return' and ObjC selector colons as they are not part of a binary
    // expression.
    while (Current && (Current->is(tok::kw_return) ||
                       (Current->is(tok::colon) &&
                        Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))))
      next();

    // End of the recursion: no tokens left, or no level binds tighter than
    // '.' and '->'.
    if (!Current || Precedence > PrecedenceArrowAndPeriod)
      return;

    // Conditional expressions need to be parsed separately for proper nesting.
    if (Precedence == prec::Conditional) {
      parseConditionalExpr();
      return;
    }

    // Parse unary operators, which all have a higher precedence than binary
    // operators.
    if (Precedence == PrecedenceUnaryOperator) {
      parseUnaryOperator();
      return;
    }

    // Start: token that receives the opening fake parenthesis.
    // LatestOperator: most recent operator seen at exactly this precedence.
    // OperatorIndex: running index assigned to those operators.
    FormatToken *Start = Current;
    FormatToken *LatestOperator = nullptr;
    unsigned OperatorIndex = 0;

    while (Current) {
      // Consume operators with higher precedence.
      parse(Precedence + 1);

      int CurrentPrecedence = getCurrentPrecedence();

      // A selector name at this level starts a new range: close the range
      // parsed so far (if it contained an operator) and restart from here.
      if (Current && Current->is(TT_SelectorName) &&
          Precedence == CurrentPrecedence) {
        if (LatestOperator)
          addFakeParenthesis(Start, prec::Level(Precedence));
        Start = Current;
      }

      // At the end of the line or when an operator with higher precedence is
      // found, insert fake parenthesis and return.
      if (!Current ||
          (Current->closesScope() &&
           (Current->MatchingParen || Current->is(TT_TemplateString))) ||
          (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
          (CurrentPrecedence == prec::Conditional &&
           Precedence == prec::Assignment && Current->is(tok::colon))) {
        break;
      }

      // Consume scopes: (), [], <> and {}
      if (Current->opensScope()) {
        // A fragment of a JavaScript template string can look like '}..${' and
        // thus close a scope and open a new one at the same time.
        while (Current && (!Current->closesScope() || Current->opensScope())) {
          next();
          parse();
        }
        // Step over the token that closes the scope.
        next();
      } else {
        // Operator found.
        if (CurrentPrecedence == Precedence) {
          if (LatestOperator)
            LatestOperator->NextOperator = Current;
          LatestOperator = Current;
          Current->OperatorIndex = OperatorIndex;
          ++OperatorIndex;
        }
        // At the top level (Precedence == 0), comments that start on a new
        // line are not skipped here.
        next(/*SkipPastLeadingComments=*/Precedence > 0);
      }
    }

    // Wrap the range only if it contained an operator at this level. At the
    // top level this additionally requires that the end of the line was not
    // reached.
    if (LatestOperator && (Current || Precedence > 0)) {
      // LatestOperator->LastOperator = true;
      if (Precedence == PrecedenceArrowAndPeriod) {
        // Call expressions don't have a binary operator precedence.
        addFakeParenthesis(Start, prec::Unknown);
      } else {
        addFakeParenthesis(Start, prec::Level(Precedence));
      }
    }
  }

private:
  /// Gets the precedence (+1) of the given token for binary operators
  /// and other tokens that we treat like binary operators.
  ///
  /// Returns -1 if there is no current token or it is not treated as an
  /// operator.
  int getCurrentPrecedence() {
    if (Current) {
      const FormatToken *NextNonComment = Current->getNextNonComment();
      if (Current->is(TT_ConditionalExpr))
        return prec::Conditional;
      // A selector name followed by a dict-literal / JS type colon, or by '<'
      // in Proto and TextProto, is treated like an assignment.
      if (NextNonComment && Current->is(TT_SelectorName) &&
          (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||
           ((Style.Language == FormatStyle::LK_Proto ||
             Style.Language == FormatStyle::LK_TextProto) &&
            NextNonComment->is(tok::less))))
        return prec::Assignment;
      if (Current->is(TT_JsComputedPropertyName))
        return prec::Assignment;
      if (Current->is(TT_LambdaArrow))
        return prec::Comma;
      if (Current->is(TT_JsFatArrow))
        return prec::Assignment;
      // ';', inline-asm colons and selector names (or a comment directly in
      // front of a selector name) get the lowest level, 0.
      if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
          (Current->is(tok::comment) && NextNonComment &&
           NextNonComment->is(TT_SelectorName)))
        return 0;
      if (Current->is(TT_RangeBasedForLoopColon))
        return prec::Comma;
      if ((Style.Language == FormatStyle::LK_Java ||
           Style.Language == FormatStyle::LK_JavaScript) &&
          Current->is(Keywords.kw_instanceof))
        return prec::Relational;
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Current->isOneOf(Keywords.kw_in, Keywords.kw_as))
        return prec::Relational;
      if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
        return Current->getPrecedence();
      if (Current->isOneOf(tok::period, tok::arrow))
        return PrecedenceArrowAndPeriod;
      // `extends`, `implements` and `throws` in Java / JavaScript also get
      // level 0.
      if ((Style.Language == FormatStyle::LK_Java ||
           Style.Language == FormatStyle::LK_JavaScript) &&
          Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
                           Keywords.kw_throws))
        return 0;
    }
    return -1;
  }

  /// Records an opening fake parenthesis of level \p Precedence on \p Start
  /// and, unless the end of the line has been reached, a closing one on the
  /// token in front of Current (skipping back over comments where possible).
  /// Levels above prec::Unknown also mark the two tokens as the start and end
  /// of a binary expression.
  void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
    Start->FakeLParens.push_back(Precedence);
    if (Precedence > prec::Unknown)
      Start->StartsBinaryExpression = true;
    if (Current) {
      FormatToken *Previous = Current->Previous;
      while (Previous->is(tok::comment) && Previous->Previous)
        Previous = Previous->Previous;
      ++Previous->FakeRParens;
      if (Precedence > prec::Unknown)
        Previous->EndsBinaryExpression = true;
    }
  }

  /// Parse unary operator expressions and surround them with fake
  /// parentheses if appropriate.
  void parseUnaryOperator() {
    // Collect the run of leading unary operators, parse their operand, then
    // wrap each operator, innermost (last collected) first.
    llvm::SmallVector<FormatToken *, 2> Tokens;
    while (Current && Current->is(TT_UnaryOperator)) {
      Tokens.push_back(Current);
      next();
    }
    parse(PrecedenceArrowAndPeriod);
    for (FormatToken *Token : llvm::reverse(Tokens))
      // The actual precedence doesn't matter.
      addFakeParenthesis(Token, prec::Unknown);
  }

  /// Parses `condition ? a : b`. Fake parentheses around the whole expression
  /// are only added when both the '?' and the matching TT_ConditionalExpr
  /// token (the ':') are found.
  void parseConditionalExpr() {
    while (Current && Current->isTrailingComment()) {
      next();
    }
    FormatToken *Start = Current;
    parse(prec::LogicalOr);
    if (!Current || !Current->is(tok::question))
      return;
    next();
    parse(prec::Assignment);
    if (!Current || Current->isNot(TT_ConditionalExpr))
      return;
    next();
    parse(prec::Assignment);
    addFakeParenthesis(Start, prec::Conditional);
  }

  /// Advances Current by one token and then past trailing comments. A
  /// trailing comment that starts on a new line is only skipped when
  /// \p SkipPastLeadingComments is true.
  void next(bool SkipPastLeadingComments = true) {
    if (Current)
      Current = Current->Next;
    while (Current &&
           (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
           Current->isTrailingComment())
      Current = Current->Next;
  }

  const FormatStyle &Style;
  const AdditionalKeywords &Keywords;
  // Token the parser is currently looking at; null once the line is consumed.
  FormatToken *Current;
};
1980 
1981 } // end anonymous namespace
1982 
1983 void TokenAnnotator::setCommentLineLevels(
1984     SmallVectorImpl<AnnotatedLine *> &Lines) {
1985   const AnnotatedLine *NextNonCommentLine = nullptr;
1986   for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
1987                                                           E = Lines.rend();
1988        I != E; ++I) {
1989     bool CommentLine = true;
1990     for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) {
1991       if (!Tok->is(tok::comment)) {
1992         CommentLine = false;
1993         break;
1994       }
1995     }
1996 
1997     // If the comment is currently aligned with the line immediately following
1998     // it, that's probably intentional and we should keep it.
1999     if (NextNonCommentLine && CommentLine &&
2000         NextNonCommentLine->First->NewlinesBefore <= 1 &&
2001         NextNonCommentLine->First->OriginalColumn ==
2002             (*I)->First->OriginalColumn) {
2003       // Align comments for preprocessor lines with the # in column 0 if
2004       // preprocessor lines are not indented. Otherwise, align with the next
2005       // line.
2006       (*I)->Level =
2007           (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
2008            (NextNonCommentLine->Type == LT_PreprocessorDirective ||
2009             NextNonCommentLine->Type == LT_ImportStatement))
2010               ? 0
2011               : NextNonCommentLine->Level;
2012     } else {
2013       NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
2014     }
2015 
2016     setCommentLineLevels((*I)->Children);
2017   }
2018 }
2019 
2020 static unsigned maxNestingDepth(const AnnotatedLine &Line) {
2021   unsigned Result = 0;
2022   for (const auto *Tok = Line.First; Tok != nullptr; Tok = Tok->Next)
2023     Result = std::max(Result, Tok->NestingLevel);
2024   return Result;
2025 }
2026 
2027 void TokenAnnotator::annotate(AnnotatedLine &Line) {
2028   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
2029                                                   E = Line.Children.end();
2030        I != E; ++I) {
2031     annotate(**I);
2032   }
2033   AnnotatingParser Parser(Style, Line, Keywords);
2034   Line.Type = Parser.parseLine();
2035 
2036   // With very deep nesting, ExpressionParser uses lots of stack and the
2037   // formatting algorithm is very slow. We're not going to do a good job here
2038   // anyway - it's probably generated code being formatted by mistake.
2039   // Just skip the whole line.
2040   if (maxNestingDepth(Line) > 50)
2041     Line.Type = LT_Invalid;
2042 
2043   if (Line.Type == LT_Invalid)
2044     return;
2045 
2046   ExpressionParser ExprParser(Style, Keywords, Line);
2047   ExprParser.parse();
2048 
2049   if (Line.startsWith(TT_ObjCMethodSpecifier))
2050     Line.Type = LT_ObjCMethodDecl;
2051   else if (Line.startsWith(TT_ObjCDecl))
2052     Line.Type = LT_ObjCDecl;
2053   else if (Line.startsWith(TT_ObjCProperty))
2054     Line.Type = LT_ObjCProperty;
2055 
2056   Line.First->SpacesRequiredBefore = 1;
2057   Line.First->CanBreakBefore = Line.First->MustBreakBefore;
2058 }
2059 
2060 // This function heuristically determines whether 'Current' starts the name of a
2061 // function declaration.
2062 static bool isFunctionDeclarationName(const FormatToken &Current,
2063                                       const AnnotatedLine &Line) {
2064   auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
2065     for (; Next; Next = Next->Next) {
2066       if (Next->is(TT_OverloadedOperatorLParen))
2067         return Next;
2068       if (Next->is(TT_OverloadedOperator))
2069         continue;
2070       if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
2071         // For 'new[]' and 'delete[]'.
2072         if (Next->Next && Next->Next->is(tok::l_square) && Next->Next->Next &&
2073             Next->Next->Next->is(tok::r_square))
2074           Next = Next->Next->Next;
2075         continue;
2076       }
2077 
2078       break;
2079     }
2080     return nullptr;
2081   };
2082 
2083   // Find parentheses of parameter list.
2084   const FormatToken *Next = Current.Next;
2085   if (Current.is(tok::kw_operator)) {
2086     if (Current.Previous && Current.Previous->is(tok::coloncolon))
2087       return false;
2088     Next = skipOperatorName(Next);
2089   } else {
2090     if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0)
2091       return false;
2092     for (; Next; Next = Next->Next) {
2093       if (Next->is(TT_TemplateOpener)) {
2094         Next = Next->MatchingParen;
2095       } else if (Next->is(tok::coloncolon)) {
2096         Next = Next->Next;
2097         if (!Next)
2098           return false;
2099         if (Next->is(tok::kw_operator)) {
2100           Next = skipOperatorName(Next->Next);
2101           break;
2102         }
2103         if (!Next->is(tok::identifier))
2104           return false;
2105       } else if (Next->is(tok::l_paren)) {
2106         break;
2107       } else {
2108         return false;
2109       }
2110     }
2111   }
2112 
2113   // Check whether parameter list can belong to a function declaration.
2114   if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen)
2115     return false;
2116   // If the lines ends with "{", this is likely an function definition.
2117   if (Line.Last->is(tok::l_brace))
2118     return true;
2119   if (Next->Next == Next->MatchingParen)
2120     return true; // Empty parentheses.
2121   // If there is an &/&& after the r_paren, this is likely a function.
2122   if (Next->MatchingParen->Next &&
2123       Next->MatchingParen->Next->is(TT_PointerOrReference))
2124     return true;
2125   for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
2126        Tok = Tok->Next) {
2127     if (Tok->isOneOf(tok::l_paren, TT_TemplateOpener) && Tok->MatchingParen) {
2128       Tok = Tok->MatchingParen;
2129       continue;
2130     }
2131     if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
2132         Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis))
2133       return true;
2134     if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
2135         Tok->Tok.isLiteral())
2136       return false;
2137   }
2138   return false;
2139 }
2140 
2141 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
2142   assert(Line.MightBeFunctionDecl);
2143 
2144   if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
2145        Style.AlwaysBreakAfterReturnType ==
2146            FormatStyle::RTBS_TopLevelDefinitions) &&
2147       Line.Level > 0)
2148     return false;
2149 
2150   switch (Style.AlwaysBreakAfterReturnType) {
2151   case FormatStyle::RTBS_None:
2152     return false;
2153   case FormatStyle::RTBS_All:
2154   case FormatStyle::RTBS_TopLevel:
2155     return true;
2156   case FormatStyle::RTBS_AllDefinitions:
2157   case FormatStyle::RTBS_TopLevelDefinitions:
2158     return Line.mightBeFunctionDefinition();
2159   }
2160 
2161   return false;
2162 }
2163 
2164 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
2165   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
2166                                                   E = Line.Children.end();
2167        I != E; ++I) {
2168     calculateFormattingInformation(**I);
2169   }
2170 
2171   Line.First->TotalLength =
2172       Line.First->IsMultiline ? Style.ColumnLimit
2173                               : Line.FirstStartColumn + Line.First->ColumnWidth;
2174   FormatToken *Current = Line.First->Next;
2175   bool InFunctionDecl = Line.MightBeFunctionDecl;
2176   while (Current) {
2177     if (isFunctionDeclarationName(*Current, Line))
2178       Current->Type = TT_FunctionDeclarationName;
2179     if (Current->is(TT_LineComment)) {
2180       if (Current->Previous->BlockKind == BK_BracedInit &&
2181           Current->Previous->opensScope())
2182         Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
2183       else
2184         Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
2185 
2186       // If we find a trailing comment, iterate backwards to determine whether
2187       // it seems to relate to a specific parameter. If so, break before that
2188       // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
2189       // to the previous line in:
2190       //   SomeFunction(a,
2191       //                b, // comment
2192       //                c);
2193       if (!Current->HasUnescapedNewline) {
2194         for (FormatToken *Parameter = Current->Previous; Parameter;
2195              Parameter = Parameter->Previous) {
2196           if (Parameter->isOneOf(tok::comment, tok::r_brace))
2197             break;
2198           if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
2199             if (!Parameter->Previous->is(TT_CtorInitializerComma) &&
2200                 Parameter->HasUnescapedNewline)
2201               Parameter->MustBreakBefore = true;
2202             break;
2203           }
2204         }
2205       }
2206     } else if (Current->SpacesRequiredBefore == 0 &&
2207                spaceRequiredBefore(Line, *Current)) {
2208       Current->SpacesRequiredBefore = 1;
2209     }
2210 
2211     Current->MustBreakBefore =
2212         Current->MustBreakBefore || mustBreakBefore(Line, *Current);
2213 
2214     if (!Current->MustBreakBefore && InFunctionDecl &&
2215         Current->is(TT_FunctionDeclarationName))
2216       Current->MustBreakBefore = mustBreakForReturnType(Line);
2217 
2218     Current->CanBreakBefore =
2219         Current->MustBreakBefore || canBreakBefore(Line, *Current);
2220     unsigned ChildSize = 0;
2221     if (Current->Previous->Children.size() == 1) {
2222       FormatToken &LastOfChild = *Current->Previous->Children[0]->Last;
2223       ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
2224                                                   : LastOfChild.TotalLength + 1;
2225     }
2226     const FormatToken *Prev = Current->Previous;
2227     if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
2228         (Prev->Children.size() == 1 &&
2229          Prev->Children[0]->First->MustBreakBefore) ||
2230         Current->IsMultiline)
2231       Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
2232     else
2233       Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
2234                              ChildSize + Current->SpacesRequiredBefore;
2235 
2236     if (Current->is(TT_CtorInitializerColon))
2237       InFunctionDecl = false;
2238 
2239     // FIXME: Only calculate this if CanBreakBefore is true once static
2240     // initializers etc. are sorted out.
2241     // FIXME: Move magic numbers to a better place.
2242 
2243     // Reduce penalty for aligning ObjC method arguments using the colon
2244     // alignment as this is the canonical way (still prefer fitting everything
2245     // into one line if possible). Trying to fit a whole expression into one
2246     // line should not force other line breaks (e.g. when ObjC method
2247     // expression is a part of other expression).
2248     Current->SplitPenalty = splitPenalty(Line, *Current, InFunctionDecl);
2249     if (Style.Language == FormatStyle::LK_ObjC &&
2250         Current->is(TT_SelectorName) && Current->ParameterIndex > 0) {
2251       if (Current->ParameterIndex == 1)
2252         Current->SplitPenalty += 5 * Current->BindingStrength;
2253     } else {
2254       Current->SplitPenalty += 20 * Current->BindingStrength;
2255     }
2256 
2257     Current = Current->Next;
2258   }
2259 
2260   calculateUnbreakableTailLengths(Line);
2261   unsigned IndentLevel = Line.Level;
2262   for (Current = Line.First; Current != nullptr; Current = Current->Next) {
2263     if (Current->Role)
2264       Current->Role->precomputeFormattingInfos(Current);
2265     if (Current->MatchingParen &&
2266         Current->MatchingParen->opensBlockOrBlockTypeList(Style)) {
2267       assert(IndentLevel > 0);
2268       --IndentLevel;
2269     }
2270     Current->IndentLevel = IndentLevel;
2271     if (Current->opensBlockOrBlockTypeList(Style))
2272       ++IndentLevel;
2273   }
2274 
2275   LLVM_DEBUG({ printDebugInfo(Line); });
2276 }
2277 
2278 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
2279   unsigned UnbreakableTailLength = 0;
2280   FormatToken *Current = Line.Last;
2281   while (Current) {
2282     Current->UnbreakableTailLength = UnbreakableTailLength;
2283     if (Current->CanBreakBefore ||
2284         Current->isOneOf(tok::comment, tok::string_literal)) {
2285       UnbreakableTailLength = 0;
2286     } else {
2287       UnbreakableTailLength +=
2288           Current->ColumnWidth + Current->SpacesRequiredBefore;
2289     }
2290     Current = Current->Previous;
2291   }
2292 }
2293 
/// Computes the penalty for placing a line break between \p Tok.Previous
/// (called \c Left below) and \p Tok (called \c Right below).
///
/// The rules are evaluated top to bottom and the first one that matches
/// returns, so their order is significant. \p Tok.Previous is dereferenced
/// unconditionally and therefore must not be null.
///
/// \param Line           The line containing \p Tok.
/// \param Tok            The token before which a break is considered.
/// \param InFunctionDecl Caller-supplied flag that enables the
///                       function-declaration specific penalties.
/// \returns The penalty value; smaller values make the break more attractive.
unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
                                      const FormatToken &Tok,
                                      bool InFunctionDecl) {
  const FormatToken &Left = *Tok.Previous;
  const FormatToken &Right = Tok;

  // Breaking directly after a ';' carries no penalty.
  if (Left.is(tok::semi))
    return 0;

  // Language-specific rules for Java and JavaScript; when none of them
  // matches, control falls through to the general rules below.
  if (Style.Language == FormatStyle::LK_Java) {
    if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
      return 1;
    if (Right.is(Keywords.kw_implements))
      return 2;
    if (Left.is(tok::comma) && Left.NestingLevel == 0)
      return 3;
  } else if (Style.Language == FormatStyle::LK_JavaScript) {
    if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
      return 100;
    if (Left.is(TT_JsTypeColon))
      return 35;
    // Breaking right after "${" or right before "}" of a template string.
    if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
        (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
      return 100;
    // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".
    if (Left.opensScope() && Right.closesScope())
      return 200;
  }

  // Breaking before an identifier that is followed by a dict-literal token.
  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
    return 1;
  // Rules for breaking before '['. Square brackets of the kinds listed in the
  // last check are not decided here and continue with the rules below.
  if (Right.is(tok::l_square)) {
    if (Style.Language == FormatStyle::LK_Proto)
      return 1;
    if (Left.is(tok::r_square))
      return 200;
    // Slightly prefer formatting local lambda definitions like functions.
    if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
      return 35;
    if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
                       TT_ArrayInitializerLSquare,
                       TT_DesignatedInitializerLSquare, TT_AttributeSquare))
      return 500;
  }

  // Breaking after "::", or before "." in Proto.
  if (Left.is(tok::coloncolon) ||
      (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
    return 500;
  // Breaking before the start of a name, a function declaration name or the
  // 'operator' keyword.
  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
      Right.is(tok::kw_operator)) {
    if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
      return 3;
    if (Left.is(TT_StartOfName))
      return 110;
    // At nesting level 0 of a function declaration the penalty is the
    // user-configurable PenaltyReturnTypeOnItsOwnLine.
    if (InFunctionDecl && Right.NestingLevel == 0)
      return Style.PenaltyReturnTypeOnItsOwnLine;
    return 200;
  }
  if (Right.is(TT_PointerOrReference))
    return 190;
  if (Right.is(TT_LambdaArrow))
    return 110;
  if (Left.is(tok::equal) && Right.is(tok::l_brace))
    return 160;
  if (Left.is(TT_CastRParen))
    return 100;
  if (Left.isOneOf(tok::kw_class, tok::kw_struct))
    return 5000;
  if (Left.is(tok::comment))
    return 1000;

  // Breaking after the ':' of a range-based for loop, an inheritance list or
  // a constructor initializer list.
  if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
                   TT_CtorInitializerColon))
    return 2;

  if (Right.isMemberAccess()) {
    // Breaking before the "./->" of a chained call/member access is reasonably
    // cheap, as formatting those with one call per line is generally
    // desirable. In particular, it should be cheaper to break before the call
    // than it is to break inside a call's parameters, which could lead to weird
    // "hanging" indents. The exception is the very last "./->" to support this
    // frequent pattern:
    //
    //   aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
    //       dddddddd);
    //
    // which might otherwise be blown up onto many lines. Here, clang-format
    // won't produce "hanging" indents anyway as there is no other trailing
    // call.
    //
    // Also apply a higher penalty if it is not a call, as that might lead to
    // a wrapping like:
    //
    //   aaaaaaa
    //       .aaaaaaaaa.bbbbbbbb(cccccccc);
    return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
               ? 150
               : 35;
  }

  // Trailing annotations that are not followed by '('.
  if (Right.is(TT_TrailingAnnotation) &&
      (!Right.Next || Right.Next->isNot(tok::l_paren))) {
    // Moving trailing annotations to the next line is fine for ObjC method
    // declarations.
    if (Line.startsWith(TT_ObjCMethodSpecifier))
      return 10;
    // Generally, breaking before a trailing annotation is bad unless it is
    // function-like. It seems to be especially preferable to keep standard
    // annotations (i.e. "const", "final" and "override") on the same line.
    // Use a slightly higher penalty after ")" so that annotations like
    // "const override" are kept together.
    // "Short" here means fewer than 10 characters of token text.
    bool is_short_annotation = Right.TokenText.size() < 10;
    return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
  }

  // In for-loops, prefer breaking at ',' and ';'.
  if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
    return 4;

  // In Objective-C method expressions, prefer breaking before "param:" over
  // breaking after it.
  if (Right.is(TT_SelectorName))
    return 0;
  if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
    return Line.MightBeFunctionDecl ? 50 : 500;

  // In Objective-C type declarations, avoid breaking after the category's
  // open paren (we'll prefer breaking after the protocol list's opening
  // angle bracket, if present).
  if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous &&
      Left.Previous->isOneOf(tok::identifier, tok::greater))
    return 500;

  // Breaking after '(' in a function declaration, unless alignment after
  // open brackets is disabled.
  if (Left.is(tok::l_paren) && InFunctionDecl &&
      Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)
    return 100;
  // Breaking after the '(' that follows 'for' or an if-like keyword.
  if (Left.is(tok::l_paren) && Left.Previous &&
      (Left.Previous->is(tok::kw_for) || Left.Previous->isIf()))
    return 1000;
  if (Left.is(tok::equal) && InFunctionDecl)
    return 110;
  if (Right.is(tok::r_brace))
    return 1;
  if (Left.is(TT_TemplateOpener))
    return 100;
  // Breaking after any other scope opener.
  if (Left.opensScope()) {
    if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
      return 0;
    if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle)
      return 19;
    // With more than one parameter the configurable
    // PenaltyBreakBeforeFirstCallParameter applies.
    return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
                                   : 19;
  }
  if (Left.is(TT_JavaAnnotation))
    return 50;

  if (Left.is(TT_UnaryOperator))
    return 60;
  // Breaking around '+' or ',' next to a label string. Note that this check
  // must precede the generic comma rule below.
  if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
      Left.Previous->isLabelString() &&
      (Left.NextOperator || Left.OperatorIndex != 0))
    return 50;
  if (Right.is(tok::plus) && Left.isLabelString() &&
      (Right.NextOperator || Right.OperatorIndex != 0))
    return 25;
  if (Left.is(tok::comma))
    return 1;
  if (Right.is(tok::lessless) && Left.isLabelString() &&
      (Right.NextOperator || Right.OperatorIndex != 1))
    return 25;
  if (Right.is(tok::lessless)) {
    // Breaking at a << is really cheap.
    if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0)
      // Slightly prefer to break before the first one in log-like statements.
      return 2;
    return 1;
  }
  if (Left.ClosesTemplateDeclaration)
    return Style.PenaltyBreakTemplateDeclaration;
  if (Left.is(TT_ConditionalExpr))
    return prec::Conditional;
  // Otherwise use the operator precedence of Left, or of Right if Left has
  // none. Assignments use the configurable PenaltyBreakAssignment; for any
  // other known level the numeric value of the level itself is the penalty.
  prec::Level Level = Left.getPrecedence();
  if (Level == prec::Unknown)
    Level = Right.getPrecedence();
  if (Level == prec::Assignment)
    return Style.PenaltyBreakAssignment;
  if (Level != prec::Unknown)
    return Level;

  // Default penalty when no rule above applies.
  return 3;
}
2485 
2486 bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const {
2487   return Style.SpaceBeforeParens == FormatStyle::SBPO_Always ||
2488          (Style.SpaceBeforeParens == FormatStyle::SBPO_NonEmptyParentheses &&
2489           Right.ParameterCount > 0);
2490 }
2491 
/// Returns \c true if a space is required between the adjacent tokens
/// \p Left and \p Right on \p Line.
///
/// Implemented as an ordered cascade of rules: the first rule whose condition
/// matches decides the result, so the order of the checks is significant.
/// When no rule matches, a space is required.
bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
                                          const FormatToken &Left,
                                          const FormatToken &Right) {
  if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
    return true;
  if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)
    return true;
  if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
      Left.Tok.getObjCKeywordID() == tok::objc_property)
    return true;
  // Around '#' and '##': a space only if the neighbor on the other side is
  // a '#'.
  if (Right.is(tok::hashhash))
    return Left.is(tok::hash);
  if (Left.isOneOf(tok::hashhash, tok::hash))
    return Right.is(tok::hash);
  // Empty parentheses "()".
  if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
    return Style.SpaceInEmptyParentheses;
  // Directly inside a parenthesis: parens of a C-style cast and all other
  // parens are controlled by separate style options.
  if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
    return (Right.is(TT_CastRParen) ||
            (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
               ? Style.SpacesInCStyleCastParentheses
               : Style.SpacesInParentheses;
  if (Right.isOneOf(tok::semi, tok::comma))
    return false;
  // '<' in an Objective-C declaration: if the matching '>' is directly
  // followed by ':', it is treated as a lightweight generic and gets no
  // space; otherwise ObjCSpaceBeforeProtocolList decides.
  if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) {
    bool IsLightweightGeneric = Right.MatchingParen &&
                                Right.MatchingParen->Next &&
                                Right.MatchingParen->Next->is(tok::colon);
    return !IsLightweightGeneric && Style.ObjCSpaceBeforeProtocolList;
  }
  if (Right.is(tok::less) && Left.is(tok::kw_template))
    return Style.SpaceAfterTemplateKeyword;
  if (Left.isOneOf(tok::exclaim, tok::tilde))
    return false;
  // No space between '@' and the literal, identifier or bracket it prefixes.
  if (Left.is(tok::at) &&
      Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
                    tok::numeric_constant, tok::l_paren, tok::l_brace,
                    tok::kw_true, tok::kw_false))
    return false;
  // After ':': a space, except for colons of ObjC method expressions.
  if (Left.is(tok::colon))
    return !Left.is(TT_ObjCMethodExpr);
  if (Left.is(tok::coloncolon))
    return false;
  // Angle brackets: no space, except in TextProto (and Proto dict literals),
  // where non-empty "<...>" follows Cpp11BracedListStyle.
  if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) {
    if (Style.Language == FormatStyle::LK_TextProto ||
        (Style.Language == FormatStyle::LK_Proto &&
         (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) {
      // Format empty list as `<>`.
      if (Left.is(tok::less) && Right.is(tok::greater))
        return false;
      return !Style.Cpp11BracedListStyle;
    }
    return false;
  }
  // Before "...": a space only after a literal, or after an identifier that
  // directly follows 'case'.
  if (Right.is(tok::ellipsis))
    return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
                                    Left.Previous->is(tok::kw_case));
  if (Left.is(tok::l_square) && Right.is(tok::amp))
    return false;
  // Space before a pointer/reference token.
  if (Right.is(TT_PointerOrReference)) {
    // After ')' in a possible function declaration a space is required,
    // unless the parenthesized part belongs to typeof/decltype or a typename
    // macro; that case falls through to the general rule below.
    if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) {
      if (!Left.MatchingParen)
        return true;
      FormatToken *TokenBeforeMatchingParen =
          Left.MatchingParen->getPreviousNonComment();
      if (!TokenBeforeMatchingParen ||
          !TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype,
                                             TT_TypenameMacro))
        return true;
    }
    // General rule: a space after literals; otherwise never after another
    // pointer/reference or '(', and only if the pointer is not left-aligned
    // or the token is in a multi-variable declaration statement (at nesting
    // level 0, or level 1 when the line starts with 'for').
    return (Left.Tok.isLiteral() ||
            (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
             (Style.PointerAlignment != FormatStyle::PAS_Left ||
              (Line.IsMultiVariableDeclStmt &&
               (Left.NestingLevel == 0 ||
                (Left.NestingLevel == 1 && Line.First->is(tok::kw_for)))))));
  }
  // Space before the '(' of a function type.
  if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
      (!Left.is(TT_PointerOrReference) ||
       (Style.PointerAlignment != FormatStyle::PAS_Right &&
        !Line.IsMultiVariableDeclStmt)))
    return true;
  // Space after a pointer/reference token.
  if (Left.is(TT_PointerOrReference))
    return Right.Tok.isLiteral() || Right.is(TT_BlockComment) ||
           (Right.isOneOf(Keywords.kw_override, Keywords.kw_final) &&
            !Right.is(TT_StartOfName)) ||
           (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) ||
           (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
                           tok::l_paren) &&
            (Style.PointerAlignment != FormatStyle::PAS_Right &&
             !Line.IsMultiVariableDeclStmt) &&
            Left.Previous &&
            !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
  if (Right.is(tok::star) && Left.is(tok::l_paren))
    return false;
  // Helper: whether spaces go inside the brackets of an array initializer
  // opened by LSquareTok. True if SpacesInContainerLiterals is set, or, for
  // Proto/TextProto without Cpp11BracedListStyle, if the '[' ends the token
  // sequence checked by endsSequence below.
  const auto SpaceRequiredForArrayInitializerLSquare =
      [](const FormatToken &LSquareTok, const FormatStyle &Style) {
        return Style.SpacesInContainerLiterals ||
               ((Style.Language == FormatStyle::LK_Proto ||
                 Style.Language == FormatStyle::LK_TextProto) &&
                !Style.Cpp11BracedListStyle &&
                LSquareTok.endsSequence(tok::l_square, tok::colon,
                                        TT_SelectorName));
      };
  // Directly after '['.
  if (Left.is(tok::l_square))
    return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) &&
            SpaceRequiredForArrayInitializerLSquare(Left, Style)) ||
           (Left.isOneOf(TT_ArraySubscriptLSquare,
                         TT_StructuredBindingLSquare) &&
            Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
  // Directly before ']'; decided by the type of the matching opener.
  if (Right.is(tok::r_square))
    return Right.MatchingParen &&
           ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) &&
             SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen,
                                                     Style)) ||
            (Style.SpacesInSquareBrackets &&
             Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,
                                          TT_StructuredBindingLSquare)) ||
            Right.MatchingParen->is(TT_AttributeParen));
  // Before a '[' that is none of the listed special kinds: no space, unless
  // it follows a numeric constant or a dict-literal token.
  if (Right.is(tok::l_square) &&
      !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
                     TT_DesignatedInitializerLSquare,
                     TT_StructuredBindingLSquare, TT_AttributeSquare) &&
      !Left.isOneOf(tok::numeric_constant, TT_DictLiteral))
    return false;
  if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
    return !Left.Children.empty(); // No spaces in "{}".
  // Inside braces that are not blocks, Cpp11BracedListStyle decides.
  if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
      (Right.is(tok::r_brace) && Right.MatchingParen &&
       Right.MatchingParen->BlockKind != BK_Block))
    return !Style.Cpp11BracedListStyle;
  if (Left.is(TT_BlockComment))
    // No whitespace in x(/*foo=*/1), except for JavaScript.
    return Style.Language == FormatStyle::LK_JavaScript ||
           !Left.TokenText.endswith("=*/");
  // Space before '('.
  if (Right.is(tok::l_paren)) {
    // using (FileStream fs...
    if (Style.isCSharp() && Left.is(tok::kw_using) &&
        Style.SpaceBeforeParens != FormatStyle::SBPO_Never)
      return true;
    // Always a space after the closing token of an attribute.
    if ((Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) ||
        (Left.is(tok::r_square) && Left.is(TT_AttributeSquare)))
      return true;
    // Three alternatives: (1) ObjC declarations and a preceding ';' always
    // get a space; (2) unless SBPO_Never is set, so do control-statement-like
    // keywords (try/catch/new/delete etc. only when not preceded by '.');
    // (3) if spaceRequiredBeforeParens() holds, so do identifiers,
    // function-like keywords, ')', simple type specifiers and the ']' of a
    // lambda introducer, except on preprocessor directive lines.
    return Line.Type == LT_ObjCDecl || Left.is(tok::semi) ||
           (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
            (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while,
                          tok::kw_switch, tok::kw_case, TT_ForEachMacro,
                          TT_ObjCForIn) ||
             Left.isIf(Line.Type != LT_PreprocessorDirective) ||
             (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch,
                           tok::kw_new, tok::kw_delete) &&
              (!Left.Previous || Left.Previous->isNot(tok::period))))) ||
           (spaceRequiredBeforeParens(Right) &&
            (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() ||
             Left.is(tok::r_paren) || Left.isSimpleTypeSpecifier() ||
             (Left.is(tok::r_square) && Left.MatchingParen &&
              Left.MatchingParen->is(TT_LambdaLSquare))) &&
            Line.Type != LT_PreprocessorDirective);
  }
  // No space between '@' and an Objective-C keyword.
  if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
    return false;
  // Before a unary operator: no space after '(', '[', '@' or the ':' of an
  // ObjC method expression.
  if (Right.is(TT_UnaryOperator))
    return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
           (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
  // No space before a non-block '{' that follows an identifier, '>', ']',
  // ')' or a simple type specifier, provided something follows the brace.
  if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
                    tok::r_paren) ||
       Left.isSimpleTypeSpecifier()) &&
      Right.is(tok::l_brace) && Right.getNextNonComment() &&
      Right.BlockKind != BK_Block)
    return false;
  if (Left.is(tok::period) || Right.is(tok::period))
    return false;
  // No space between an identifier spelled "L" and a following '#'.
  if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
    return false;
  if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
      Left.MatchingParen->Previous &&
      (Left.MatchingParen->Previous->is(tok::period) ||
       Left.MatchingParen->Previous->is(tok::coloncolon)))
    // Java call to generic function with explicit type:
    // A.<B<C<...>>>DoSomething();
    // A::<B<C<...>>>DoSomething();  // With a Java 8 method reference.
    return false;
  if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
    return false;
  if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at))
    // Objective-C dictionary literal -> no space after opening brace.
    return false;
  if (Right.is(tok::r_brace) && Right.MatchingParen &&
      Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at))
    // Objective-C dictionary literal -> no space before closing brace.
    return false;
  // Default: require a space.
  return true;
}
2684 
2685 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
2686                                          const FormatToken &Right) {
2687   const FormatToken &Left = *Right.Previous;
2688   if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
2689     return true; // Never ever merge two identifiers.
2690   if (Style.isCpp()) {
2691     if (Left.is(tok::kw_operator))
2692       return Right.is(tok::coloncolon);
2693     if (Right.is(tok::l_brace) && Right.BlockKind == BK_BracedInit &&
2694         !Left.opensScope() && Style.SpaceBeforeCpp11BracedList)
2695       return true;
2696   } else if (Style.Language == FormatStyle::LK_Proto ||
2697              Style.Language == FormatStyle::LK_TextProto) {
2698     if (Right.is(tok::period) &&
2699         Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
2700                      Keywords.kw_repeated, Keywords.kw_extend))
2701       return true;
2702     if (Right.is(tok::l_paren) &&
2703         Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
2704       return true;
2705     if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
2706       return true;
2707     // Slashes occur in text protocol extension syntax: [type/type] { ... }.
2708     if (Left.is(tok::slash) || Right.is(tok::slash))
2709       return false;
2710     if (Left.MatchingParen &&
2711         Left.MatchingParen->is(TT_ProtoExtensionLSquare) &&
2712         Right.isOneOf(tok::l_brace, tok::less))
2713       return !Style.Cpp11BracedListStyle;
2714     // A percent is probably part of a formatting specification, such as %lld.
2715     if (Left.is(tok::percent))
2716       return false;
2717     // Preserve the existence of a space before a percent for cases like 0x%04x
2718     // and "%d %d"
2719     if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
2720       return Right.WhitespaceRange.getEnd() != Right.WhitespaceRange.getBegin();
2721   } else if (Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
2722     if (Left.is(TT_JsFatArrow))
2723       return true;
2724     // for await ( ...
2725     if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous &&
2726         Left.Previous->is(tok::kw_for))
2727       return true;
2728     if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
2729         Right.MatchingParen) {
2730       const FormatToken *Next = Right.MatchingParen->getNextNonComment();
2731       // An async arrow function, for example: `x = async () => foo();`,
2732       // as opposed to calling a function called async: `x = async();`
2733       if (Next && Next->is(TT_JsFatArrow))
2734         return true;
2735     }
2736     if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
2737         (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
2738       return false;
2739     // In tagged template literals ("html`bar baz`"), there is no space between
2740     // the tag identifier and the template string. getIdentifierInfo makes sure
2741     // that the identifier is not a pseudo keyword like `yield`, either.
2742     if (Left.is(tok::identifier) && Keywords.IsJavaScriptIdentifier(Left) &&
2743         Right.is(TT_TemplateString))
2744       return false;
2745     if (Right.is(tok::star) &&
2746         Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
2747       return false;
2748     if (Right.isOneOf(tok::l_brace, tok::l_square) &&
2749         Left.isOneOf(Keywords.kw_function, Keywords.kw_yield,
2750                      Keywords.kw_extends, Keywords.kw_implements))
2751       return true;
2752     if (Right.is(tok::l_paren)) {
2753       // JS methods can use some keywords as names (e.g. `delete()`).
2754       if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo())
2755         return false;
2756       // Valid JS method names can include keywords, e.g. `foo.delete()` or
2757       // `bar.instanceof()`. Recognize call positions by preceding period.
2758       if (Left.Previous && Left.Previous->is(tok::period) &&
2759           Left.Tok.getIdentifierInfo())
2760         return false;
2761       // Additional unary JavaScript operators that need a space after.
2762       if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof,
2763                        tok::kw_void))
2764         return true;
2765     }
2766     // `foo as const;` casts into a const type.
2767     if (Left.endsSequence(tok::kw_const, Keywords.kw_as)) {
2768       return false;
2769     }
2770     if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
2771                       tok::kw_const) ||
2772          // "of" is only a keyword if it appears after another identifier
2773          // (e.g. as "const x of y" in a for loop), or after a destructuring
2774          // operation (const [x, y] of z, const {a, b} of c).
2775          (Left.is(Keywords.kw_of) && Left.Previous &&
2776           (Left.Previous->Tok.is(tok::identifier) ||
2777            Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&
2778         (!Left.Previous || !Left.Previous->is(tok::period)))
2779       return true;
2780     if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
2781         Left.Previous->is(tok::period) && Right.is(tok::l_paren))
2782       return false;
2783     if (Left.is(Keywords.kw_as) &&
2784         Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren))
2785       return true;
2786     if (Left.is(tok::kw_default) && Left.Previous &&
2787         Left.Previous->is(tok::kw_export))
2788       return true;
2789     if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
2790       return true;
2791     if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
2792       return false;
2793     if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
2794       return false;
2795     if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
2796         Line.First->isOneOf(Keywords.kw_import, tok::kw_export))
2797       return false;
2798     if (Left.is(tok::ellipsis))
2799       return false;
2800     if (Left.is(TT_TemplateCloser) &&
2801         !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
2802                        Keywords.kw_implements, Keywords.kw_extends))
2803       // Type assertions ('<type>expr') are not followed by whitespace. Other
2804       // locations that should have whitespace following are identified by the
2805       // above set of follower tokens.
2806       return false;
2807     if (Right.is(TT_JsNonNullAssertion))
2808       return false;
2809     if (Left.is(TT_JsNonNullAssertion) &&
2810         Right.isOneOf(Keywords.kw_as, Keywords.kw_in))
2811       return true; // "x! as string", "x! in y"
2812   } else if (Style.Language == FormatStyle::LK_Java) {
2813     if (Left.is(tok::r_square) && Right.is(tok::l_brace))
2814       return true;
2815     if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren))
2816       return Style.SpaceBeforeParens != FormatStyle::SBPO_Never;
2817     if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
2818                       tok::kw_protected) ||
2819          Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
2820                       Keywords.kw_native)) &&
2821         Right.is(TT_TemplateOpener))
2822       return true;
2823   }
2824   if (Left.is(TT_ImplicitStringLiteral))
2825     return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
2826   if (Line.Type == LT_ObjCMethodDecl) {
2827     if (Left.is(TT_ObjCMethodSpecifier))
2828       return true;
2829     if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right))
2830       // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a
2831       // keyword in Objective-C, and '+ (instancetype)new;' is a standard class
2832       // method declaration.
2833       return false;
2834   }
2835   if (Line.Type == LT_ObjCProperty &&
2836       (Right.is(tok::equal) || Left.is(tok::equal)))
2837     return false;
2838 
2839   if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
2840       Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow))
2841     return true;
2842   if (Right.is(TT_OverloadedOperatorLParen))
2843     return spaceRequiredBeforeParens(Right);
2844   if (Left.is(tok::comma))
2845     return true;
2846   if (Right.is(tok::comma))
2847     return false;
2848   if (Right.is(TT_ObjCBlockLParen))
2849     return true;
2850   if (Right.is(TT_CtorInitializerColon))
2851     return Style.SpaceBeforeCtorInitializerColon;
2852   if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon)
2853     return false;
2854   if (Right.is(TT_RangeBasedForLoopColon) &&
2855       !Style.SpaceBeforeRangeBasedForLoopColon)
2856     return false;
2857   if (Right.is(tok::colon)) {
2858     if (Line.First->isOneOf(tok::kw_case, tok::kw_default) ||
2859         !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi))
2860       return false;
2861     if (Right.is(TT_ObjCMethodExpr))
2862       return false;
2863     if (Left.is(tok::question))
2864       return false;
2865     if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
2866       return false;
2867     if (Right.is(TT_DictLiteral))
2868       return Style.SpacesInContainerLiterals;
2869     if (Right.is(TT_AttributeColon))
2870       return false;
2871     return true;
2872   }
2873   if (Left.is(TT_UnaryOperator))
2874     return (Style.SpaceAfterLogicalNot && Left.is(tok::exclaim)) ||
2875            Right.is(TT_BinaryOperator);
2876 
2877   // If the next token is a binary operator or a selector name, we have
2878   // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
2879   if (Left.is(TT_CastRParen))
2880     return Style.SpaceAfterCStyleCast ||
2881            Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
2882 
2883   if (Left.is(tok::greater) && Right.is(tok::greater)) {
2884     if (Style.Language == FormatStyle::LK_TextProto ||
2885         (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral)))
2886       return !Style.Cpp11BracedListStyle;
2887     return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
2888            (Style.Standard < FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
2889   }
2890   if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
2891       Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
2892       (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod)))
2893     return false;
2894   if (!Style.SpaceBeforeAssignmentOperators && Left.isNot(TT_TemplateCloser) &&
2895       Right.getPrecedence() == prec::Assignment)
2896     return false;
2897   if (Style.Language == FormatStyle::LK_Java && Right.is(tok::coloncolon) &&
2898       (Left.is(tok::identifier) || Left.is(tok::kw_this)))
2899     return false;
2900   if (Right.is(tok::coloncolon) && Left.is(tok::identifier))
2901     // Generally don't remove existing spaces between an identifier and "::".
2902     // The identifier might actually be a macro name such as ALWAYS_INLINE. If
2903     // this turns out to be too lenient, add analysis of the identifier itself.
2904     return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
2905   if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment))
2906     return (Left.is(TT_TemplateOpener) &&
2907             Style.Standard < FormatStyle::LS_Cpp11) ||
2908            !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
2909                           tok::kw___super, TT_TemplateCloser,
2910                           TT_TemplateOpener)) ||
2911            (Left.is(tok ::l_paren) && Style.SpacesInParentheses);
2912   if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
2913     return Style.SpacesInAngles;
2914   // Space before TT_StructuredBindingLSquare.
2915   if (Right.is(TT_StructuredBindingLSquare))
2916     return !Left.isOneOf(tok::amp, tok::ampamp) ||
2917            Style.PointerAlignment != FormatStyle::PAS_Right;
2918   // Space before & or && following a TT_StructuredBindingLSquare.
2919   if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) &&
2920       Right.isOneOf(tok::amp, tok::ampamp))
2921     return Style.PointerAlignment != FormatStyle::PAS_Left;
2922   if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
2923       (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
2924        !Right.is(tok::r_paren)))
2925     return true;
2926   if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) &&
2927       Right.isNot(TT_FunctionTypeLParen))
2928     return spaceRequiredBeforeParens(Right);
2929   if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
2930       Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen))
2931     return false;
2932   if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
2933       Line.startsWith(tok::hash))
2934     return true;
2935   if (Right.is(TT_TrailingUnaryOperator))
2936     return false;
2937   if (Left.is(TT_RegexLiteral))
2938     return false;
2939   return spaceRequiredBetween(Line, Left, Right);
2940 }
2941 
2942 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
2943 static bool isAllmanBrace(const FormatToken &Tok) {
2944   return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block &&
2945          !Tok.isOneOf(TT_ObjCBlockLBrace, TT_LambdaLBrace, TT_DictLiteral);
2946 }
2947 
/// Decides whether a line break is mandatory immediately before \p Right,
/// i.e. between \p Right.Previous and \p Right.
///
/// The rules below are evaluated top to bottom and many of them return as soon
/// as they match, so their relative order is significant.
///
/// \param Line  The annotated line \p Right is part of.
/// \param Right The token in front of which a break is considered. Its
///              \c Previous pointer is dereferenced unconditionally and must
///              therefore be non-null.
/// \returns \c true if one of the rules forces a break before \p Right,
///          \c false otherwise.
bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
                                     const FormatToken &Right) {
  // Left is the token directly in front of Right.
  const FormatToken &Left = *Right.Previous;
  // More than one newline preceded Right and the style keeps at least one
  // empty line.
  if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
    return true;

  // Language-specific rules.
  if (Style.Language == FormatStyle::LK_JavaScript) {
    // Break before the string literal in `"..." + "..."`.
    // FIXME: This might apply to other languages and token kinds.
    if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
        Left.Previous->is(tok::string_literal))
      return true;
    if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
        Left.Previous && Left.Previous->is(tok::equal) &&
        Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
                            tok::kw_const) &&
        // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
        // above.
        !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let))
      // Object literals on the top level of a file are treated as "enum-style".
      // Each key/value pair is put on a separate line, instead of bin-packing.
      return true;
    if (Left.is(tok::l_brace) && Line.Level == 0 &&
        (Line.startsWith(tok::kw_enum) ||
         Line.startsWith(tok::kw_const, tok::kw_enum) ||
         Line.startsWith(tok::kw_export, tok::kw_enum) ||
         Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum)))
      // JavaScript top-level enum key/value pairs are put on separate lines
      // instead of bin-packing.
      return true;
    // `{` directly followed by `}` where the opening brace has child lines.
    if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
        !Left.Children.empty())
      // Support AllowShortFunctionsOnASingleLine for JavaScript.
      return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
             Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
             (Left.NestingLevel == 0 && Line.Level == 0 &&
              Style.AllowShortFunctionsOnASingleLine &
                  FormatStyle::SFS_InlineOnly);
  } else if (Style.Language == FormatStyle::LK_Java) {
    // Break before the `+` in `"..." + "..."`.
    if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
        Right.Next->is(tok::string_literal))
      return true;
  } else if (Style.Language == FormatStyle::LK_Cpp ||
             Style.Language == FormatStyle::LK_ObjC ||
             Style.Language == FormatStyle::LK_Proto ||
             Style.Language == FormatStyle::LK_TableGen ||
             Style.Language == FormatStyle::LK_TextProto) {
    // Two adjacent string literals are always put on separate lines.
    if (Left.isStringLiteral() && Right.isStringLiteral())
      return true;
  }

  // If the last token before a '}', ']', or ')' is a comma or a trailing
  // comment, the intention is to insert a line break after it in order to make
  // shuffling around entries easier. Import statements, especially in
  // JavaScript, can be an exception to this rule.
  if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) {
    const FormatToken *BeforeClosingBrace = nullptr;
    // Case 1: Left is the opener (not of a block); inspect the token right
    // before its matching closer.
    if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
         (Style.Language == FormatStyle::LK_JavaScript &&
          Left.is(tok::l_paren))) &&
        Left.BlockKind != BK_Block && Left.MatchingParen)
      BeforeClosingBrace = Left.MatchingParen->Previous;
    // Case 2: Right is matched by such an opener; the token before it is Left.
    else if (Right.MatchingParen &&
             (Right.MatchingParen->isOneOf(tok::l_brace,
                                           TT_ArrayInitializerLSquare) ||
              (Style.Language == FormatStyle::LK_JavaScript &&
               Right.MatchingParen->is(tok::l_paren))))
      BeforeClosingBrace = &Left;
    if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
                               BeforeClosingBrace->isTrailingComment()))
      return true;
  }

  // A comment forces a break only if it was already preceded by an unescaped
  // newline, and only if Left is neither a braced-init token nor a constructor
  // initializer colon. This is a final decision for comment tokens.
  if (Right.is(tok::comment))
    return Left.BlockKind != BK_BracedInit &&
           Left.isNot(TT_CtorInitializerColon) &&
           (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
  // Nothing may follow a trailing comment on the same line.
  if (Left.isTrailingComment())
    return true;
  if (Right.Previous->IsUnterminatedLiteral)
    return true;
  // Break before `<<` when it sits between two string literals.
  if (Right.is(tok::lessless) && Right.Next &&
      Right.Previous->is(tok::string_literal) &&
      Right.Next->is(tok::string_literal))
    return true;
  // Break after the closer of a template declaration whose opener is at
  // nesting level 0 when the style always breaks template declarations.
  if (Right.Previous->ClosesTemplateDeclaration &&
      Right.Previous->MatchingParen &&
      Right.Previous->MatchingParen->NestingLevel == 0 &&
      Style.AlwaysBreakTemplateDeclarations == FormatStyle::BTDS_Yes)
    return true;
  // With BCIS_BeforeComma (and initializers not constrained to all-on-one-line
  // or one-per-line), break before each constructor initializer comma ...
  if (Right.is(TT_CtorInitializerComma) &&
      Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
      !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
    return true;
  // ... and before the constructor initializer colon.
  if (Right.is(TT_CtorInitializerColon) &&
      Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
      !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
    return true;
  // Break only if we have multiple inheritance.
  if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma &&
      Right.is(TT_InheritanceComma))
    return true;
  if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\""))
    // Multiline raw string literals are special wrt. line breaks. The author
    // has made a deliberate choice and might have aligned the contents of the
    // string literal accordingly. Thus, we try to keep existing line breaks.
    return Right.IsMultiline && Right.NewlinesBefore > 0;
  // Protocol buffers: first token after `{` or after `= <` at nesting level 1.
  if ((Right.Previous->is(tok::l_brace) ||
       (Right.Previous->is(tok::less) && Right.Previous->Previous &&
        Right.Previous->Previous->is(tok::equal))) &&
      Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
    // Don't put enums or option definitions onto single lines in protocol
    // buffers.
    return true;
  }
  // Inline assembly braces keep whatever line break the input had.
  if (Right.is(TT_InlineASMBrace))
    return Right.HasUnescapedNewline;
  // Around a block brace, break only for enum/class/struct lines whose
  // corresponding BraceWrapping option is set. This is a final decision: no
  // later rule is consulted once either token is an Allman brace.
  if (isAllmanBrace(Left) || isAllmanBrace(Right))
    return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) ||
           (Line.startsWith(tok::kw_typedef, tok::kw_enum) &&
            Style.BraceWrapping.AfterEnum) ||
           (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
           (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
  // Objective-C block bodies always start on a new line when short blocks are
  // never allowed on a single line.
  if (Left.is(TT_ObjCBlockLBrace) &&
      Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never)
    return true;

  if (Left.is(TT_LambdaLBrace)) {
    // SLS_Inline: a lambda whose closing brace is directly followed by ',' or
    // ')' may stay on one line. This check must precede the one below, which
    // would otherwise force a break for SLS_Inline.
    if (Left.MatchingParen && Left.MatchingParen->Next &&
        Left.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren) &&
        Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline)
      return false;

    // Otherwise break after the lambda's '{' for SLS_None, for the remaining
    // SLS_Inline cases, and for SLS_Empty when the brace has child lines.
    if (Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_None ||
        Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline ||
        (!Left.Children.empty() &&
         Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Empty))
      return true;
  }

  // Put multiple C# attributes on a new line.
  if (Style.isCSharp() &&
      ((Left.is(TT_AttributeSquare) && Left.is(tok::r_square)) ||
       (Left.is(tok::r_square) && Right.is(TT_AttributeSquare) &&
        Right.is(tok::l_square))))
    return true;

  // Put multiple Java annotations on a new line. Applies after a leading
  // annotation when Right is neither another leading annotation nor '(', and
  // either the line ends in '{' or BreakAfterJavaFieldAnnotations is set.
  if ((Style.Language == FormatStyle::LK_Java ||
       Style.Language == FormatStyle::LK_JavaScript) &&
      Left.is(TT_LeadingJavaAnnotation) &&
      Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
      (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations))
    return true;

  // Proto extensions (`[...]`) always start on a new line.
  if (Right.is(TT_ProtoExtensionLSquare))
    return true;

  // In text proto instances if a submessage contains at least 2 entries and at
  // least one of them is a submessage, like A { ... B { ... } ... },
  // put all of the entries of A on separate lines by forcing the selector of
  // the submessage B to be put on a newline.
  //
  // Example: these can stay on one line:
  // a { scalar_1: 1 scalar_2: 2 }
  // a { b { key: value } }
  //
  // and these entries need to be on a new line even if putting them all in one
  // line is under the column limit:
  // a {
  //   scalar: 1
  //   b { key: value }
  // }
  //
  // We enforce this by breaking before a submessage field that has previous
  // siblings, *and* breaking before a field that follows a submessage field.
  //
  // Be careful to exclude the case  [proto.ext] { ... } since the `]` is
  // the TT_SelectorName there, but we don't want to break inside the brackets.
  //
  // Another edge case is @submessage { key: value }, which is a common
  // substitution placeholder. In this case we want to keep `@` and `submessage`
  // together.
  //
  // We ensure elsewhere that extensions are always on their own line.
  if ((Style.Language == FormatStyle::LK_Proto ||
       Style.Language == FormatStyle::LK_TextProto) &&
      Right.is(TT_SelectorName) && !Right.is(tok::r_square) && Right.Next) {
    // Keep `@submessage` together in:
    // @submessage { key: value }
    if (Right.Previous && Right.Previous->is(tok::at))
      return false;
    // Look for the scope opener after selector in cases like:
    // selector { ...
    // selector: { ...
    // selector: @base { ...
    FormatToken *LBrace = Right.Next;
    if (LBrace && LBrace->is(tok::colon)) {
      // Skip the ':'.
      LBrace = LBrace->Next;
      if (LBrace && LBrace->is(tok::at)) {
        // Skip the '@' and the token following it (`base` in the example).
        LBrace = LBrace->Next;
        if (LBrace)
          LBrace = LBrace->Next;
      }
    }
    if (LBrace &&
        // The scope opener is one of {, [, <:
        // selector { ... }
        // selector [ ... ]
        // selector < ... >
        //
        // In case of selector { ... }, the l_brace is TT_DictLiteral.
        // In case of an empty selector {}, the l_brace is not TT_DictLiteral,
        // so we check for immediately following r_brace.
        ((LBrace->is(tok::l_brace) &&
          (LBrace->is(TT_DictLiteral) ||
           (LBrace->Next && LBrace->Next->is(tok::r_brace)))) ||
         LBrace->is(TT_ArrayInitializerLSquare) || LBrace->is(tok::less))) {
      // If Left.ParameterCount is 0, then this submessage entry is not the
      // first in its parent submessage, and we want to break before this entry.
      // If Left.ParameterCount is greater than 0, then its parent submessage
      // might contain 1 or more entries and we want to break before this entry
      // if it contains at least 2 entries. We deal with this case later by
      // detecting and breaking before the next entry in the parent submessage.
      if (Left.ParameterCount == 0)
        return true;
      // However, if this submessage is the first entry in its parent
      // submessage, Left.ParameterCount might be 1 in some cases.
      // We deal with this case later by detecting an entry
      // following a closing paren of this submessage.
    }

    // If this is an entry immediately following a submessage, it will be
    // preceded by a closing paren of that submessage, like in:
    //     left---.  .---right
    //            v  v
    // sub: { ... } key: value
    // If there was a comment between `}` and `key` above, then `key` would be
    // put on a new line anyways.
    if (Left.isOneOf(tok::r_brace, tok::greater, tok::r_square))
      return true;
  }

  // Deal with lambda arguments in C++ - we want consistent line breaks whether
  // they happen to be at arg0, arg1 or argN. The selection is a bit nuanced
  // as aggressive line breaks are placed when the lambda is not the last arg.
  if ((Style.Language == FormatStyle::LK_Cpp ||
       Style.Language == FormatStyle::LK_ObjC) &&
      Left.is(tok::l_paren) && Left.BlockParameterCount > 0 &&
      !Right.isOneOf(tok::l_paren, TT_LambdaLSquare)) {
    // Multiple lambdas in the same function call force line breaks.
    if (Left.BlockParameterCount > 1)
      return true;

    // A lambda followed by another arg forces a line break.
    // Without a Role on the '(' there is no comma information to inspect.
    if (!Left.Role)
      return false;
    // NOTE(review): lastComma() presumably yields the last comma of the
    // argument list opened by Left - confirm against TokenRole.
    auto Comma = Left.Role->lastComma();
    if (!Comma)
      return false;
    auto Next = Comma->getNextNonComment();
    if (!Next)
      return false;
    // The token after that comma does not start a lambda or block ('[', '{',
    // '^'), so break.
    if (!Next->isOneOf(TT_LambdaLSquare, tok::l_brace, tok::caret))
      return true;
  }

  return false;
}
3216 
/// Decides whether a line break is permitted immediately before \p Right,
/// i.e. between \p Right.Previous and \p Right.
///
/// The rules below are evaluated top to bottom and most of them return as soon
/// as they match, so their relative order is significant. If no specific rule
/// applies, the final return statement lists the remaining token combinations
/// around which a break is allowed.
///
/// \param Line  The annotated line \p Right is part of.
/// \param Right The token in front of which a break is considered. Its
///              \c Previous pointer is dereferenced unconditionally and must
///              therefore be non-null.
/// \returns \c true if a break before \p Right is allowed, \c false otherwise.
bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
                                    const FormatToken &Right) {
  // Left is the token directly in front of Right.
  const FormatToken &Left = *Right.Previous;

  // Language-specific stuff.
  if (Style.Language == FormatStyle::LK_Java) {
    // Never break right after `throws`/`extends`/`implements`, but allow
    // breaking in front of them.
    if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
                     Keywords.kw_implements))
      return false;
    if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
                      Keywords.kw_implements))
      return true;
  } else if (Style.Language == FormatStyle::LK_JavaScript) {
    // Comments are skipped when looking for the keyword preceding Right.
    const FormatToken *NonComment = Right.getPreviousNonComment();
    if (NonComment &&
        NonComment->isOneOf(
            tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
            tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
            tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected,
            Keywords.kw_readonly, Keywords.kw_abstract, Keywords.kw_get,
            Keywords.kw_set, Keywords.kw_async, Keywords.kw_await))
      return false; // Otherwise automatic semicolon insertion would trigger.
    // At nesting level 0, do not break before '[' or '(' that follows an
    // identifier/keyword, ']' or ')'.
    if (Right.NestingLevel == 0 &&
        (Left.Tok.getIdentifierInfo() ||
         Left.isOneOf(tok::r_square, tok::r_paren)) &&
        Right.isOneOf(tok::l_square, tok::l_paren))
      return false; // Otherwise automatic semicolon insertion would trigger.
    // Keep the '{' on the same line as the fat arrow.
    if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
      return false;
    if (Left.is(TT_JsTypeColon))
      return true;
    // Don't wrap between "!" and ":" of a strict prop init ("field!: type;").
    if (Left.is(tok::exclaim) && Right.is(tok::colon))
      return false;
    // Look for `is` type annotations like:
    // function f(): a is B { ... }
    // Do not break before `is` in these cases.
    if (Right.is(Keywords.kw_is)) {
      const FormatToken *Next = Right.getNextNonComment();
      // If `is` is followed by a colon, it's likely that it's a dict key, so
      // ignore it for this check.
      // For example this is common in Polymer:
      // Polymer({
      //   is: 'name',
      //   ...
      // });
      if (!Next || !Next->is(tok::colon))
        return false;
    }
    // `in` is treated like a binary operator: break after it when the style
    // does not break before binary operators, and before it otherwise.
    if (Left.is(Keywords.kw_in))
      return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
    if (Right.is(Keywords.kw_in))
      return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
    if (Right.is(Keywords.kw_as))
      return false; // must not break before as in 'x as type' casts
    if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_infer)) {
      // extends and infer can appear as keywords in conditional types:
      //   https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#conditional-types
      // do not break before them, as the expressions are subject to ASI.
      return false;
    }
    // Breaking after `as` and after a non-null assertion is fine.
    if (Left.is(Keywords.kw_as))
      return true;
    if (Left.is(TT_JsNonNullAssertion))
      return true;
    if (Left.is(Keywords.kw_declare) &&
        Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
                      Keywords.kw_function, tok::kw_class, tok::kw_enum,
                      Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
                      Keywords.kw_let, tok::kw_const))
      // See grammar for 'declare' statements at:
      // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10
      return false;
    if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
        Right.isOneOf(tok::identifier, tok::string_literal))
      return false; // must not break in "module foo { ...}"
    // Do not break before a template string token that closes a scope.
    if (Right.is(TT_TemplateString) && Right.closesScope())
      return false;
    // Don't split tagged template literal so there is a break between the tag
    // identifier and template string.
    if (Left.is(tok::identifier) && Right.is(TT_TemplateString)) {
      return false;
    }
    // Breaking after a template string token that opens a scope is allowed.
    if (Left.is(TT_TemplateString) && Left.opensScope())
      return true;
  }

  // Rules that apply to all languages start here.
  if (Left.is(tok::at))
    return false;
  if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
    return false;
  // After a Java annotation, break anywhere except before its '('.
  if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
    return !Right.is(tok::l_paren);
  // Before a pointer/reference token: allowed in multi-variable declaration
  // statements, or with right pointer alignment unless the next token is a
  // function declaration name.
  if (Right.is(TT_PointerOrReference))
    return Line.IsMultiVariableDeclStmt ||
           (Style.PointerAlignment == FormatStyle::PAS_Right &&
            (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
      Right.is(tok::kw_operator))
    return true;
  if (Left.is(TT_PointerOrReference))
    return false;
  if (Right.isTrailingComment())
    // We rely on MustBreakBefore being set correctly here as we should not
    // change the "binding" behavior of a comment.
    // The first comment in a braced lists is always interpreted as belonging to
    // the first list element. Otherwise, it should be placed outside of the
    // list.
    return Left.BlockKind == BK_BracedInit ||
           (Left.is(TT_CtorInitializerColon) &&
            Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon);
  // Never separate '?' from a directly following ':'.
  if (Left.is(tok::question) && Right.is(tok::colon))
    return false;
  // Ternary operators: break before or after them according to
  // BreakBeforeTernaryOperators.
  if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
    return Style.BreakBeforeTernaryOperators;
  if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
    return !Style.BreakBeforeTernaryOperators;
  // Inheritance colon: break after it only with BILS_AfterColon, before it
  // with every other setting.
  if (Left.is(TT_InheritanceColon))
    return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon;
  if (Right.is(TT_InheritanceColon))
    return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon;
  if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) &&
      Left.isNot(TT_SelectorName))
    return true;

  // No break before any colon other than a constructor initializer colon or
  // an inline assembly colon.
  if (Right.is(tok::colon) &&
      !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon))
    return false;
  // After the ':' of a dict literal entry or an ObjC method expression.
  if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
    if (Style.Language == FormatStyle::LK_Proto ||
        Style.Language == FormatStyle::LK_TextProto) {
      if (!Style.AlwaysBreakBeforeMultilineStrings && Right.isStringLiteral())
        return false;
      // Prevent cases like:
      //
      // submessage:
      //     { key: valueeeeeeeeeeee }
      //
      // when the snippet does not fit into one line.
      // Prefer:
      //
      // submessage: {
      //   key: valueeeeeeeeeeee
      // }
      //
      // instead, even if it is longer by one line.
      //
      // Note that this allows the "{" to go over the column limit
      // when the column limit is just between ":" and "{", but that does
      // not happen too often and alternative formattings in this case are
      // not much better.
      //
      // The code covers the cases:
      //
      // submessage: { ... }
      // submessage: < ... >
      // repeated: [ ... ]
      if (((Right.is(tok::l_brace) || Right.is(tok::less)) &&
           Right.is(TT_DictLiteral)) ||
          Right.is(TT_ArrayInitializerLSquare))
        return false;
    }
    return true;
  }
  // Do not break before the ']' that closes a proto extension.
  if (Right.is(tok::r_square) && Right.MatchingParen &&
      Right.MatchingParen->is(TT_ProtoExtensionLSquare))
    return false;
  if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
                                    Right.Next->is(TT_ObjCMethodExpr)))
    return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
  if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
    return true;
  if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
    return true;
  if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
                    TT_OverloadedOperator))
    return false;
  if (Left.is(TT_RangeBasedForLoopColon))
    return true;
  if (Right.is(TT_RangeBasedForLoopColon))
    return false;
  // `> <`: a template closer directly followed by a template opener. This
  // must be checked before the general "no break after a template closer"
  // rule below.
  if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
    return true;
  if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
      Left.is(tok::kw_operator))
    return false;
  // In a virtual function declaration line, do not break after a top-level
  // '=' unless it is followed by `default` or `delete`.
  if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
      Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0)
    return false;
  // Without Cpp11BracedListStyle, keep `= {` together.
  if (Left.is(tok::equal) && Right.is(tok::l_brace) &&
      !Style.Cpp11BracedListStyle)
    return false;
  if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen))
    return false;
  // No break after a '(' that itself follows a binary operator or a cast's
  // closing paren.
  if (Left.is(tok::l_paren) && Left.Previous &&
      (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen)))
    return false;
  if (Right.is(TT_ImplicitStringLiteral))
    return false;

  // Never break before ')' or a template closer.
  if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser))
    return false;
  // Never break before the ']' that closes a lambda introducer.
  if (Right.is(tok::r_square) && Right.MatchingParen &&
      Right.MatchingParen->is(TT_LambdaLSquare))
    return false;

  // We only break before r_brace if there was a corresponding break before
  // the l_brace, which is tracked by BreakBeforeClosingBrace.
  if (Right.is(tok::r_brace))
    return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;

  // Allow breaking after a trailing annotation, e.g. after a method
  // declaration.
  if (Left.is(TT_TrailingAnnotation))
    return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
                          tok::less, tok::coloncolon);

  // Breaking before `__attribute__` or an attribute's '[' is allowed.
  if (Right.is(tok::kw___attribute) ||
      (Right.is(tok::l_square) && Right.is(TT_AttributeSquare)))
    return true;

  if (Left.is(tok::identifier) && Right.is(tok::string_literal))
    return true;

  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
    return true;

  // Constructor initializer colon: break after it only with BCIS_AfterColon,
  // before it with every other setting.
  if (Left.is(TT_CtorInitializerColon))
    return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon;
  if (Right.is(TT_CtorInitializerColon))
    return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon;
  // With the *_BeforeComma settings, breaks go before the initializer or
  // inheritance comma, never after it.
  if (Left.is(TT_CtorInitializerComma) &&
      Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
    return false;
  if (Right.is(TT_CtorInitializerComma) &&
      Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
    return true;
  if (Left.is(TT_InheritanceComma) &&
      Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma)
    return false;
  if (Right.is(TT_InheritanceComma) &&
      Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma)
    return true;
  // Never split `> >` or `< <`.
  if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
      (Left.is(tok::less) && Right.is(tok::less)))
    return false;
  // Break before a binary operator if the style asks for it: always with
  // BOS_All, and with the other non-BOS_None setting only for operators that
  // do not have assignment precedence.
  if (Right.is(TT_BinaryOperator) &&
      Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
      (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
       Right.getPrecedence() != prec::Assignment))
    return true;
  if (Left.is(TT_ArrayInitializerLSquare))
    return true;
  if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
    return true;
  // Break after a binary operator (other than `->*` and `<<`): always with
  // BOS_None, and with the other non-BOS_All setting only for operators with
  // assignment precedence.
  if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
      !Left.isOneOf(tok::arrowstar, tok::lessless) &&
      Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
      (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
       Left.getPrecedence() == prec::Assignment))
    return true;
  // Keep the two square brackets of an attribute together.
  if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) ||
      (Left.is(tok::r_square) && Right.is(TT_AttributeSquare)))
    return false;
  // Fallback: the remaining token combinations around which a break is
  // allowed.
  return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
                      tok::kw_class, tok::kw_struct, tok::comment) ||
         Right.isMemberAccess() ||
         Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
                       tok::colon, tok::l_square, tok::at) ||
         (Left.is(tok::r_paren) &&
          Right.isOneOf(tok::identifier, tok::kw_const)) ||
         (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
         (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser));
}
3491 
3492 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
3493   llvm::errs() << "AnnotatedTokens(L=" << Line.Level << "):\n";
3494   const FormatToken *Tok = Line.First;
3495   while (Tok) {
3496     llvm::errs() << " M=" << Tok->MustBreakBefore
3497                  << " C=" << Tok->CanBreakBefore
3498                  << " T=" << getTokenTypeName(Tok->Type)
3499                  << " S=" << Tok->SpacesRequiredBefore
3500                  << " B=" << Tok->BlockParameterCount
3501                  << " BK=" << Tok->BlockKind << " P=" << Tok->SplitPenalty
3502                  << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength
3503                  << " PPK=" << Tok->PackingKind << " FakeLParens=";
3504     for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
3505       llvm::errs() << Tok->FakeLParens[i] << "/";
3506     llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
3507     llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo();
3508     llvm::errs() << " Text='" << Tok->TokenText << "'\n";
3509     if (!Tok->Next)
3510       assert(Tok == Line.Last);
3511     Tok = Tok->Next;
3512   }
3513   llvm::errs() << "----\n";
3514 }
3515 
3516 } // namespace format
3517 } // namespace clang
3518