1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "TokenAnnotator.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/Support/Debug.h"
20 
21 #define DEBUG_TYPE "format-token-annotator"
22 
23 namespace clang {
24 namespace format {
25 
26 namespace {
27 
28 /// \brief A parser that gathers additional information about tokens.
29 ///
30 /// The \c TokenAnnotator tries to match parenthesis and square brakets and
31 /// store a parenthesis levels. It also tries to resolve matching "<" and ">"
32 /// into template parameter lists.
33 class AnnotatingParser {
34 public:
35   AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
36                    const AdditionalKeywords &Keywords)
37       : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
38         Keywords(Keywords) {
39     Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
40     resetTokenMetadata(CurrentToken);
41   }
42 
43 private:
44   bool parseAngle() {
45     if (!CurrentToken || !CurrentToken->Previous)
46       return false;
47     if (NonTemplateLess.count(CurrentToken->Previous))
48       return false;
49 
50     const FormatToken& Previous = *CurrentToken->Previous;
51     if (Previous.Previous) {
52       if (Previous.Previous->Tok.isLiteral())
53         return false;
54       if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
55           (!Previous.Previous->MatchingParen ||
56            !Previous.Previous->MatchingParen->is(TT_OverloadedOperatorLParen)))
57         return false;
58     }
59 
60     FormatToken *Left = CurrentToken->Previous;
61     Left->ParentBracket = Contexts.back().ContextKind;
62     ScopedContextCreator ContextCreator(*this, tok::less, 12);
63 
64     // If this angle is in the context of an expression, we need to be more
65     // hesitant to detect it as opening template parameters.
66     bool InExprContext = Contexts.back().IsExpression;
67 
68     Contexts.back().IsExpression = false;
69     // If there's a template keyword before the opening angle bracket, this is a
70     // template parameter, not an argument.
71     Contexts.back().InTemplateArgument =
72         Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);
73 
74     if (Style.Language == FormatStyle::LK_Java &&
75         CurrentToken->is(tok::question))
76       next();
77 
78     while (CurrentToken) {
79       if (CurrentToken->is(tok::greater)) {
80         Left->MatchingParen = CurrentToken;
81         CurrentToken->MatchingParen = Left;
82         CurrentToken->Type = TT_TemplateCloser;
83         next();
84         return true;
85       }
86       if (CurrentToken->is(tok::question) &&
87           Style.Language == FormatStyle::LK_Java) {
88         next();
89         continue;
90       }
91       if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
92           (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext))
93         return false;
94       // If a && or || is found and interpreted as a binary operator, this set
95       // of angles is likely part of something like "a < b && c > d". If the
96       // angles are inside an expression, the ||/&& might also be a binary
97       // operator that was misinterpreted because we are parsing template
98       // parameters.
99       // FIXME: This is getting out of hand, write a decent parser.
100       if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
101           CurrentToken->Previous->is(TT_BinaryOperator) &&
102           Contexts[Contexts.size() - 2].IsExpression &&
103           !Line.startsWith(tok::kw_template))
104         return false;
105       updateParameterCount(Left, CurrentToken);
106       if (!consumeToken())
107         return false;
108     }
109     return false;
110   }
111 
112   bool parseParens(bool LookForDecls = false) {
113     if (!CurrentToken)
114       return false;
115     FormatToken *Left = CurrentToken->Previous;
116     Left->ParentBracket = Contexts.back().ContextKind;
117     ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
118 
119     // FIXME: This is a bit of a hack. Do better.
120     Contexts.back().ColonIsForRangeExpr =
121         Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
122 
123     bool StartsObjCMethodExpr = false;
124     if (CurrentToken->is(tok::caret)) {
125       // (^ can start a block type.
126       Left->Type = TT_ObjCBlockLParen;
127     } else if (FormatToken *MaybeSel = Left->Previous) {
128       // @selector( starts a selector.
129       if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
130           MaybeSel->Previous->is(tok::at)) {
131         StartsObjCMethodExpr = true;
132       }
133     }
134 
135     if (Left->is(TT_OverloadedOperatorLParen)) {
136       Contexts.back().IsExpression = false;
137     } else if (Left->Previous &&
138         (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,
139                                  tok::kw_if, tok::kw_while, tok::l_paren,
140                                  tok::comma) ||
141          Left->Previous->is(TT_BinaryOperator))) {
142       // static_assert, if and while usually contain expressions.
143       Contexts.back().IsExpression = true;
144     } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
145                Left->Previous->is(Keywords.kw_function)) {
146       Contexts.back().IsExpression = false;
147     } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
148                Left->Previous->MatchingParen &&
149                Left->Previous->MatchingParen->is(TT_LambdaLSquare)) {
150       // This is a parameter list of a lambda expression.
151       Contexts.back().IsExpression = false;
152     } else if (Line.InPPDirective &&
153                (!Left->Previous || !Left->Previous->is(tok::identifier))) {
154       Contexts.back().IsExpression = true;
155     } else if (Contexts[Contexts.size() - 2].CaretFound) {
156       // This is the parameter list of an ObjC block.
157       Contexts.back().IsExpression = false;
158     } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
159       Left->Type = TT_AttributeParen;
160     } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
161       // The first argument to a foreach macro is a declaration.
162       Contexts.back().IsForEachMacro = true;
163       Contexts.back().IsExpression = false;
164     } else if (Left->Previous && Left->Previous->MatchingParen &&
165                Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) {
166       Contexts.back().IsExpression = false;
167     } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
168       bool IsForOrCatch =
169           Left->Previous && Left->Previous->isOneOf(tok::kw_for, tok::kw_catch);
170       Contexts.back().IsExpression = !IsForOrCatch;
171     }
172 
173     if (StartsObjCMethodExpr) {
174       Contexts.back().ColonIsObjCMethodExpr = true;
175       Left->Type = TT_ObjCMethodExpr;
176     }
177 
178     bool MightBeFunctionType = CurrentToken->isOneOf(tok::star, tok::amp) &&
179                                !Contexts[Contexts.size() - 2].IsExpression;
180     bool HasMultipleLines = false;
181     bool HasMultipleParametersOnALine = false;
182     bool MightBeObjCForRangeLoop =
183         Left->Previous && Left->Previous->is(tok::kw_for);
184     while (CurrentToken) {
185       // LookForDecls is set when "if (" has been seen. Check for
186       // 'identifier' '*' 'identifier' followed by not '=' -- this
187       // '*' has to be a binary operator but determineStarAmpUsage() will
188       // categorize it as an unary operator, so set the right type here.
189       if (LookForDecls && CurrentToken->Next) {
190         FormatToken *Prev = CurrentToken->getPreviousNonComment();
191         if (Prev) {
192           FormatToken *PrevPrev = Prev->getPreviousNonComment();
193           FormatToken *Next = CurrentToken->Next;
194           if (PrevPrev && PrevPrev->is(tok::identifier) &&
195               Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
196               CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
197             Prev->Type = TT_BinaryOperator;
198             LookForDecls = false;
199           }
200         }
201       }
202 
203       if (CurrentToken->Previous->is(TT_PointerOrReference) &&
204           CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
205                                                     tok::coloncolon))
206         MightBeFunctionType = true;
207       if (CurrentToken->Previous->is(TT_BinaryOperator))
208         Contexts.back().IsExpression = true;
209       if (CurrentToken->is(tok::r_paren)) {
210         if (MightBeFunctionType && CurrentToken->Next &&
211             (CurrentToken->Next->is(tok::l_paren) ||
212              (CurrentToken->Next->is(tok::l_square) &&
213               Line.MustBeDeclaration)))
214           Left->Type = TT_FunctionTypeLParen;
215         Left->MatchingParen = CurrentToken;
216         CurrentToken->MatchingParen = Left;
217 
218         if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
219             Left->Previous && Left->Previous->is(tok::l_paren)) {
220           // Detect the case where macros are used to generate lambdas or
221           // function bodies, e.g.:
222           //   auto my_lambda = MARCO((Type *type, int i) { .. body .. });
223           for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) {
224             if (Tok->is(TT_BinaryOperator) &&
225                 Tok->isOneOf(tok::star, tok::amp, tok::ampamp))
226               Tok->Type = TT_PointerOrReference;
227           }
228         }
229 
230         if (StartsObjCMethodExpr) {
231           CurrentToken->Type = TT_ObjCMethodExpr;
232           if (Contexts.back().FirstObjCSelectorName) {
233             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
234                 Contexts.back().LongestObjCSelectorName;
235           }
236         }
237 
238         if (Left->is(TT_AttributeParen))
239           CurrentToken->Type = TT_AttributeParen;
240         if (Left->Previous && Left->Previous->is(TT_JavaAnnotation))
241           CurrentToken->Type = TT_JavaAnnotation;
242         if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation))
243           CurrentToken->Type = TT_LeadingJavaAnnotation;
244 
245         if (!HasMultipleLines)
246           Left->PackingKind = PPK_Inconclusive;
247         else if (HasMultipleParametersOnALine)
248           Left->PackingKind = PPK_BinPacked;
249         else
250           Left->PackingKind = PPK_OnePerLine;
251 
252         next();
253         return true;
254       }
255       if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
256         return false;
257 
258       if (CurrentToken->is(tok::l_brace))
259         Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
260       if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
261           !CurrentToken->Next->HasUnescapedNewline &&
262           !CurrentToken->Next->isTrailingComment())
263         HasMultipleParametersOnALine = true;
264       if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) ||
265           CurrentToken->isSimpleTypeSpecifier())
266         Contexts.back().IsExpression = false;
267       if (CurrentToken->isOneOf(tok::semi, tok::colon))
268         MightBeObjCForRangeLoop = false;
269       if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in))
270         CurrentToken->Type = TT_ObjCForIn;
271       // When we discover a 'new', we set CanBeExpression to 'false' in order to
272       // parse the type correctly. Reset that after a comma.
273       if (CurrentToken->is(tok::comma))
274         Contexts.back().CanBeExpression = true;
275 
276       FormatToken *Tok = CurrentToken;
277       if (!consumeToken())
278         return false;
279       updateParameterCount(Left, Tok);
280       if (CurrentToken && CurrentToken->HasUnescapedNewline)
281         HasMultipleLines = true;
282     }
283     return false;
284   }
285 
286   bool parseSquare() {
287     if (!CurrentToken)
288       return false;
289 
290     // A '[' could be an index subscript (after an identifier or after
291     // ')' or ']'), it could be the start of an Objective-C method
292     // expression, or it could the start of an Objective-C array literal.
293     FormatToken *Left = CurrentToken->Previous;
294     Left->ParentBracket = Contexts.back().ContextKind;
295     FormatToken *Parent = Left->getPreviousNonComment();
296     bool StartsObjCMethodExpr =
297         Style.Language == FormatStyle::LK_Cpp &&
298         Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
299         CurrentToken->isNot(tok::l_brace) &&
300         (!Parent ||
301          Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
302                          tok::kw_return, tok::kw_throw) ||
303          Parent->isUnaryOperator() ||
304          Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
305          getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
306     bool ColonFound = false;
307 
308     unsigned BindingIncrease = 1;
309     if (Left->is(TT_Unknown)) {
310       if (StartsObjCMethodExpr) {
311         Left->Type = TT_ObjCMethodExpr;
312       } else if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
313                  Contexts.back().ContextKind == tok::l_brace &&
314                  Parent->isOneOf(tok::l_brace, tok::comma)) {
315         Left->Type = TT_JsComputedPropertyName;
316       } else if (Style.Language == FormatStyle::LK_Proto ||
317                  (Parent &&
318                   Parent->isOneOf(TT_BinaryOperator, tok::at, tok::comma,
319                                   tok::l_paren, tok::l_square, tok::question,
320                                   tok::colon, tok::kw_return,
321                                   // Should only be relevant to JavaScript:
322                                   tok::kw_default))) {
323         Left->Type = TT_ArrayInitializerLSquare;
324       } else {
325         BindingIncrease = 10;
326         Left->Type = TT_ArraySubscriptLSquare;
327       }
328     }
329 
330     ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
331     Contexts.back().IsExpression = true;
332     Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
333 
334     while (CurrentToken) {
335       if (CurrentToken->is(tok::r_square)) {
336         if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
337             Left->is(TT_ObjCMethodExpr)) {
338           // An ObjC method call is rarely followed by an open parenthesis.
339           // FIXME: Do we incorrectly label ":" with this?
340           StartsObjCMethodExpr = false;
341           Left->Type = TT_Unknown;
342         }
343         if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
344           CurrentToken->Type = TT_ObjCMethodExpr;
345           // determineStarAmpUsage() thinks that '*' '[' is allocating an
346           // array of pointers, but if '[' starts a selector then '*' is a
347           // binary operator.
348           if (Parent && Parent->is(TT_PointerOrReference))
349             Parent->Type = TT_BinaryOperator;
350         }
351         Left->MatchingParen = CurrentToken;
352         CurrentToken->MatchingParen = Left;
353         if (Contexts.back().FirstObjCSelectorName) {
354           Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
355               Contexts.back().LongestObjCSelectorName;
356           if (Left->BlockParameterCount > 1)
357             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
358         }
359         next();
360         return true;
361       }
362       if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
363         return false;
364       if (CurrentToken->is(tok::colon)) {
365         if (Left->is(TT_ArraySubscriptLSquare)) {
366           Left->Type = TT_ObjCMethodExpr;
367           StartsObjCMethodExpr = true;
368           Contexts.back().ColonIsObjCMethodExpr = true;
369           if (Parent && Parent->is(tok::r_paren))
370             Parent->Type = TT_CastRParen;
371         }
372         ColonFound = true;
373       }
374       if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
375           !ColonFound)
376         Left->Type = TT_ArrayInitializerLSquare;
377       FormatToken *Tok = CurrentToken;
378       if (!consumeToken())
379         return false;
380       updateParameterCount(Left, Tok);
381     }
382     return false;
383   }
384 
385   bool parseBrace() {
386     if (CurrentToken) {
387       FormatToken *Left = CurrentToken->Previous;
388       Left->ParentBracket = Contexts.back().ContextKind;
389 
390       if (Contexts.back().CaretFound)
391         Left->Type = TT_ObjCBlockLBrace;
392       Contexts.back().CaretFound = false;
393 
394       ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
395       Contexts.back().ColonIsDictLiteral = true;
396       if (Left->BlockKind == BK_BracedInit)
397         Contexts.back().IsExpression = true;
398 
399       while (CurrentToken) {
400         if (CurrentToken->is(tok::r_brace)) {
401           Left->MatchingParen = CurrentToken;
402           CurrentToken->MatchingParen = Left;
403           next();
404           return true;
405         }
406         if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
407           return false;
408         updateParameterCount(Left, CurrentToken);
409         if (CurrentToken->isOneOf(tok::colon, tok::l_brace)) {
410           FormatToken *Previous = CurrentToken->getPreviousNonComment();
411           if (((CurrentToken->is(tok::colon) &&
412                 (!Contexts.back().ColonIsDictLiteral ||
413                  Style.Language != FormatStyle::LK_Cpp)) ||
414                Style.Language == FormatStyle::LK_Proto) &&
415               (Previous->Tok.getIdentifierInfo() ||
416                Previous->is(tok::string_literal)))
417             Previous->Type = TT_SelectorName;
418           if (CurrentToken->is(tok::colon) ||
419               Style.Language == FormatStyle::LK_JavaScript)
420             Left->Type = TT_DictLiteral;
421         }
422         if (!consumeToken())
423           return false;
424       }
425     }
426     return true;
427   }
428 
429   void updateParameterCount(FormatToken *Left, FormatToken *Current) {
430     if (Current->is(tok::l_brace) && Current->BlockKind == BK_Block)
431       ++Left->BlockParameterCount;
432     if (Current->is(tok::comma)) {
433       ++Left->ParameterCount;
434       if (!Left->Role)
435         Left->Role.reset(new CommaSeparatedList(Style));
436       Left->Role->CommaFound(Current);
437     } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
438       Left->ParameterCount = 1;
439     }
440   }
441 
442   bool parseConditional() {
443     while (CurrentToken) {
444       if (CurrentToken->is(tok::colon)) {
445         CurrentToken->Type = TT_ConditionalExpr;
446         next();
447         return true;
448       }
449       if (!consumeToken())
450         return false;
451     }
452     return false;
453   }
454 
455   bool parseTemplateDeclaration() {
456     if (CurrentToken && CurrentToken->is(tok::less)) {
457       CurrentToken->Type = TT_TemplateOpener;
458       next();
459       if (!parseAngle())
460         return false;
461       if (CurrentToken)
462         CurrentToken->Previous->ClosesTemplateDeclaration = true;
463       return true;
464     }
465     return false;
466   }
467 
468   bool consumeToken() {
469     FormatToken *Tok = CurrentToken;
470     next();
471     switch (Tok->Tok.getKind()) {
472     case tok::plus:
473     case tok::minus:
474       if (!Tok->Previous && Line.MustBeDeclaration)
475         Tok->Type = TT_ObjCMethodSpecifier;
476       break;
477     case tok::colon:
478       if (!Tok->Previous)
479         return false;
480       // Colons from ?: are handled in parseConditional().
481       if (Style.Language == FormatStyle::LK_JavaScript) {
482         if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
483             (Contexts.size() == 1 &&               // switch/case labels
484              !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
485             Contexts.back().ContextKind == tok::l_paren ||  // function params
486             Contexts.back().ContextKind == tok::l_square || // array type
487             (Contexts.size() == 1 &&
488              Line.MustBeDeclaration)) { // method/property declaration
489           Tok->Type = TT_JsTypeColon;
490           break;
491         }
492       }
493       if (Contexts.back().ColonIsDictLiteral ||
494           Style.Language == FormatStyle::LK_Proto) {
495         Tok->Type = TT_DictLiteral;
496       } else if (Contexts.back().ColonIsObjCMethodExpr ||
497                  Line.startsWith(TT_ObjCMethodSpecifier)) {
498         Tok->Type = TT_ObjCMethodExpr;
499         Tok->Previous->Type = TT_SelectorName;
500         if (Tok->Previous->ColumnWidth >
501             Contexts.back().LongestObjCSelectorName)
502           Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth;
503         if (!Contexts.back().FirstObjCSelectorName)
504           Contexts.back().FirstObjCSelectorName = Tok->Previous;
505       } else if (Contexts.back().ColonIsForRangeExpr) {
506         Tok->Type = TT_RangeBasedForLoopColon;
507       } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
508         Tok->Type = TT_BitFieldColon;
509       } else if (Contexts.size() == 1 &&
510                  !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
511         if (Tok->Previous->isOneOf(tok::r_paren, tok::kw_noexcept))
512           Tok->Type = TT_CtorInitializerColon;
513         else
514           Tok->Type = TT_InheritanceColon;
515       } else if (Tok->Previous->is(tok::identifier) && Tok->Next &&
516                  Tok->Next->isOneOf(tok::r_paren, tok::comma)) {
517         // This handles a special macro in ObjC code where selectors including
518         // the colon are passed as macro arguments.
519         Tok->Type = TT_ObjCMethodExpr;
520       } else if (Contexts.back().ContextKind == tok::l_paren) {
521         Tok->Type = TT_InlineASMColon;
522       }
523       break;
524     case tok::pipe:
525     case tok::amp:
526       // | and & in declarations/type expressions represent union and
527       // intersection types, respectively.
528       if (Style.Language == FormatStyle::LK_JavaScript &&
529           !Contexts.back().IsExpression)
530         Tok->Type = TT_JsTypeOperator;
531       break;
532     case tok::kw_if:
533     case tok::kw_while:
534       if (CurrentToken && CurrentToken->is(tok::l_paren)) {
535         next();
536         if (!parseParens(/*LookForDecls=*/true))
537           return false;
538       }
539       break;
540     case tok::kw_for:
541       if (Style.Language == FormatStyle::LK_JavaScript && Tok->Previous &&
542           Tok->Previous->is(tok::period))
543         break;
544       Contexts.back().ColonIsForRangeExpr = true;
545       next();
546       if (!parseParens())
547         return false;
548       break;
549     case tok::l_paren:
550       // When faced with 'operator()()', the kw_operator handler incorrectly
551       // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
552       // the first two parens OverloadedOperators and the second l_paren an
553       // OverloadedOperatorLParen.
554       if (Tok->Previous &&
555           Tok->Previous->is(tok::r_paren) &&
556           Tok->Previous->MatchingParen &&
557           Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
558         Tok->Previous->Type = TT_OverloadedOperator;
559         Tok->Previous->MatchingParen->Type = TT_OverloadedOperator;
560         Tok->Type = TT_OverloadedOperatorLParen;
561       }
562 
563       if (!parseParens())
564         return false;
565       if (Line.MustBeDeclaration && Contexts.size() == 1 &&
566           !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
567           (!Tok->Previous ||
568            !Tok->Previous->isOneOf(tok::kw_decltype, tok::kw___attribute,
569                                    TT_LeadingJavaAnnotation)))
570         Line.MightBeFunctionDecl = true;
571       break;
572     case tok::l_square:
573       if (!parseSquare())
574         return false;
575       break;
576     case tok::l_brace:
577       if (!parseBrace())
578         return false;
579       break;
580     case tok::less:
581       if (parseAngle()) {
582         Tok->Type = TT_TemplateOpener;
583       } else {
584         Tok->Type = TT_BinaryOperator;
585         NonTemplateLess.insert(Tok);
586         CurrentToken = Tok;
587         next();
588       }
589       break;
590     case tok::r_paren:
591     case tok::r_square:
592       return false;
593     case tok::r_brace:
594       // Lines can start with '}'.
595       if (Tok->Previous)
596         return false;
597       break;
598     case tok::greater:
599       Tok->Type = TT_BinaryOperator;
600       break;
601     case tok::kw_operator:
602       while (CurrentToken &&
603              !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
604         if (CurrentToken->isOneOf(tok::star, tok::amp))
605           CurrentToken->Type = TT_PointerOrReference;
606         consumeToken();
607         if (CurrentToken &&
608             CurrentToken->Previous->isOneOf(TT_BinaryOperator, tok::comma))
609           CurrentToken->Previous->Type = TT_OverloadedOperator;
610       }
611       if (CurrentToken) {
612         CurrentToken->Type = TT_OverloadedOperatorLParen;
613         if (CurrentToken->Previous->is(TT_BinaryOperator))
614           CurrentToken->Previous->Type = TT_OverloadedOperator;
615       }
616       break;
617     case tok::question:
618       if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next &&
619           Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
620                              tok::r_brace)) {
621         // Question marks before semicolons, colons, etc. indicate optional
622         // types (fields, parameters), e.g.
623         //   function(x?: string, y?) {...}
624         //   class X { y?; }
625         Tok->Type = TT_JsTypeOptionalQuestion;
626         break;
627       }
628       // Declarations cannot be conditional expressions, this can only be part
629       // of a type declaration.
630       if (Line.MustBeDeclaration &&
631           Style.Language == FormatStyle::LK_JavaScript)
632         break;
633       parseConditional();
634       break;
635     case tok::kw_template:
636       parseTemplateDeclaration();
637       break;
638     case tok::comma:
639       if (Contexts.back().InCtorInitializer)
640         Tok->Type = TT_CtorInitializerComma;
641       else if (Contexts.back().FirstStartOfName &&
642                (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) {
643         Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
644         Line.IsMultiVariableDeclStmt = true;
645       }
646       if (Contexts.back().IsForEachMacro)
647         Contexts.back().IsExpression = true;
648       break;
649     default:
650       break;
651     }
652     return true;
653   }
654 
655   void parseIncludeDirective() {
656     if (CurrentToken && CurrentToken->is(tok::less)) {
657       next();
658       while (CurrentToken) {
659         if (CurrentToken->isNot(tok::comment) || CurrentToken->Next)
660           CurrentToken->Type = TT_ImplicitStringLiteral;
661         next();
662       }
663     }
664   }
665 
666   void parseWarningOrError() {
667     next();
668     // We still want to format the whitespace left of the first token of the
669     // warning or error.
670     next();
671     while (CurrentToken) {
672       CurrentToken->Type = TT_ImplicitStringLiteral;
673       next();
674     }
675   }
676 
677   void parsePragma() {
678     next(); // Consume "pragma".
679     if (CurrentToken &&
680         CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) {
681       bool IsMark = CurrentToken->is(Keywords.kw_mark);
682       next(); // Consume "mark".
683       next(); // Consume first token (so we fix leading whitespace).
684       while (CurrentToken) {
685         if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator))
686           CurrentToken->Type = TT_ImplicitStringLiteral;
687         next();
688       }
689     }
690   }
691 
692   LineType parsePreprocessorDirective() {
693     LineType Type = LT_PreprocessorDirective;
694     next();
695     if (!CurrentToken)
696       return Type;
697     if (CurrentToken->Tok.is(tok::numeric_constant)) {
698       CurrentToken->SpacesRequiredBefore = 1;
699       return Type;
700     }
701     // Hashes in the middle of a line can lead to any strange token
702     // sequence.
703     if (!CurrentToken->Tok.getIdentifierInfo())
704       return Type;
705     switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
706     case tok::pp_include:
707     case tok::pp_include_next:
708     case tok::pp_import:
709       next();
710       parseIncludeDirective();
711       Type = LT_ImportStatement;
712       break;
713     case tok::pp_error:
714     case tok::pp_warning:
715       parseWarningOrError();
716       break;
717     case tok::pp_pragma:
718       parsePragma();
719       break;
720     case tok::pp_if:
721     case tok::pp_elif:
722       Contexts.back().IsExpression = true;
723       parseLine();
724       break;
725     default:
726       break;
727     }
728     while (CurrentToken)
729       next();
730     return Type;
731   }
732 
733 public:
734   LineType parseLine() {
735     NonTemplateLess.clear();
736     if (CurrentToken->is(tok::hash))
737       return parsePreprocessorDirective();
738 
739     // Directly allow to 'import <string-literal>' to support protocol buffer
740     // definitions (code.google.com/p/protobuf) or missing "#" (either way we
741     // should not break the line).
742     IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
743     if ((Style.Language == FormatStyle::LK_Java &&
744          CurrentToken->is(Keywords.kw_package)) ||
745         (Info && Info->getPPKeywordID() == tok::pp_import &&
746          CurrentToken->Next &&
747          CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
748                                      tok::kw_static))) {
749       next();
750       parseIncludeDirective();
751       return LT_ImportStatement;
752     }
753 
754     // If this line starts and ends in '<' and '>', respectively, it is likely
755     // part of "#define <a/b.h>".
756     if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
757       parseIncludeDirective();
758       return LT_ImportStatement;
759     }
760 
761     // In .proto files, top-level options are very similar to import statements
762     // and should not be line-wrapped.
763     if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
764         CurrentToken->is(Keywords.kw_option)) {
765       next();
766       if (CurrentToken && CurrentToken->is(tok::identifier))
767         return LT_ImportStatement;
768     }
769 
770     // import {...} from '...';
771     if (Style.Language == FormatStyle::LK_JavaScript &&
772         CurrentToken->is(Keywords.kw_import))
773       return LT_ImportStatement;
774 
775     bool KeywordVirtualFound = false;
776     bool ImportStatement = false;
777     while (CurrentToken) {
778       if (CurrentToken->is(tok::kw_virtual))
779         KeywordVirtualFound = true;
780       if (Style.Language == FormatStyle::LK_JavaScript) {
781         // export {...} from '...';
782         // An export followed by "from 'some string';" is a re-export from
783         // another module identified by a URI and is treated as a
784         // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
785         // Just "export {...};" or "export class ..." should not be treated as
786         // an import in this sense.
787         if (Line.First->is(tok::kw_export) &&
788             CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
789             CurrentToken->Next->isStringLiteral())
790           ImportStatement = true;
791         if (isClosureImportStatement(*CurrentToken))
792           ImportStatement = true;
793       }
794       if (!consumeToken())
795         return LT_Invalid;
796     }
797     if (KeywordVirtualFound)
798       return LT_VirtualFunctionDecl;
799     if (ImportStatement)
800       return LT_ImportStatement;
801 
802     if (Line.startsWith(TT_ObjCMethodSpecifier)) {
803       if (Contexts.back().FirstObjCSelectorName)
804         Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
805             Contexts.back().LongestObjCSelectorName;
806       return LT_ObjCMethodDecl;
807     }
808 
809     return LT_Other;
810   }
811 
812 private:
813   bool isClosureImportStatement(const FormatToken &Tok) {
814     // FIXME: Closure-library specific stuff should not be hard-coded but be
815     // configurable.
816     return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
817            Tok.Next->Next && (Tok.Next->Next->TokenText == "module" ||
818                               Tok.Next->Next->TokenText == "provide" ||
819                               Tok.Next->Next->TokenText == "require" ||
820                               Tok.Next->Next->TokenText == "setTestOnly" ||
821                               Tok.Next->Next->TokenText == "forwardDeclare") &&
822            Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
823   }
824 
825   void resetTokenMetadata(FormatToken *Token) {
826     if (!Token)
827       return;
828 
829     // Reset token type in case we have already looked at it and then
830     // recovered from an error (e.g. failure to find the matching >).
831     if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
832                                TT_FunctionLBrace, TT_ImplicitStringLiteral,
833                                TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
834                                TT_RegexLiteral))
835       CurrentToken->Type = TT_Unknown;
836     CurrentToken->Role.reset();
837     CurrentToken->MatchingParen = nullptr;
838     CurrentToken->FakeLParens.clear();
839     CurrentToken->FakeRParens = 0;
840   }
841 
842   void next() {
843     if (CurrentToken) {
844       CurrentToken->NestingLevel = Contexts.size() - 1;
845       CurrentToken->BindingStrength = Contexts.back().BindingStrength;
846       modifyContext(*CurrentToken);
847       determineTokenType(*CurrentToken);
848       CurrentToken = CurrentToken->Next;
849     }
850 
851     resetTokenMetadata(CurrentToken);
852   }
853 
854   /// \brief A struct to hold information valid in a specific context, e.g.
855   /// a pair of parenthesis.
856   struct Context {
857     Context(tok::TokenKind ContextKind, unsigned BindingStrength,
858             bool IsExpression)
859         : ContextKind(ContextKind), BindingStrength(BindingStrength),
860           IsExpression(IsExpression) {}
861 
862     tok::TokenKind ContextKind;
863     unsigned BindingStrength;
864     bool IsExpression;
865     unsigned LongestObjCSelectorName = 0;
866     bool ColonIsForRangeExpr = false;
867     bool ColonIsDictLiteral = false;
868     bool ColonIsObjCMethodExpr = false;
869     FormatToken *FirstObjCSelectorName = nullptr;
870     FormatToken *FirstStartOfName = nullptr;
871     bool CanBeExpression = true;
872     bool InTemplateArgument = false;
873     bool InCtorInitializer = false;
874     bool CaretFound = false;
875     bool IsForEachMacro = false;
876   };
877 
878   /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
879   /// of each instance.
880   struct ScopedContextCreator {
881     AnnotatingParser &P;
882 
883     ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
884                          unsigned Increase)
885         : P(P) {
886       P.Contexts.push_back(Context(ContextKind,
887                                    P.Contexts.back().BindingStrength + Increase,
888                                    P.Contexts.back().IsExpression));
889     }
890 
891     ~ScopedContextCreator() { P.Contexts.pop_back(); }
892   };
893 
894   void modifyContext(const FormatToken &Current) {
895     if (Current.getPrecedence() == prec::Assignment &&
896         !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) &&
897         (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
898       Contexts.back().IsExpression = true;
899       if (!Line.startsWith(TT_UnaryOperator)) {
900         for (FormatToken *Previous = Current.Previous;
901              Previous && Previous->Previous &&
902              !Previous->Previous->isOneOf(tok::comma, tok::semi);
903              Previous = Previous->Previous) {
904           if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
905             Previous = Previous->MatchingParen;
906             if (!Previous)
907               break;
908           }
909           if (Previous->opensScope())
910             break;
911           if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
912               Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
913               Previous->Previous && Previous->Previous->isNot(tok::equal))
914             Previous->Type = TT_PointerOrReference;
915         }
916       }
917     } else if (Current.is(tok::lessless) &&
918                (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
919       Contexts.back().IsExpression = true;
920     } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
921       Contexts.back().IsExpression = true;
922     } else if (Current.is(TT_TrailingReturnArrow)) {
923       Contexts.back().IsExpression = false;
924     } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
925       Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
926     } else if (Current.Previous &&
927                Current.Previous->is(TT_CtorInitializerColon)) {
928       Contexts.back().IsExpression = true;
929       Contexts.back().InCtorInitializer = true;
930     } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
931       for (FormatToken *Previous = Current.Previous;
932            Previous && Previous->isOneOf(tok::star, tok::amp);
933            Previous = Previous->Previous)
934         Previous->Type = TT_PointerOrReference;
935       if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer)
936         Contexts.back().IsExpression = false;
937     } else if (Current.is(tok::kw_new)) {
938       Contexts.back().CanBeExpression = false;
939     } else if (Current.isOneOf(tok::semi, tok::exclaim)) {
940       // This should be the condition or increment in a for-loop.
941       Contexts.back().IsExpression = true;
942     }
943   }
944 
945   void determineTokenType(FormatToken &Current) {
946     if (!Current.is(TT_Unknown))
947       // The token type is already known.
948       return;
949 
950     // Line.MightBeFunctionDecl can only be true after the parentheses of a
951     // function declaration have been found. In this case, 'Current' is a
952     // trailing token of this declaration and thus cannot be a name.
953     if (Current.is(Keywords.kw_instanceof)) {
954       Current.Type = TT_BinaryOperator;
955     } else if (isStartOfName(Current) &&
956                (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
957       Contexts.back().FirstStartOfName = &Current;
958       Current.Type = TT_StartOfName;
959     } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
960       AutoFound = true;
961     } else if (Current.is(tok::arrow) &&
962                Style.Language == FormatStyle::LK_Java) {
963       Current.Type = TT_LambdaArrow;
964     } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration &&
965                Current.NestingLevel == 0) {
966       Current.Type = TT_TrailingReturnArrow;
967     } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
968       Current.Type =
969           determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
970                                              Contexts.back().IsExpression,
971                                 Contexts.back().InTemplateArgument);
972     } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
973       Current.Type = determinePlusMinusCaretUsage(Current);
974       if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
975         Contexts.back().CaretFound = true;
976     } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
977       Current.Type = determineIncrementUsage(Current);
978     } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
979       Current.Type = TT_UnaryOperator;
980     } else if (Current.is(tok::question)) {
981       if (Style.Language == FormatStyle::LK_JavaScript &&
982           Line.MustBeDeclaration) {
983         // In JavaScript, `interface X { foo?(): bar; }` is an optional method
984         // on the interface, not a ternary expression.
985         Current.Type = TT_JsTypeOptionalQuestion;
986       } else {
987         Current.Type = TT_ConditionalExpr;
988       }
989     } else if (Current.isBinaryOperator() &&
990                (!Current.Previous || Current.Previous->isNot(tok::l_square))) {
991       Current.Type = TT_BinaryOperator;
992     } else if (Current.is(tok::comment)) {
993       if (Current.TokenText.startswith("/*")) {
994         if (Current.TokenText.endswith("*/"))
995           Current.Type = TT_BlockComment;
996         else
997           // The lexer has for some reason determined a comment here. But we
998           // cannot really handle it, if it isn't properly terminated.
999           Current.Tok.setKind(tok::unknown);
1000       } else {
1001         Current.Type = TT_LineComment;
1002       }
1003     } else if (Current.is(tok::r_paren)) {
1004       if (rParenEndsCast(Current))
1005         Current.Type = TT_CastRParen;
1006       if (Current.MatchingParen && Current.Next &&
1007           !Current.Next->isBinaryOperator() &&
1008           !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
1009                                  tok::period, tok::arrow, tok::coloncolon))
1010         if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
1011           if (BeforeParen->is(tok::identifier) &&
1012               BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
1013               (!BeforeParen->Previous ||
1014                BeforeParen->Previous->ClosesTemplateDeclaration))
1015             Current.Type = TT_FunctionAnnotationRParen;
1016     } else if (Current.is(tok::at) && Current.Next) {
1017       if (Current.Next->isStringLiteral()) {
1018         Current.Type = TT_ObjCStringLiteral;
1019       } else {
1020         switch (Current.Next->Tok.getObjCKeywordID()) {
1021         case tok::objc_interface:
1022         case tok::objc_implementation:
1023         case tok::objc_protocol:
1024           Current.Type = TT_ObjCDecl;
1025           break;
1026         case tok::objc_property:
1027           Current.Type = TT_ObjCProperty;
1028           break;
1029         default:
1030           break;
1031         }
1032       }
1033     } else if (Current.is(tok::period)) {
1034       FormatToken *PreviousNoComment = Current.getPreviousNonComment();
1035       if (PreviousNoComment &&
1036           PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
1037         Current.Type = TT_DesignatedInitializerPeriod;
1038       else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
1039                Current.Previous->isOneOf(TT_JavaAnnotation,
1040                                          TT_LeadingJavaAnnotation)) {
1041         Current.Type = Current.Previous->Type;
1042       }
1043     } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
1044                Current.Previous &&
1045                !Current.Previous->isOneOf(tok::equal, tok::at) &&
1046                Line.MightBeFunctionDecl && Contexts.size() == 1) {
1047       // Line.MightBeFunctionDecl can only be true after the parentheses of a
1048       // function declaration have been found.
1049       Current.Type = TT_TrailingAnnotation;
1050     } else if ((Style.Language == FormatStyle::LK_Java ||
1051                 Style.Language == FormatStyle::LK_JavaScript) &&
1052                Current.Previous) {
1053       if (Current.Previous->is(tok::at) &&
1054           Current.isNot(Keywords.kw_interface)) {
1055         const FormatToken &AtToken = *Current.Previous;
1056         const FormatToken *Previous = AtToken.getPreviousNonComment();
1057         if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
1058           Current.Type = TT_LeadingJavaAnnotation;
1059         else
1060           Current.Type = TT_JavaAnnotation;
1061       } else if (Current.Previous->is(tok::period) &&
1062                  Current.Previous->isOneOf(TT_JavaAnnotation,
1063                                            TT_LeadingJavaAnnotation)) {
1064         Current.Type = Current.Previous->Type;
1065       }
1066     }
1067   }
1068 
1069   /// \brief Take a guess at whether \p Tok starts a name of a function or
1070   /// variable declaration.
1071   ///
1072   /// This is a heuristic based on whether \p Tok is an identifier following
1073   /// something that is likely a type.
1074   bool isStartOfName(const FormatToken &Tok) {
1075     if (Tok.isNot(tok::identifier) || !Tok.Previous)
1076       return false;
1077 
1078     if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof))
1079       return false;
1080     if (Style.Language == FormatStyle::LK_JavaScript &&
1081         Tok.Previous->is(Keywords.kw_in))
1082       return false;
1083 
1084     // Skip "const" as it does not have an influence on whether this is a name.
1085     FormatToken *PreviousNotConst = Tok.Previous;
1086     while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
1087       PreviousNotConst = PreviousNotConst->Previous;
1088 
1089     if (!PreviousNotConst)
1090       return false;
1091 
1092     bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
1093                        PreviousNotConst->Previous &&
1094                        PreviousNotConst->Previous->is(tok::hash);
1095 
1096     if (PreviousNotConst->is(TT_TemplateCloser))
1097       return PreviousNotConst && PreviousNotConst->MatchingParen &&
1098              PreviousNotConst->MatchingParen->Previous &&
1099              PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
1100              PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
1101 
1102     if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen &&
1103         PreviousNotConst->MatchingParen->Previous &&
1104         PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype))
1105       return true;
1106 
1107     return (!IsPPKeyword &&
1108             PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) ||
1109            PreviousNotConst->is(TT_PointerOrReference) ||
1110            PreviousNotConst->isSimpleTypeSpecifier();
1111   }
1112 
1113   /// \brief Determine whether ')' is ending a cast.
1114   bool rParenEndsCast(const FormatToken &Tok) {
1115     // C-style casts are only used in C++ and Java.
1116     if (Style.Language != FormatStyle::LK_Cpp &&
1117         Style.Language != FormatStyle::LK_Java)
1118       return false;
1119 
1120     // Empty parens aren't casts and there are no casts at the end of the line.
1121     if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
1122       return false;
1123 
1124     FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
1125     if (LeftOfParens) {
1126       // If there is an opening parenthesis left of the current parentheses,
1127       // look past it as these might be chained casts.
1128       if (LeftOfParens->is(tok::r_paren)) {
1129         if (!LeftOfParens->MatchingParen ||
1130             !LeftOfParens->MatchingParen->Previous)
1131           return false;
1132         LeftOfParens = LeftOfParens->MatchingParen->Previous;
1133       }
1134 
1135       // If there is an identifier (or with a few exceptions a keyword) right
1136       // before the parentheses, this is unlikely to be a cast.
1137       if (LeftOfParens->Tok.getIdentifierInfo() &&
1138           !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
1139                                  tok::kw_delete))
1140         return false;
1141 
1142       // Certain other tokens right before the parentheses are also signals that
1143       // this cannot be a cast.
1144       if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
1145                                 TT_TemplateCloser))
1146         return false;
1147     }
1148 
1149     if (Tok.Next->is(tok::question))
1150       return false;
1151 
1152     // As Java has no function types, a "(" after the ")" likely means that this
1153     // is a cast.
1154     if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
1155       return true;
1156 
1157     // If a (non-string) literal follows, this is likely a cast.
1158     if (Tok.Next->isNot(tok::string_literal) &&
1159         (Tok.Next->Tok.isLiteral() ||
1160          Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
1161       return true;
1162 
1163     // Heuristically try to determine whether the parentheses contain a type.
1164     bool ParensAreType =
1165         !Tok.Previous ||
1166         Tok.Previous->isOneOf(TT_PointerOrReference, TT_TemplateCloser) ||
1167         Tok.Previous->isSimpleTypeSpecifier();
1168     bool ParensCouldEndDecl =
1169         Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
1170     if (ParensAreType && !ParensCouldEndDecl)
1171       return true;
1172 
1173     // At this point, we heuristically assume that there are no casts at the
1174     // start of the line. We assume that we have found most cases where there
1175     // are by the logic above, e.g. "(void)x;".
1176     if (!LeftOfParens)
1177       return false;
1178 
1179     // If the following token is an identifier or 'this', this is a cast. All
1180     // cases where this can be something else are handled above.
1181     if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
1182       return true;
1183 
1184     if (!Tok.Next->Next)
1185       return false;
1186 
1187     // If the next token after the parenthesis is a unary operator, assume
1188     // that this is cast, unless there are unexpected tokens inside the
1189     // parenthesis.
1190     bool NextIsUnary =
1191         Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star);
1192     if (!NextIsUnary || Tok.Next->is(tok::plus) ||
1193         !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant))
1194       return false;
1195     // Search for unexpected tokens.
1196     for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
1197          Prev = Prev->Previous) {
1198       if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
1199         return false;
1200     }
1201     return true;
1202   }
1203 
1204   /// \brief Return the type of the given token assuming it is * or &.
1205   TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
1206                                   bool InTemplateArgument) {
1207     if (Style.Language == FormatStyle::LK_JavaScript)
1208       return TT_BinaryOperator;
1209 
1210     const FormatToken *PrevToken = Tok.getPreviousNonComment();
1211     if (!PrevToken)
1212       return TT_UnaryOperator;
1213 
1214     const FormatToken *NextToken = Tok.getNextNonComment();
1215     if (!NextToken ||
1216         NextToken->isOneOf(tok::arrow, Keywords.kw_final,
1217                            Keywords.kw_override) ||
1218         (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
1219       return TT_PointerOrReference;
1220 
1221     if (PrevToken->is(tok::coloncolon))
1222       return TT_PointerOrReference;
1223 
1224     if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
1225                            tok::comma, tok::semi, tok::kw_return, tok::colon,
1226                            tok::equal, tok::kw_delete, tok::kw_sizeof) ||
1227         PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr,
1228                            TT_UnaryOperator, TT_CastRParen))
1229       return TT_UnaryOperator;
1230 
1231     if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
1232       return TT_PointerOrReference;
1233     if (NextToken->is(tok::kw_operator) && !IsExpression)
1234       return TT_PointerOrReference;
1235     if (NextToken->isOneOf(tok::comma, tok::semi))
1236       return TT_PointerOrReference;
1237 
1238     if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&
1239         PrevToken->MatchingParen->Previous &&
1240         PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof,
1241                                                     tok::kw_decltype))
1242       return TT_PointerOrReference;
1243 
1244     if (PrevToken->Tok.isLiteral() ||
1245         PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
1246                            tok::kw_false, tok::r_brace) ||
1247         NextToken->Tok.isLiteral() ||
1248         NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
1249         NextToken->isUnaryOperator() ||
1250         // If we know we're in a template argument, there are no named
1251         // declarations. Thus, having an identifier on the right-hand side
1252         // indicates a binary operator.
1253         (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
1254       return TT_BinaryOperator;
1255 
1256     // "&&(" is quite unlikely to be two successive unary "&".
1257     if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren))
1258       return TT_BinaryOperator;
1259 
1260     // This catches some cases where evaluation order is used as control flow:
1261     //   aaa && aaa->f();
1262     const FormatToken *NextNextToken = NextToken->getNextNonComment();
1263     if (NextNextToken && NextNextToken->is(tok::arrow))
1264       return TT_BinaryOperator;
1265 
1266     // It is very unlikely that we are going to find a pointer or reference type
1267     // definition on the RHS of an assignment.
1268     if (IsExpression && !Contexts.back().CaretFound)
1269       return TT_BinaryOperator;
1270 
1271     return TT_PointerOrReference;
1272   }
1273 
1274   TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
1275     const FormatToken *PrevToken = Tok.getPreviousNonComment();
1276     if (!PrevToken || PrevToken->is(TT_CastRParen))
1277       return TT_UnaryOperator;
1278 
1279     // Use heuristics to recognize unary operators.
1280     if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
1281                            tok::question, tok::colon, tok::kw_return,
1282                            tok::kw_case, tok::at, tok::l_brace))
1283       return TT_UnaryOperator;
1284 
1285     // There can't be two consecutive binary operators.
1286     if (PrevToken->is(TT_BinaryOperator))
1287       return TT_UnaryOperator;
1288 
1289     // Fall back to marking the token as binary operator.
1290     return TT_BinaryOperator;
1291   }
1292 
1293   /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
1294   TokenType determineIncrementUsage(const FormatToken &Tok) {
1295     const FormatToken *PrevToken = Tok.getPreviousNonComment();
1296     if (!PrevToken || PrevToken->is(TT_CastRParen))
1297       return TT_UnaryOperator;
1298     if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
1299       return TT_TrailingUnaryOperator;
1300 
1301     return TT_UnaryOperator;
1302   }
1303 
1304   SmallVector<Context, 8> Contexts;
1305 
1306   const FormatStyle &Style;
1307   AnnotatedLine &Line;
1308   FormatToken *CurrentToken;
1309   bool AutoFound;
1310   const AdditionalKeywords &Keywords;
1311 
1312   // Set of "<" tokens that do not open a template parameter list. If parseAngle
1313   // determines that a specific token can't be a template opener, it will make
1314   // same decision irrespective of the decisions for tokens leading up to it.
1315   // Store this information to prevent this from causing exponential runtime.
1316   llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
1317 };
1318 
1319 static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
1320 static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
1321 
1322 /// \brief Parses binary expressions by inserting fake parenthesis based on
1323 /// operator precedence.
1324 class ExpressionParser {
1325 public:
1326   ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
1327                    AnnotatedLine &Line)
1328       : Style(Style), Keywords(Keywords), Current(Line.First) {}
1329 
1330   /// \brief Parse expressions with the given operatore precedence.
1331   void parse(int Precedence = 0) {
1332     // Skip 'return' and ObjC selector colons as they are not part of a binary
1333     // expression.
1334     while (Current && (Current->is(tok::kw_return) ||
1335                        (Current->is(tok::colon) &&
1336                         Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))))
1337       next();
1338 
1339     if (!Current || Precedence > PrecedenceArrowAndPeriod)
1340       return;
1341 
1342     // Conditional expressions need to be parsed separately for proper nesting.
1343     if (Precedence == prec::Conditional) {
1344       parseConditionalExpr();
1345       return;
1346     }
1347 
1348     // Parse unary operators, which all have a higher precedence than binary
1349     // operators.
1350     if (Precedence == PrecedenceUnaryOperator) {
1351       parseUnaryOperator();
1352       return;
1353     }
1354 
1355     FormatToken *Start = Current;
1356     FormatToken *LatestOperator = nullptr;
1357     unsigned OperatorIndex = 0;
1358 
1359     while (Current) {
1360       // Consume operators with higher precedence.
1361       parse(Precedence + 1);
1362 
1363       int CurrentPrecedence = getCurrentPrecedence();
1364 
1365       if (Current && Current->is(TT_SelectorName) &&
1366           Precedence == CurrentPrecedence) {
1367         if (LatestOperator)
1368           addFakeParenthesis(Start, prec::Level(Precedence));
1369         Start = Current;
1370       }
1371 
1372       // At the end of the line or when an operator with higher precedence is
1373       // found, insert fake parenthesis and return.
1374       if (!Current || (Current->closesScope() && Current->MatchingParen) ||
1375           (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
1376           (CurrentPrecedence == prec::Conditional &&
1377            Precedence == prec::Assignment && Current->is(tok::colon))) {
1378         break;
1379       }
1380 
1381       // Consume scopes: (), [], <> and {}
1382       if (Current->opensScope()) {
1383         while (Current && !Current->closesScope()) {
1384           next();
1385           parse();
1386         }
1387         next();
1388       } else {
1389         // Operator found.
1390         if (CurrentPrecedence == Precedence) {
1391           if (LatestOperator)
1392             LatestOperator->NextOperator = Current;
1393           LatestOperator = Current;
1394           Current->OperatorIndex = OperatorIndex;
1395           ++OperatorIndex;
1396         }
1397         next(/*SkipPastLeadingComments=*/Precedence > 0);
1398       }
1399     }
1400 
1401     if (LatestOperator && (Current || Precedence > 0)) {
1402       // LatestOperator->LastOperator = true;
1403       if (Precedence == PrecedenceArrowAndPeriod) {
1404         // Call expressions don't have a binary operator precedence.
1405         addFakeParenthesis(Start, prec::Unknown);
1406       } else {
1407         addFakeParenthesis(Start, prec::Level(Precedence));
1408       }
1409     }
1410   }
1411 
1412 private:
1413   /// \brief Gets the precedence (+1) of the given token for binary operators
1414   /// and other tokens that we treat like binary operators.
1415   int getCurrentPrecedence() {
1416     if (Current) {
1417       const FormatToken *NextNonComment = Current->getNextNonComment();
1418       if (Current->is(TT_ConditionalExpr))
1419         return prec::Conditional;
1420       if (NextNonComment && NextNonComment->is(tok::colon) &&
1421           NextNonComment->is(TT_DictLiteral))
1422         return prec::Comma;
1423       if (Current->is(TT_LambdaArrow))
1424         return prec::Comma;
1425       if (Current->is(TT_JsFatArrow))
1426         return prec::Assignment;
1427       if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName,
1428                            TT_JsComputedPropertyName) ||
1429           (Current->is(tok::comment) && NextNonComment &&
1430            NextNonComment->is(TT_SelectorName)))
1431         return 0;
1432       if (Current->is(TT_RangeBasedForLoopColon))
1433         return prec::Comma;
1434       if ((Style.Language == FormatStyle::LK_Java ||
1435            Style.Language == FormatStyle::LK_JavaScript) &&
1436           Current->is(Keywords.kw_instanceof))
1437         return prec::Relational;
1438       if (Style.Language == FormatStyle::LK_JavaScript &&
1439           Current->is(Keywords.kw_in))
1440         return prec::Relational;
1441       if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
1442         return Current->getPrecedence();
1443       if (Current->isOneOf(tok::period, tok::arrow))
1444         return PrecedenceArrowAndPeriod;
1445       if (Style.Language == FormatStyle::LK_Java &&
1446           Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
1447                            Keywords.kw_throws))
1448         return 0;
1449     }
1450     return -1;
1451   }
1452 
1453   void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
1454     Start->FakeLParens.push_back(Precedence);
1455     if (Precedence > prec::Unknown)
1456       Start->StartsBinaryExpression = true;
1457     if (Current) {
1458       FormatToken *Previous = Current->Previous;
1459       while (Previous->is(tok::comment) && Previous->Previous)
1460         Previous = Previous->Previous;
1461       ++Previous->FakeRParens;
1462       if (Precedence > prec::Unknown)
1463         Previous->EndsBinaryExpression = true;
1464     }
1465   }
1466 
1467   /// \brief Parse unary operator expressions and surround them with fake
1468   /// parentheses if appropriate.
1469   void parseUnaryOperator() {
1470     if (!Current || Current->isNot(TT_UnaryOperator)) {
1471       parse(PrecedenceArrowAndPeriod);
1472       return;
1473     }
1474 
1475     FormatToken *Start = Current;
1476     next();
1477     parseUnaryOperator();
1478 
1479     // The actual precedence doesn't matter.
1480     addFakeParenthesis(Start, prec::Unknown);
1481   }
1482 
1483   void parseConditionalExpr() {
1484     while (Current && Current->isTrailingComment()) {
1485       next();
1486     }
1487     FormatToken *Start = Current;
1488     parse(prec::LogicalOr);
1489     if (!Current || !Current->is(tok::question))
1490       return;
1491     next();
1492     parse(prec::Assignment);
1493     if (!Current || Current->isNot(TT_ConditionalExpr))
1494       return;
1495     next();
1496     parse(prec::Assignment);
1497     addFakeParenthesis(Start, prec::Conditional);
1498   }
1499 
1500   void next(bool SkipPastLeadingComments = true) {
1501     if (Current)
1502       Current = Current->Next;
1503     while (Current &&
1504            (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
1505            Current->isTrailingComment())
1506       Current = Current->Next;
1507   }
1508 
1509   const FormatStyle &Style;
1510   const AdditionalKeywords &Keywords;
1511   FormatToken *Current;
1512 };
1513 
1514 } // end anonymous namespace
1515 
1516 void TokenAnnotator::setCommentLineLevels(
1517     SmallVectorImpl<AnnotatedLine *> &Lines) {
1518   const AnnotatedLine *NextNonCommentLine = nullptr;
1519   for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
1520                                                           E = Lines.rend();
1521        I != E; ++I) {
1522     if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
1523         (*I)->First->Next == nullptr)
1524       (*I)->Level = NextNonCommentLine->Level;
1525     else
1526       NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
1527 
1528     setCommentLineLevels((*I)->Children);
1529   }
1530 }
1531 
1532 void TokenAnnotator::annotate(AnnotatedLine &Line) {
1533   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1534                                                   E = Line.Children.end();
1535        I != E; ++I) {
1536     annotate(**I);
1537   }
1538   AnnotatingParser Parser(Style, Line, Keywords);
1539   Line.Type = Parser.parseLine();
1540   if (Line.Type == LT_Invalid)
1541     return;
1542 
1543   ExpressionParser ExprParser(Style, Keywords, Line);
1544   ExprParser.parse();
1545 
1546   if (Line.startsWith(TT_ObjCMethodSpecifier))
1547     Line.Type = LT_ObjCMethodDecl;
1548   else if (Line.startsWith(TT_ObjCDecl))
1549     Line.Type = LT_ObjCDecl;
1550   else if (Line.startsWith(TT_ObjCProperty))
1551     Line.Type = LT_ObjCProperty;
1552 
1553   Line.First->SpacesRequiredBefore = 1;
1554   Line.First->CanBreakBefore = Line.First->MustBreakBefore;
1555 }
1556 
1557 // This function heuristically determines whether 'Current' starts the name of a
1558 // function declaration.
1559 static bool isFunctionDeclarationName(const FormatToken &Current,
1560                                       const AnnotatedLine &Line) {
1561   auto skipOperatorName = [](const FormatToken* Next) -> const FormatToken* {
1562     for (; Next; Next = Next->Next) {
1563       if (Next->is(TT_OverloadedOperatorLParen))
1564         return Next;
1565       if (Next->is(TT_OverloadedOperator))
1566         continue;
1567       if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
1568         // For 'new[]' and 'delete[]'.
1569         if (Next->Next && Next->Next->is(tok::l_square) &&
1570             Next->Next->Next && Next->Next->Next->is(tok::r_square))
1571           Next = Next->Next->Next;
1572         continue;
1573       }
1574 
1575       break;
1576     }
1577     return nullptr;
1578   };
1579 
1580   // Find parentheses of parameter list.
1581   const FormatToken *Next = Current.Next;
1582   if (Current.is(tok::kw_operator)) {
1583     if (Current.Previous && Current.Previous->is(tok::coloncolon))
1584       return false;
1585     Next = skipOperatorName(Next);
1586   } else {
1587     if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0)
1588       return false;
1589     for (; Next; Next = Next->Next) {
1590       if (Next->is(TT_TemplateOpener)) {
1591         Next = Next->MatchingParen;
1592       } else if (Next->is(tok::coloncolon)) {
1593         Next = Next->Next;
1594         if (!Next)
1595           return false;
1596         if (Next->is(tok::kw_operator)) {
1597           Next = skipOperatorName(Next->Next);
1598           break;
1599         }
1600         if (!Next->is(tok::identifier))
1601           return false;
1602       } else if (Next->is(tok::l_paren)) {
1603         break;
1604       } else {
1605         return false;
1606       }
1607     }
1608   }
1609 
1610   // Check whether parameter list can be long to a function declaration.
1611   if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen)
1612     return false;
1613   // If the lines ends with "{", this is likely an function definition.
1614   if (Line.Last->is(tok::l_brace))
1615     return true;
1616   if (Next->Next == Next->MatchingParen)
1617     return true; // Empty parentheses.
1618   // If there is an &/&& after the r_paren, this is likely a function.
1619   if (Next->MatchingParen->Next &&
1620       Next->MatchingParen->Next->is(TT_PointerOrReference))
1621     return true;
1622   for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
1623        Tok = Tok->Next) {
1624     if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
1625         Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis))
1626       return true;
1627     if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
1628         Tok->Tok.isLiteral())
1629       return false;
1630   }
1631   return false;
1632 }
1633 
1634 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
1635   assert(Line.MightBeFunctionDecl);
1636 
1637   if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
1638        Style.AlwaysBreakAfterReturnType ==
1639            FormatStyle::RTBS_TopLevelDefinitions) &&
1640       Line.Level > 0)
1641     return false;
1642 
1643   switch (Style.AlwaysBreakAfterReturnType) {
1644   case FormatStyle::RTBS_None:
1645     return false;
1646   case FormatStyle::RTBS_All:
1647   case FormatStyle::RTBS_TopLevel:
1648     return true;
1649   case FormatStyle::RTBS_AllDefinitions:
1650   case FormatStyle::RTBS_TopLevelDefinitions:
1651     return Line.mightBeFunctionDefinition();
1652   }
1653 
1654   return false;
1655 }
1656 
1657 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
1658   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1659                                                   E = Line.Children.end();
1660        I != E; ++I) {
1661     calculateFormattingInformation(**I);
1662   }
1663 
1664   Line.First->TotalLength =
1665       Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
1666   if (!Line.First->Next)
1667     return;
1668   FormatToken *Current = Line.First->Next;
1669   bool InFunctionDecl = Line.MightBeFunctionDecl;
1670   while (Current) {
1671     if (isFunctionDeclarationName(*Current, Line))
1672       Current->Type = TT_FunctionDeclarationName;
1673     if (Current->is(TT_LineComment)) {
1674       if (Current->Previous->BlockKind == BK_BracedInit &&
1675           Current->Previous->opensScope())
1676         Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
1677       else
1678         Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
1679 
1680       // If we find a trailing comment, iterate backwards to determine whether
1681       // it seems to relate to a specific parameter. If so, break before that
1682       // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
1683       // to the previous line in:
1684       //   SomeFunction(a,
1685       //                b, // comment
1686       //                c);
1687       if (!Current->HasUnescapedNewline) {
1688         for (FormatToken *Parameter = Current->Previous; Parameter;
1689              Parameter = Parameter->Previous) {
1690           if (Parameter->isOneOf(tok::comment, tok::r_brace))
1691             break;
1692           if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
1693             if (!Parameter->Previous->is(TT_CtorInitializerComma) &&
1694                 Parameter->HasUnescapedNewline)
1695               Parameter->MustBreakBefore = true;
1696             break;
1697           }
1698         }
1699       }
1700     } else if (Current->SpacesRequiredBefore == 0 &&
1701                spaceRequiredBefore(Line, *Current)) {
1702       Current->SpacesRequiredBefore = 1;
1703     }
1704 
1705     Current->MustBreakBefore =
1706         Current->MustBreakBefore || mustBreakBefore(Line, *Current);
1707 
1708     if (!Current->MustBreakBefore && InFunctionDecl &&
1709         Current->is(TT_FunctionDeclarationName))
1710       Current->MustBreakBefore = mustBreakForReturnType(Line);
1711 
1712     Current->CanBreakBefore =
1713         Current->MustBreakBefore || canBreakBefore(Line, *Current);
1714     unsigned ChildSize = 0;
1715     if (Current->Previous->Children.size() == 1) {
1716       FormatToken &LastOfChild = *Current->Previous->Children[0]->Last;
1717       ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
1718                                                   : LastOfChild.TotalLength + 1;
1719     }
1720     const FormatToken *Prev = Current->Previous;
1721     if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
1722         (Prev->Children.size() == 1 &&
1723          Prev->Children[0]->First->MustBreakBefore) ||
1724         Current->IsMultiline)
1725       Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
1726     else
1727       Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
1728                              ChildSize + Current->SpacesRequiredBefore;
1729 
1730     if (Current->is(TT_CtorInitializerColon))
1731       InFunctionDecl = false;
1732 
1733     // FIXME: Only calculate this if CanBreakBefore is true once static
1734     // initializers etc. are sorted out.
1735     // FIXME: Move magic numbers to a better place.
1736     Current->SplitPenalty = 20 * Current->BindingStrength +
1737                             splitPenalty(Line, *Current, InFunctionDecl);
1738 
1739     Current = Current->Next;
1740   }
1741 
1742   calculateUnbreakableTailLengths(Line);
1743   for (Current = Line.First; Current != nullptr; Current = Current->Next) {
1744     if (Current->Role)
1745       Current->Role->precomputeFormattingInfos(Current);
1746   }
1747 
1748   DEBUG({ printDebugInfo(Line); });
1749 }
1750 
1751 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
1752   unsigned UnbreakableTailLength = 0;
1753   FormatToken *Current = Line.Last;
1754   while (Current) {
1755     Current->UnbreakableTailLength = UnbreakableTailLength;
1756     if (Current->CanBreakBefore ||
1757         Current->isOneOf(tok::comment, tok::string_literal)) {
1758       UnbreakableTailLength = 0;
1759     } else {
1760       UnbreakableTailLength +=
1761           Current->ColumnWidth + Current->SpacesRequiredBefore;
1762     }
1763     Current = Current->Previous;
1764   }
1765 }
1766 
1767 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
1768                                       const FormatToken &Tok,
1769                                       bool InFunctionDecl) {
1770   const FormatToken &Left = *Tok.Previous;
1771   const FormatToken &Right = Tok;
1772 
1773   if (Left.is(tok::semi))
1774     return 0;
1775 
1776   if (Style.Language == FormatStyle::LK_Java) {
1777     if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
1778       return 1;
1779     if (Right.is(Keywords.kw_implements))
1780       return 2;
1781     if (Left.is(tok::comma) && Left.NestingLevel == 0)
1782       return 3;
1783   } else if (Style.Language == FormatStyle::LK_JavaScript) {
1784     if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
1785       return 100;
1786     if (Left.is(TT_JsTypeColon))
1787       return 35;
1788   }
1789 
1790   if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next &&
1791                               Right.Next->is(TT_DictLiteral)))
1792     return 1;
1793   if (Right.is(tok::l_square)) {
1794     if (Style.Language == FormatStyle::LK_Proto)
1795       return 1;
1796     if (Left.is(tok::r_square))
1797       return 200;
1798     // Slightly prefer formatting local lambda definitions like functions.
1799     if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
1800       return 35;
1801     if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
1802                        TT_ArrayInitializerLSquare))
1803       return 500;
1804   }
1805 
1806   if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
1807       Right.is(tok::kw_operator)) {
1808     if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
1809       return 3;
1810     if (Left.is(TT_StartOfName))
1811       return 110;
1812     if (InFunctionDecl && Right.NestingLevel == 0)
1813       return Style.PenaltyReturnTypeOnItsOwnLine;
1814     return 200;
1815   }
1816   if (Right.is(TT_PointerOrReference))
1817     return 190;
1818   if (Right.is(TT_LambdaArrow))
1819     return 110;
1820   if (Left.is(tok::equal) && Right.is(tok::l_brace))
1821     return 150;
1822   if (Left.is(TT_CastRParen))
1823     return 100;
1824   if (Left.is(tok::coloncolon) ||
1825       (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
1826     return 500;
1827   if (Left.isOneOf(tok::kw_class, tok::kw_struct))
1828     return 5000;
1829   if (Left.is(tok::comment))
1830     return 1000;
1831 
1832   if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon))
1833     return 2;
1834 
1835   if (Right.isMemberAccess()) {
1836     // Breaking before the "./->" of a chained call/member access is reasonably
1837     // cheap, as formatting those with one call per line is generally
1838     // desirable. In particular, it should be cheaper to break before the call
1839     // than it is to break inside a call's parameters, which could lead to weird
1840     // "hanging" indents. The exception is the very last "./->" to support this
1841     // frequent pattern:
1842     //
1843     //   aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
1844     //       dddddddd);
1845     //
1846     // which might otherwise be blown up onto many lines. Here, clang-format
1847     // won't produce "hanging" indents anyway as there is no other trailing
1848     // call.
1849     //
1850     // Also apply higher penalty is not a call as that might lead to a wrapping
1851     // like:
1852     //
1853     //   aaaaaaa
1854     //       .aaaaaaaaa.bbbbbbbb(cccccccc);
1855     return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
1856                ? 150
1857                : 35;
1858   }
1859 
1860   if (Right.is(TT_TrailingAnnotation) &&
1861       (!Right.Next || Right.Next->isNot(tok::l_paren))) {
1862     // Moving trailing annotations to the next line is fine for ObjC method
1863     // declarations.
1864     if (Line.startsWith(TT_ObjCMethodSpecifier))
1865       return 10;
1866     // Generally, breaking before a trailing annotation is bad unless it is
1867     // function-like. It seems to be especially preferable to keep standard
1868     // annotations (i.e. "const", "final" and "override") on the same line.
1869     // Use a slightly higher penalty after ")" so that annotations like
1870     // "const override" are kept together.
1871     bool is_short_annotation = Right.TokenText.size() < 10;
1872     return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
1873   }
1874 
1875   // In for-loops, prefer breaking at ',' and ';'.
1876   if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
1877     return 4;
1878 
1879   // In Objective-C method expressions, prefer breaking before "param:" over
1880   // breaking after it.
1881   if (Right.is(TT_SelectorName))
1882     return 0;
1883   if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
1884     return Line.MightBeFunctionDecl ? 50 : 500;
1885 
1886   if (Left.is(tok::l_paren) && InFunctionDecl &&
1887       Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)
1888     return 100;
1889   if (Left.is(tok::l_paren) && Left.Previous &&
1890       Left.Previous->isOneOf(tok::kw_if, tok::kw_for))
1891     return 1000;
1892   if (Left.is(tok::equal) && InFunctionDecl)
1893     return 110;
1894   if (Right.is(tok::r_brace))
1895     return 1;
1896   if (Left.is(TT_TemplateOpener))
1897     return 100;
1898   if (Left.opensScope()) {
1899     if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
1900       return 0;
1901     return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
1902                                    : 19;
1903   }
1904   if (Left.is(TT_JavaAnnotation))
1905     return 50;
1906 
1907   if (Right.is(tok::lessless)) {
1908     if (Left.is(tok::string_literal) &&
1909         (Right.NextOperator || Right.OperatorIndex != 1)) {
1910       StringRef Content = Left.TokenText;
1911       if (Content.startswith("\""))
1912         Content = Content.drop_front(1);
1913       if (Content.endswith("\""))
1914         Content = Content.drop_back(1);
1915       Content = Content.trim();
1916       if (Content.size() > 1 &&
1917           (Content.back() == ':' || Content.back() == '='))
1918         return 25;
1919     }
1920     return 1; // Breaking at a << is really cheap.
1921   }
1922   if (Left.is(TT_ConditionalExpr))
1923     return prec::Conditional;
1924   prec::Level Level = Left.getPrecedence();
1925   if (Level != prec::Unknown)
1926     return Level;
1927   Level = Right.getPrecedence();
1928   if (Level != prec::Unknown)
1929     return Level;
1930 
1931   return 3;
1932 }
1933 
1934 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
1935                                           const FormatToken &Left,
1936                                           const FormatToken &Right) {
1937   if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
1938     return true;
1939   if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
1940       Left.Tok.getObjCKeywordID() == tok::objc_property)
1941     return true;
1942   if (Right.is(tok::hashhash))
1943     return Left.is(tok::hash);
1944   if (Left.isOneOf(tok::hashhash, tok::hash))
1945     return Right.is(tok::hash);
1946   if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
1947     return Style.SpaceInEmptyParentheses;
1948   if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
1949     return (Right.is(TT_CastRParen) ||
1950             (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
1951                ? Style.SpacesInCStyleCastParentheses
1952                : Style.SpacesInParentheses;
1953   if (Right.isOneOf(tok::semi, tok::comma))
1954     return false;
1955   if (Right.is(tok::less) &&
1956       (Left.is(tok::kw_template) ||
1957        (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
1958     return true;
1959   if (Left.isOneOf(tok::exclaim, tok::tilde))
1960     return false;
1961   if (Left.is(tok::at) &&
1962       Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
1963                     tok::numeric_constant, tok::l_paren, tok::l_brace,
1964                     tok::kw_true, tok::kw_false))
1965     return false;
1966   if (Left.is(tok::colon))
1967     return !Left.is(TT_ObjCMethodExpr);
1968   if (Left.is(tok::coloncolon))
1969     return false;
1970   if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
1971     return false;
1972   if (Right.is(tok::ellipsis))
1973     return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
1974                                     Left.Previous->is(tok::kw_case));
1975   if (Left.is(tok::l_square) && Right.is(tok::amp))
1976     return false;
1977   if (Right.is(TT_PointerOrReference))
1978     return (Left.is(tok::r_paren) && Left.MatchingParen &&
1979             (Left.MatchingParen->is(TT_OverloadedOperatorLParen) ||
1980              (Left.MatchingParen->Previous &&
1981               Left.MatchingParen->Previous->is(TT_FunctionDeclarationName)))) ||
1982            (Left.Tok.isLiteral() ||
1983             (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
1984              (Style.PointerAlignment != FormatStyle::PAS_Left ||
1985               Line.IsMultiVariableDeclStmt)));
1986   if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
1987       (!Left.is(TT_PointerOrReference) ||
1988        (Style.PointerAlignment != FormatStyle::PAS_Right &&
1989         !Line.IsMultiVariableDeclStmt)))
1990     return true;
1991   if (Left.is(TT_PointerOrReference))
1992     return Right.Tok.isLiteral() ||
1993            Right.isOneOf(TT_BlockComment, Keywords.kw_final,
1994                          Keywords.kw_override) ||
1995            (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) ||
1996            (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
1997                            tok::l_paren) &&
1998             (Style.PointerAlignment != FormatStyle::PAS_Right &&
1999              !Line.IsMultiVariableDeclStmt) &&
2000             Left.Previous &&
2001             !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
2002   if (Right.is(tok::star) && Left.is(tok::l_paren))
2003     return false;
2004   if (Left.is(tok::l_square))
2005     return (Left.is(TT_ArrayInitializerLSquare) &&
2006             Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) ||
2007            (Left.is(TT_ArraySubscriptLSquare) && Style.SpacesInSquareBrackets &&
2008             Right.isNot(tok::r_square));
2009   if (Right.is(tok::r_square))
2010     return Right.MatchingParen &&
2011            ((Style.SpacesInContainerLiterals &&
2012              Right.MatchingParen->is(TT_ArrayInitializerLSquare)) ||
2013             (Style.SpacesInSquareBrackets &&
2014              Right.MatchingParen->is(TT_ArraySubscriptLSquare)));
2015   if (Right.is(tok::l_square) &&
2016       !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare) &&
2017       !Left.isOneOf(tok::numeric_constant, TT_DictLiteral))
2018     return false;
2019   if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
2020     return !Left.Children.empty(); // No spaces in "{}".
2021   if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
2022       (Right.is(tok::r_brace) && Right.MatchingParen &&
2023        Right.MatchingParen->BlockKind != BK_Block))
2024     return !Style.Cpp11BracedListStyle;
2025   if (Left.is(TT_BlockComment))
2026     return !Left.TokenText.endswith("=*/");
2027   if (Right.is(tok::l_paren)) {
2028     if (Left.is(tok::r_paren) && Left.is(TT_AttributeParen))
2029       return true;
2030     return Line.Type == LT_ObjCDecl || Left.is(tok::semi) ||
2031            (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
2032             (Left.isOneOf(tok::kw_if, tok::pp_elif, tok::kw_for, tok::kw_while,
2033                           tok::kw_switch, tok::kw_case, TT_ForEachMacro,
2034                           TT_ObjCForIn) ||
2035              (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch,
2036                            tok::kw_new, tok::kw_delete) &&
2037               (!Left.Previous || Left.Previous->isNot(tok::period))))) ||
2038            (Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
2039             (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() ||
2040              Left.is(tok::r_paren)) &&
2041             Line.Type != LT_PreprocessorDirective);
2042   }
2043   if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
2044     return false;
2045   if (Right.is(TT_UnaryOperator))
2046     return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
2047            (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
2048   if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
2049                     tok::r_paren) ||
2050        Left.isSimpleTypeSpecifier()) &&
2051       Right.is(tok::l_brace) && Right.getNextNonComment() &&
2052       Right.BlockKind != BK_Block)
2053     return false;
2054   if (Left.is(tok::period) || Right.is(tok::period))
2055     return false;
2056   if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
2057     return false;
2058   if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
2059       Left.MatchingParen->Previous &&
2060       Left.MatchingParen->Previous->is(tok::period))
2061     // A.<B>DoSomething();
2062     return false;
2063   if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
2064     return false;
2065   return true;
2066 }
2067 
2068 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
2069                                          const FormatToken &Right) {
2070   const FormatToken &Left = *Right.Previous;
2071   if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
2072     return true; // Never ever merge two identifiers.
2073   if (Style.Language == FormatStyle::LK_Cpp) {
2074     if (Left.is(tok::kw_operator))
2075       return Right.is(tok::coloncolon);
2076   } else if (Style.Language == FormatStyle::LK_Proto) {
2077     if (Right.is(tok::period) &&
2078         Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
2079                      Keywords.kw_repeated, Keywords.kw_extend))
2080       return true;
2081     if (Right.is(tok::l_paren) &&
2082         Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
2083       return true;
2084   } else if (Style.Language == FormatStyle::LK_JavaScript) {
2085     if (Left.is(TT_JsFatArrow))
2086       return true;
2087     if (Right.is(tok::star) &&
2088         Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
2089       return false;
2090     if (Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
2091                      Keywords.kw_of, tok::kw_const) &&
2092         (!Left.Previous || !Left.Previous->is(tok::period)))
2093       return true;
2094     if (Left.is(tok::kw_default) && Left.Previous &&
2095         Left.Previous->is(tok::kw_export))
2096       return true;
2097     if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
2098       return true;
2099     if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
2100       return false;
2101     if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
2102       return false;
2103     if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
2104         Line.First->isOneOf(Keywords.kw_import, tok::kw_export))
2105       return false;
2106     if (Left.is(tok::ellipsis))
2107       return false;
2108     if (Left.is(TT_TemplateCloser) &&
2109         !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
2110                        Keywords.kw_implements, Keywords.kw_extends))
2111       // Type assertions ('<type>expr') are not followed by whitespace. Other
2112       // locations that should have whitespace following are identified by the
2113       // above set of follower tokens.
2114       return false;
2115   } else if (Style.Language == FormatStyle::LK_Java) {
2116     if (Left.is(tok::r_square) && Right.is(tok::l_brace))
2117       return true;
2118     if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren))
2119       return Style.SpaceBeforeParens != FormatStyle::SBPO_Never;
2120     if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
2121                       tok::kw_protected) ||
2122          Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
2123                       Keywords.kw_native)) &&
2124         Right.is(TT_TemplateOpener))
2125       return true;
2126   }
2127   if (Left.is(TT_ImplicitStringLiteral))
2128     return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
2129   if (Line.Type == LT_ObjCMethodDecl) {
2130     if (Left.is(TT_ObjCMethodSpecifier))
2131       return true;
2132     if (Left.is(tok::r_paren) && Right.is(tok::identifier))
2133       // Don't space between ')' and <id>
2134       return false;
2135   }
2136   if (Line.Type == LT_ObjCProperty &&
2137       (Right.is(tok::equal) || Left.is(tok::equal)))
2138     return false;
2139 
2140   if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
2141       Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow))
2142     return true;
2143   if (Right.is(TT_OverloadedOperatorLParen))
2144     return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
2145   if (Left.is(tok::comma))
2146     return true;
2147   if (Right.is(tok::comma))
2148     return false;
2149   if (Right.isOneOf(TT_CtorInitializerColon, TT_ObjCBlockLParen))
2150     return true;
2151   if (Right.is(tok::colon)) {
2152     if (Line.First->isOneOf(tok::kw_case, tok::kw_default) ||
2153         !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi))
2154       return false;
2155     if (Right.is(TT_ObjCMethodExpr))
2156       return false;
2157     if (Left.is(tok::question))
2158       return false;
2159     if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
2160       return false;
2161     if (Right.is(TT_DictLiteral))
2162       return Style.SpacesInContainerLiterals;
2163     return true;
2164   }
2165   if (Left.is(TT_UnaryOperator))
2166     return Right.is(TT_BinaryOperator);
2167 
2168   // If the next token is a binary operator or a selector name, we have
2169   // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
2170   if (Left.is(TT_CastRParen))
2171     return Style.SpaceAfterCStyleCast ||
2172            Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
2173 
2174   if (Left.is(tok::greater) && Right.is(tok::greater))
2175     return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
2176            (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
2177   if (Right.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
2178       Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar))
2179     return false;
2180   if (!Style.SpaceBeforeAssignmentOperators &&
2181       Right.getPrecedence() == prec::Assignment)
2182     return false;
2183   if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment))
2184     return (Left.is(TT_TemplateOpener) &&
2185             Style.Standard == FormatStyle::LS_Cpp03) ||
2186            !(Left.isOneOf(tok::identifier, tok::l_paren, tok::r_paren,
2187                           tok::l_square) ||
2188              Left.isOneOf(TT_TemplateCloser, TT_TemplateOpener));
2189   if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
2190     return Style.SpacesInAngles;
2191   if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
2192       (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
2193        !Right.is(tok::r_paren)))
2194     return true;
2195   if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) &&
2196       Right.isNot(TT_FunctionTypeLParen))
2197     return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
2198   if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
2199       Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen))
2200     return false;
2201   if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
2202       Line.startsWith(tok::hash))
2203     return true;
2204   if (Right.is(TT_TrailingUnaryOperator))
2205     return false;
2206   if (Left.is(TT_RegexLiteral))
2207     return false;
2208   return spaceRequiredBetween(Line, Left, Right);
2209 }
2210 
2211 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
2212 static bool isAllmanBrace(const FormatToken &Tok) {
2213   return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block &&
2214          !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
2215 }
2216 
2217 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
2218                                      const FormatToken &Right) {
2219   const FormatToken &Left = *Right.Previous;
2220   if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
2221     return true;
2222 
2223   if (Style.Language == FormatStyle::LK_JavaScript) {
2224     // FIXME: This might apply to other languages and token kinds.
2225     if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
2226         Left.Previous->is(tok::string_literal))
2227       return true;
2228     if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
2229         Left.Previous && Left.Previous->is(tok::equal) &&
2230         Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
2231                             tok::kw_const) &&
2232         // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
2233         // above.
2234         !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let))
2235       // Object literals on the top level of a file are treated as "enum-style".
2236       // Each key/value pair is put on a separate line, instead of bin-packing.
2237       return true;
2238     if (Left.is(tok::l_brace) && Line.Level == 0 &&
2239         (Line.startsWith(tok::kw_enum) ||
2240          Line.startsWith(tok::kw_export, tok::kw_enum)))
2241       // JavaScript top-level enum key/value pairs are put on separate lines
2242       // instead of bin-packing.
2243       return true;
2244     if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
2245         !Left.Children.empty())
2246       // Support AllowShortFunctionsOnASingleLine for JavaScript.
2247       return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
2248              Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
2249              (Left.NestingLevel == 0 && Line.Level == 0 &&
2250               Style.AllowShortFunctionsOnASingleLine ==
2251                   FormatStyle::SFS_Inline);
2252   } else if (Style.Language == FormatStyle::LK_Java) {
2253     if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
2254         Right.Next->is(tok::string_literal))
2255       return true;
2256   }
2257 
2258   // If the last token before a '}' is a comma or a trailing comment, the
2259   // intention is to insert a line break after it in order to make shuffling
2260   // around entries easier.
2261   const FormatToken *BeforeClosingBrace = nullptr;
2262   if (Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) &&
2263       Left.BlockKind != BK_Block && Left.MatchingParen)
2264     BeforeClosingBrace = Left.MatchingParen->Previous;
2265   else if (Right.MatchingParen &&
2266            Right.MatchingParen->isOneOf(tok::l_brace,
2267                                         TT_ArrayInitializerLSquare))
2268     BeforeClosingBrace = &Left;
2269   if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
2270                              BeforeClosingBrace->isTrailingComment()))
2271     return true;
2272 
2273   if (Right.is(tok::comment))
2274     return Left.BlockKind != BK_BracedInit &&
2275            Left.isNot(TT_CtorInitializerColon) &&
2276            (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
2277   if (Left.isTrailingComment())
2278     return true;
2279   if (Left.isStringLiteral() &&
2280       (Right.isStringLiteral() || Right.is(TT_ObjCStringLiteral)))
2281     return true;
2282   if (Right.Previous->IsUnterminatedLiteral)
2283     return true;
2284   if (Right.is(tok::lessless) && Right.Next &&
2285       Right.Previous->is(tok::string_literal) &&
2286       Right.Next->is(tok::string_literal))
2287     return true;
2288   if (Right.Previous->ClosesTemplateDeclaration &&
2289       Right.Previous->MatchingParen &&
2290       Right.Previous->MatchingParen->NestingLevel == 0 &&
2291       Style.AlwaysBreakTemplateDeclarations)
2292     return true;
2293   if ((Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) &&
2294       Style.BreakConstructorInitializersBeforeComma &&
2295       !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
2296     return true;
2297   if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\""))
2298     // Raw string literals are special wrt. line breaks. The author has made a
2299     // deliberate choice and might have aligned the contents of the string
2300     // literal accordingly. Thus, we try keep existing line breaks.
2301     return Right.NewlinesBefore > 0;
2302   if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 &&
2303       Style.Language == FormatStyle::LK_Proto)
2304     // Don't put enums onto single lines in protocol buffers.
2305     return true;
2306   if (Right.is(TT_InlineASMBrace))
2307     return Right.HasUnescapedNewline;
2308   if (isAllmanBrace(Left) || isAllmanBrace(Right))
2309     return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) ||
2310            (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
2311            (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
2312   if (Style.Language == FormatStyle::LK_Proto && Left.isNot(tok::l_brace) &&
2313       Right.is(TT_SelectorName))
2314     return true;
2315   if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine)
2316     return true;
2317 
2318   if ((Style.Language == FormatStyle::LK_Java ||
2319        Style.Language == FormatStyle::LK_JavaScript) &&
2320       Left.is(TT_LeadingJavaAnnotation) &&
2321       Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
2322       (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations))
2323     return true;
2324 
2325   return false;
2326 }
2327 
2328 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
2329                                     const FormatToken &Right) {
2330   const FormatToken &Left = *Right.Previous;
2331 
2332   // Language-specific stuff.
2333   if (Style.Language == FormatStyle::LK_Java) {
2334     if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
2335                      Keywords.kw_implements))
2336       return false;
2337     if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
2338                       Keywords.kw_implements))
2339       return true;
2340   } else if (Style.Language == FormatStyle::LK_JavaScript) {
2341     if (Left.is(tok::kw_return))
2342       return false; // Otherwise a semicolon is inserted.
2343     if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
2344       return false;
2345     if (Left.is(TT_JsTypeColon))
2346       return true;
2347     if (Right.NestingLevel == 0 && Right.is(Keywords.kw_is))
2348       return false;
2349     if (Left.is(Keywords.kw_in))
2350       return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
2351     if (Right.is(Keywords.kw_in))
2352       return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
2353   }
2354 
2355   if (Left.is(tok::at))
2356     return false;
2357   if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
2358     return false;
2359   if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
2360     return !Right.is(tok::l_paren);
2361   if (Right.is(TT_PointerOrReference))
2362     return Line.IsMultiVariableDeclStmt ||
2363            (Style.PointerAlignment == FormatStyle::PAS_Right &&
2364             (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
2365   if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
2366       Right.is(tok::kw_operator))
2367     return true;
2368   if (Left.is(TT_PointerOrReference))
2369     return false;
2370   if (Right.isTrailingComment())
2371     // We rely on MustBreakBefore being set correctly here as we should not
2372     // change the "binding" behavior of a comment.
2373     // The first comment in a braced lists is always interpreted as belonging to
2374     // the first list element. Otherwise, it should be placed outside of the
2375     // list.
2376     return Left.BlockKind == BK_BracedInit;
2377   if (Left.is(tok::question) && Right.is(tok::colon))
2378     return false;
2379   if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
2380     return Style.BreakBeforeTernaryOperators;
2381   if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
2382     return !Style.BreakBeforeTernaryOperators;
2383   if (Right.is(TT_InheritanceColon))
2384     return true;
2385   if (Right.is(tok::colon) &&
2386       !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon))
2387     return false;
2388   if (Left.is(tok::colon) && (Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)))
2389     return true;
2390   if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
2391                                     Right.Next->is(TT_ObjCMethodExpr)))
2392     return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
2393   if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
2394     return true;
2395   if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
2396     return true;
2397   if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
2398                     TT_OverloadedOperator))
2399     return false;
2400   if (Left.is(TT_RangeBasedForLoopColon))
2401     return true;
2402   if (Right.is(TT_RangeBasedForLoopColon))
2403     return false;
2404   if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
2405       Left.is(tok::kw_operator))
2406     return false;
2407   if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
2408       Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0)
2409     return false;
2410   if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen))
2411     return false;
2412   if (Left.is(tok::l_paren) && Left.Previous &&
2413       (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen)))
2414     return false;
2415   if (Right.is(TT_ImplicitStringLiteral))
2416     return false;
2417 
2418   if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser))
2419     return false;
2420   if (Right.is(tok::r_square) && Right.MatchingParen &&
2421       Right.MatchingParen->is(TT_LambdaLSquare))
2422     return false;
2423 
2424   // We only break before r_brace if there was a corresponding break before
2425   // the l_brace, which is tracked by BreakBeforeClosingBrace.
2426   if (Right.is(tok::r_brace))
2427     return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
2428 
2429   // Allow breaking after a trailing annotation, e.g. after a method
2430   // declaration.
2431   if (Left.is(TT_TrailingAnnotation))
2432     return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
2433                           tok::less, tok::coloncolon);
2434 
2435   if (Right.is(tok::kw___attribute))
2436     return true;
2437 
2438   if (Left.is(tok::identifier) && Right.is(tok::string_literal))
2439     return true;
2440 
2441   if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
2442     return true;
2443 
2444   if (Left.is(TT_CtorInitializerComma) &&
2445       Style.BreakConstructorInitializersBeforeComma)
2446     return false;
2447   if (Right.is(TT_CtorInitializerComma) &&
2448       Style.BreakConstructorInitializersBeforeComma)
2449     return true;
2450   if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
2451       (Left.is(tok::less) && Right.is(tok::less)))
2452     return false;
2453   if (Right.is(TT_BinaryOperator) &&
2454       Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
2455       (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
2456        Right.getPrecedence() != prec::Assignment))
2457     return true;
2458   if (Left.is(TT_ArrayInitializerLSquare))
2459     return true;
2460   if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
2461     return true;
2462   if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
2463       !Left.isOneOf(tok::arrowstar, tok::lessless) &&
2464       Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
2465       (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
2466        Left.getPrecedence() == prec::Assignment))
2467     return true;
2468   return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
2469                       tok::kw_class, tok::kw_struct, tok::comment) ||
2470          Right.isMemberAccess() ||
2471          Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
2472                        tok::colon, tok::l_square, tok::at) ||
2473          (Left.is(tok::r_paren) &&
2474           Right.isOneOf(tok::identifier, tok::kw_const)) ||
2475          (Left.is(tok::l_paren) && !Right.is(tok::r_paren));
2476 }
2477 
2478 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
2479   llvm::errs() << "AnnotatedTokens:\n";
2480   const FormatToken *Tok = Line.First;
2481   while (Tok) {
2482     llvm::errs() << " M=" << Tok->MustBreakBefore
2483                  << " C=" << Tok->CanBreakBefore
2484                  << " T=" << getTokenTypeName(Tok->Type)
2485                  << " S=" << Tok->SpacesRequiredBefore
2486                  << " B=" << Tok->BlockParameterCount
2487                  << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()
2488                  << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind
2489                  << " FakeLParens=";
2490     for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
2491       llvm::errs() << Tok->FakeLParens[i] << "/";
2492     llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n";
2493     if (!Tok->Next)
2494       assert(Tok == Line.Last);
2495     Tok = Tok->Next;
2496   }
2497   llvm::errs() << "----\n";
2498 }
2499 
2500 } // namespace format
2501 } // namespace clang
2502