1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "TokenAnnotator.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "llvm/Support/Debug.h"
19 
20 namespace clang {
21 namespace format {
22 
23 namespace {
24 
25 /// \brief A parser that gathers additional information about tokens.
26 ///
27 /// The \c TokenAnnotator tries to match parenthesis and square brakets and
28 /// store a parenthesis levels. It also tries to resolve matching "<" and ">"
29 /// into template parameter lists.
30 class AnnotatingParser {
31 public:
32   AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
33                    IdentifierInfo &Ident_in)
34       : Style(Style), Line(Line), CurrentToken(Line.First),
35         KeywordVirtualFound(false), AutoFound(false), Ident_in(Ident_in) {
36     Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
37     resetTokenMetadata(CurrentToken);
38   }
39 
40 private:
41   bool parseAngle() {
42     if (CurrentToken == NULL)
43       return false;
44     ScopedContextCreator ContextCreator(*this, tok::less, 10);
45     FormatToken *Left = CurrentToken->Previous;
46     Contexts.back().IsExpression = false;
47     // If there's a template keyword before the opening angle bracket, this is a
48     // template parameter, not an argument.
49     Contexts.back().InTemplateArgument =
50         Left->Previous != NULL && Left->Previous->Tok.isNot(tok::kw_template);
51 
52     while (CurrentToken != NULL) {
53       if (CurrentToken->is(tok::greater)) {
54         Left->MatchingParen = CurrentToken;
55         CurrentToken->MatchingParen = Left;
56         CurrentToken->Type = TT_TemplateCloser;
57         next();
58         return true;
59       }
60       if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
61                                 tok::question, tok::colon))
62         return false;
63       // If a && or || is found and interpreted as a binary operator, this set
64       // of angles is likely part of something like "a < b && c > d". If the
65       // angles are inside an expression, the ||/&& might also be a binary
66       // operator that was misinterpreted because we are parsing template
67       // parameters.
68       // FIXME: This is getting out of hand, write a decent parser.
69       if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
70           ((CurrentToken->Previous->Type == TT_BinaryOperator &&
71             // Toplevel bool expressions do not make lots of sense;
72             // If we're on the top level, it contains only the base context and
73             // the context for the current opening angle bracket.
74             Contexts.size() > 2) ||
75            Contexts[Contexts.size() - 2].IsExpression) &&
76           Line.First->isNot(tok::kw_template))
77         return false;
78       updateParameterCount(Left, CurrentToken);
79       if (!consumeToken())
80         return false;
81     }
82     return false;
83   }
84 
85   bool parseParens(bool LookForDecls = false) {
86     if (CurrentToken == NULL)
87       return false;
88     ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
89 
90     // FIXME: This is a bit of a hack. Do better.
91     Contexts.back().ColonIsForRangeExpr =
92         Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
93 
94     bool StartsObjCMethodExpr = false;
95     FormatToken *Left = CurrentToken->Previous;
96     if (CurrentToken->is(tok::caret)) {
97       // (^ can start a block type.
98       Left->Type = TT_ObjCBlockLParen;
99     } else if (FormatToken *MaybeSel = Left->Previous) {
100       // @selector( starts a selector.
101       if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
102           MaybeSel->Previous->is(tok::at)) {
103         StartsObjCMethodExpr = true;
104       }
105     }
106 
107     if (Left->Previous &&
108         (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_if,
109                                  tok::kw_while, tok::l_paren, tok::comma) ||
110          Left->Previous->Type == TT_BinaryOperator)) {
111       // static_assert, if and while usually contain expressions.
112       Contexts.back().IsExpression = true;
113     } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
114                Left->Previous->MatchingParen &&
115                Left->Previous->MatchingParen->Type == TT_LambdaLSquare) {
116       // This is a parameter list of a lambda expression.
117       Contexts.back().IsExpression = false;
118     } else if (Contexts[Contexts.size() - 2].CaretFound) {
119       // This is the parameter list of an ObjC block.
120       Contexts.back().IsExpression = false;
121     } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
122       Left->Type = TT_AttributeParen;
123     } else if (Left->Previous && Left->Previous->IsForEachMacro) {
124       // The first argument to a foreach macro is a declaration.
125       Contexts.back().IsForEachMacro = true;
126       Contexts.back().IsExpression = false;
127     }
128 
129     if (StartsObjCMethodExpr) {
130       Contexts.back().ColonIsObjCMethodExpr = true;
131       Left->Type = TT_ObjCMethodExpr;
132     }
133 
134     bool MightBeFunctionType = CurrentToken->is(tok::star);
135     bool HasMultipleLines = false;
136     bool HasMultipleParametersOnALine = false;
137     while (CurrentToken != NULL) {
138       // LookForDecls is set when "if (" has been seen. Check for
139       // 'identifier' '*' 'identifier' followed by not '=' -- this
140       // '*' has to be a binary operator but determineStarAmpUsage() will
141       // categorize it as an unary operator, so set the right type here.
142       if (LookForDecls && CurrentToken->Next) {
143         FormatToken *Prev = CurrentToken->getPreviousNonComment();
144         if (Prev) {
145           FormatToken *PrevPrev = Prev->getPreviousNonComment();
146           FormatToken *Next = CurrentToken->Next;
147           if (PrevPrev && PrevPrev->is(tok::identifier) &&
148               Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
149               CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
150             Prev->Type = TT_BinaryOperator;
151             LookForDecls = false;
152           }
153         }
154       }
155 
156       if (CurrentToken->Previous->Type == TT_PointerOrReference &&
157           CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
158                                                     tok::coloncolon))
159         MightBeFunctionType = true;
160       if (CurrentToken->is(tok::r_paren)) {
161         if (MightBeFunctionType && CurrentToken->Next &&
162             (CurrentToken->Next->is(tok::l_paren) ||
163              (CurrentToken->Next->is(tok::l_square) &&
164               !Contexts.back().IsExpression)))
165           Left->Type = TT_FunctionTypeLParen;
166         Left->MatchingParen = CurrentToken;
167         CurrentToken->MatchingParen = Left;
168 
169         if (StartsObjCMethodExpr) {
170           CurrentToken->Type = TT_ObjCMethodExpr;
171           if (Contexts.back().FirstObjCSelectorName != NULL) {
172             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
173                 Contexts.back().LongestObjCSelectorName;
174           }
175         }
176 
177         if (Left->Type == TT_AttributeParen)
178           CurrentToken->Type = TT_AttributeParen;
179 
180         if (!HasMultipleLines)
181           Left->PackingKind = PPK_Inconclusive;
182         else if (HasMultipleParametersOnALine)
183           Left->PackingKind = PPK_BinPacked;
184         else
185           Left->PackingKind = PPK_OnePerLine;
186 
187         next();
188         return true;
189       }
190       if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
191         return false;
192       else if (CurrentToken->is(tok::l_brace))
193         Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
194       updateParameterCount(Left, CurrentToken);
195       if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
196           !CurrentToken->Next->HasUnescapedNewline &&
197           !CurrentToken->Next->isTrailingComment())
198         HasMultipleParametersOnALine = true;
199       if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) ||
200           CurrentToken->isSimpleTypeSpecifier())
201         Contexts.back().IsExpression = false;
202       if (!consumeToken())
203         return false;
204       if (CurrentToken && CurrentToken->HasUnescapedNewline)
205         HasMultipleLines = true;
206     }
207     return false;
208   }
209 
210   bool parseSquare() {
211     if (!CurrentToken)
212       return false;
213 
214     // A '[' could be an index subscript (after an identifier or after
215     // ')' or ']'), it could be the start of an Objective-C method
216     // expression, or it could the the start of an Objective-C array literal.
217     FormatToken *Left = CurrentToken->Previous;
218     FormatToken *Parent = Left->getPreviousNonComment();
219     bool StartsObjCMethodExpr =
220         Contexts.back().CanBeExpression && Left->Type != TT_LambdaLSquare &&
221         (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
222                                     tok::kw_return, tok::kw_throw) ||
223          Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn ||
224          Parent->Type == TT_CastRParen ||
225          getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
226     ScopedContextCreator ContextCreator(*this, tok::l_square, 10);
227     Contexts.back().IsExpression = true;
228     bool ColonFound = false;
229 
230     if (StartsObjCMethodExpr) {
231       Contexts.back().ColonIsObjCMethodExpr = true;
232       Left->Type = TT_ObjCMethodExpr;
233     } else if (Parent && Parent->is(tok::at)) {
234       Left->Type = TT_ArrayInitializerLSquare;
235     } else if (Left->Type == TT_Unknown) {
236       Left->Type = TT_ArraySubscriptLSquare;
237     }
238 
239     while (CurrentToken != NULL) {
240       if (CurrentToken->is(tok::r_square)) {
241         if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
242             Left->Type == TT_ObjCMethodExpr) {
243           // An ObjC method call is rarely followed by an open parenthesis.
244           // FIXME: Do we incorrectly label ":" with this?
245           StartsObjCMethodExpr = false;
246           Left->Type = TT_Unknown;
247         }
248         if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
249           CurrentToken->Type = TT_ObjCMethodExpr;
250           // determineStarAmpUsage() thinks that '*' '[' is allocating an
251           // array of pointers, but if '[' starts a selector then '*' is a
252           // binary operator.
253           if (Parent != NULL && Parent->Type == TT_PointerOrReference)
254             Parent->Type = TT_BinaryOperator;
255         }
256         Left->MatchingParen = CurrentToken;
257         CurrentToken->MatchingParen = Left;
258         if (Contexts.back().FirstObjCSelectorName != NULL) {
259           Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
260               Contexts.back().LongestObjCSelectorName;
261           if (Contexts.back().NumBlockParameters > 1)
262             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
263         }
264         next();
265         return true;
266       }
267       if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
268         return false;
269       if (CurrentToken->is(tok::colon))
270         ColonFound = true;
271       if (CurrentToken->is(tok::comma) &&
272           Style.Language != FormatStyle::LK_Proto &&
273           (Left->Type == TT_ArraySubscriptLSquare ||
274            (Left->Type == TT_ObjCMethodExpr && !ColonFound)))
275         Left->Type = TT_ArrayInitializerLSquare;
276       updateParameterCount(Left, CurrentToken);
277       if (!consumeToken())
278         return false;
279     }
280     return false;
281   }
282 
283   bool parseBrace() {
284     if (CurrentToken != NULL) {
285       FormatToken *Left = CurrentToken->Previous;
286 
287       if (Contexts.back().CaretFound)
288         Left->Type = TT_ObjCBlockLBrace;
289       Contexts.back().CaretFound = false;
290 
291       ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
292       Contexts.back().ColonIsDictLiteral = true;
293 
294       while (CurrentToken != NULL) {
295         if (CurrentToken->is(tok::r_brace)) {
296           Left->MatchingParen = CurrentToken;
297           CurrentToken->MatchingParen = Left;
298           next();
299           return true;
300         }
301         if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
302           return false;
303         updateParameterCount(Left, CurrentToken);
304         if (CurrentToken->is(tok::colon) &&
305             Style.Language != FormatStyle::LK_Proto)
306           Left->Type = TT_DictLiteral;
307         if (!consumeToken())
308           return false;
309       }
310     }
311     // No closing "}" found, this probably starts a definition.
312     Line.StartsDefinition = true;
313     return true;
314   }
315 
316   void updateParameterCount(FormatToken *Left, FormatToken *Current) {
317     if (Current->is(tok::comma)) {
318       ++Left->ParameterCount;
319       if (!Left->Role)
320         Left->Role.reset(new CommaSeparatedList(Style));
321       Left->Role->CommaFound(Current);
322     } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
323       Left->ParameterCount = 1;
324     }
325   }
326 
327   bool parseConditional() {
328     while (CurrentToken != NULL) {
329       if (CurrentToken->is(tok::colon)) {
330         CurrentToken->Type = TT_ConditionalExpr;
331         next();
332         return true;
333       }
334       if (!consumeToken())
335         return false;
336     }
337     return false;
338   }
339 
340   bool parseTemplateDeclaration() {
341     if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
342       CurrentToken->Type = TT_TemplateOpener;
343       next();
344       if (!parseAngle())
345         return false;
346       if (CurrentToken != NULL)
347         CurrentToken->Previous->ClosesTemplateDeclaration = true;
348       return true;
349     }
350     return false;
351   }
352 
353   bool consumeToken() {
354     FormatToken *Tok = CurrentToken;
355     next();
356     switch (Tok->Tok.getKind()) {
357     case tok::plus:
358     case tok::minus:
359       if (Tok->Previous == NULL && Line.MustBeDeclaration)
360         Tok->Type = TT_ObjCMethodSpecifier;
361       break;
362     case tok::colon:
363       if (Tok->Previous == NULL)
364         return false;
365       // Colons from ?: are handled in parseConditional().
366       if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1) {
367         Tok->Type = TT_CtorInitializerColon;
368       } else if (Contexts.back().ColonIsDictLiteral) {
369         Tok->Type = TT_DictLiteral;
370       } else if (Contexts.back().ColonIsObjCMethodExpr ||
371                  Line.First->Type == TT_ObjCMethodSpecifier) {
372         Tok->Type = TT_ObjCMethodExpr;
373         Tok->Previous->Type = TT_ObjCSelectorName;
374         if (Tok->Previous->ColumnWidth >
375             Contexts.back().LongestObjCSelectorName) {
376           Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth;
377         }
378         if (Contexts.back().FirstObjCSelectorName == NULL)
379           Contexts.back().FirstObjCSelectorName = Tok->Previous;
380       } else if (Contexts.back().ColonIsForRangeExpr) {
381         Tok->Type = TT_RangeBasedForLoopColon;
382       } else if (CurrentToken != NULL &&
383                  CurrentToken->is(tok::numeric_constant)) {
384         Tok->Type = TT_BitFieldColon;
385       } else if (Contexts.size() == 1 && Line.First->isNot(tok::kw_enum)) {
386         Tok->Type = TT_InheritanceColon;
387       } else if (Contexts.back().ContextKind == tok::l_paren) {
388         Tok->Type = TT_InlineASMColon;
389       }
390       break;
391     case tok::kw_if:
392     case tok::kw_while:
393       if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
394         next();
395         if (!parseParens(/*LookForDecls=*/true))
396           return false;
397       }
398       break;
399     case tok::kw_for:
400       Contexts.back().ColonIsForRangeExpr = true;
401       next();
402       if (!parseParens())
403         return false;
404       break;
405     case tok::l_paren:
406       if (!parseParens())
407         return false;
408       if (Line.MustBeDeclaration && Contexts.size() == 1 &&
409           !Contexts.back().IsExpression && Line.First->Type != TT_ObjCProperty)
410         Line.MightBeFunctionDecl = true;
411       break;
412     case tok::l_square:
413       if (!parseSquare())
414         return false;
415       break;
416     case tok::l_brace:
417       if (!parseBrace())
418         return false;
419       break;
420     case tok::less:
421       if (Tok->Previous && !Tok->Previous->Tok.isLiteral() && parseAngle())
422         Tok->Type = TT_TemplateOpener;
423       else {
424         Tok->Type = TT_BinaryOperator;
425         CurrentToken = Tok;
426         next();
427       }
428       break;
429     case tok::r_paren:
430     case tok::r_square:
431       return false;
432     case tok::r_brace:
433       // Lines can start with '}'.
434       if (Tok->Previous != NULL)
435         return false;
436       break;
437     case tok::greater:
438       Tok->Type = TT_BinaryOperator;
439       break;
440     case tok::kw_operator:
441       while (CurrentToken &&
442              !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
443         if (CurrentToken->isOneOf(tok::star, tok::amp))
444           CurrentToken->Type = TT_PointerOrReference;
445         consumeToken();
446         if (CurrentToken && CurrentToken->Previous->Type == TT_BinaryOperator)
447           CurrentToken->Previous->Type = TT_OverloadedOperator;
448       }
449       if (CurrentToken) {
450         CurrentToken->Type = TT_OverloadedOperatorLParen;
451         if (CurrentToken->Previous->Type == TT_BinaryOperator)
452           CurrentToken->Previous->Type = TT_OverloadedOperator;
453       }
454       break;
455     case tok::question:
456       parseConditional();
457       break;
458     case tok::kw_template:
459       parseTemplateDeclaration();
460       break;
461     case tok::identifier:
462       if (Line.First->is(tok::kw_for) &&
463           Tok->Tok.getIdentifierInfo() == &Ident_in)
464         Tok->Type = TT_ObjCForIn;
465       break;
466     case tok::comma:
467       if (Contexts.back().FirstStartOfName)
468         Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
469       if (Contexts.back().InCtorInitializer)
470         Tok->Type = TT_CtorInitializerComma;
471       if (Contexts.back().IsForEachMacro)
472         Contexts.back().IsExpression = true;
473       break;
474     default:
475       break;
476     }
477     return true;
478   }
479 
480   void parseIncludeDirective() {
481     next();
482     if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
483       next();
484       while (CurrentToken != NULL) {
485         if (CurrentToken->isNot(tok::comment) || CurrentToken->Next)
486           CurrentToken->Type = TT_ImplicitStringLiteral;
487         next();
488       }
489     } else {
490       while (CurrentToken != NULL) {
491         if (CurrentToken->is(tok::string_literal))
492           // Mark these string literals as "implicit" literals, too, so that
493           // they are not split or line-wrapped.
494           CurrentToken->Type = TT_ImplicitStringLiteral;
495         next();
496       }
497     }
498   }
499 
500   void parseWarningOrError() {
501     next();
502     // We still want to format the whitespace left of the first token of the
503     // warning or error.
504     next();
505     while (CurrentToken != NULL) {
506       CurrentToken->Type = TT_ImplicitStringLiteral;
507       next();
508     }
509   }
510 
511   void parsePragma() {
512     next(); // Consume "pragma".
513     if (CurrentToken && CurrentToken->TokenText == "mark") {
514       next(); // Consume "mark".
515       next(); // Consume first token (so we fix leading whitespace).
516       while (CurrentToken != NULL) {
517         CurrentToken->Type = TT_ImplicitStringLiteral;
518         next();
519       }
520     }
521   }
522 
523   void parsePreprocessorDirective() {
524     next();
525     if (CurrentToken == NULL)
526       return;
527     if (CurrentToken->Tok.is(tok::numeric_constant)) {
528       CurrentToken->SpacesRequiredBefore = 1;
529       return;
530     }
531     // Hashes in the middle of a line can lead to any strange token
532     // sequence.
533     if (CurrentToken->Tok.getIdentifierInfo() == NULL)
534       return;
535     switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
536     case tok::pp_include:
537     case tok::pp_import:
538       parseIncludeDirective();
539       break;
540     case tok::pp_error:
541     case tok::pp_warning:
542       parseWarningOrError();
543       break;
544     case tok::pp_pragma:
545       parsePragma();
546       break;
547     case tok::pp_if:
548     case tok::pp_elif:
549       Contexts.back().IsExpression = true;
550       parseLine();
551       break;
552     default:
553       break;
554     }
555     while (CurrentToken != NULL)
556       next();
557   }
558 
559 public:
560   LineType parseLine() {
561     if (CurrentToken->is(tok::hash)) {
562       parsePreprocessorDirective();
563       return LT_PreprocessorDirective;
564     }
565 
566     // Directly allow to 'import <string-literal>' to support protocol buffer
567     // definitions (code.google.com/p/protobuf) or missing "#" (either way we
568     // should not break the line).
569     IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
570     if (Info && Info->getPPKeywordID() == tok::pp_import &&
571         CurrentToken->Next && CurrentToken->Next->is(tok::string_literal))
572       parseIncludeDirective();
573 
574     while (CurrentToken != NULL) {
575       if (CurrentToken->is(tok::kw_virtual))
576         KeywordVirtualFound = true;
577       if (!consumeToken())
578         return LT_Invalid;
579     }
580     if (KeywordVirtualFound)
581       return LT_VirtualFunctionDecl;
582 
583     if (Line.First->Type == TT_ObjCMethodSpecifier) {
584       if (Contexts.back().FirstObjCSelectorName != NULL)
585         Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
586             Contexts.back().LongestObjCSelectorName;
587       return LT_ObjCMethodDecl;
588     }
589 
590     return LT_Other;
591   }
592 
593 private:
594   void resetTokenMetadata(FormatToken *Token) {
595     if (Token == nullptr) return;
596 
597     // Reset token type in case we have already looked at it and then
598     // recovered from an error (e.g. failure to find the matching >).
599     if (CurrentToken->Type != TT_LambdaLSquare &&
600         CurrentToken->Type != TT_FunctionLBrace &&
601         CurrentToken->Type != TT_ImplicitStringLiteral &&
602         CurrentToken->Type != TT_TrailingReturnArrow)
603       CurrentToken->Type = TT_Unknown;
604     if (CurrentToken->Role)
605       CurrentToken->Role.reset(NULL);
606     CurrentToken->FakeLParens.clear();
607     CurrentToken->FakeRParens = 0;
608   }
609 
610   void next() {
611     if (CurrentToken != NULL) {
612       determineTokenType(*CurrentToken);
613       CurrentToken->BindingStrength = Contexts.back().BindingStrength;
614       CurrentToken->NestingLevel = Contexts.size() - 1;
615     }
616 
617     if (CurrentToken != NULL)
618       CurrentToken = CurrentToken->Next;
619 
620     resetTokenMetadata(CurrentToken);
621   }
622 
623   /// \brief A struct to hold information valid in a specific context, e.g.
624   /// a pair of parenthesis.
625   struct Context {
626     Context(tok::TokenKind ContextKind, unsigned BindingStrength,
627             bool IsExpression)
628         : ContextKind(ContextKind), BindingStrength(BindingStrength),
629           LongestObjCSelectorName(0), NumBlockParameters(0),
630           ColonIsForRangeExpr(false), ColonIsDictLiteral(false),
631           ColonIsObjCMethodExpr(false), FirstObjCSelectorName(NULL),
632           FirstStartOfName(NULL), IsExpression(IsExpression),
633           CanBeExpression(true), InTemplateArgument(false),
634           InCtorInitializer(false), CaretFound(false), IsForEachMacro(false) {}
635 
636     tok::TokenKind ContextKind;
637     unsigned BindingStrength;
638     unsigned LongestObjCSelectorName;
639     unsigned NumBlockParameters;
640     bool ColonIsForRangeExpr;
641     bool ColonIsDictLiteral;
642     bool ColonIsObjCMethodExpr;
643     FormatToken *FirstObjCSelectorName;
644     FormatToken *FirstStartOfName;
645     bool IsExpression;
646     bool CanBeExpression;
647     bool InTemplateArgument;
648     bool InCtorInitializer;
649     bool CaretFound;
650     bool IsForEachMacro;
651   };
652 
653   /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
654   /// of each instance.
655   struct ScopedContextCreator {
656     AnnotatingParser &P;
657 
658     ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
659                          unsigned Increase)
660         : P(P) {
661       P.Contexts.push_back(Context(ContextKind,
662                                    P.Contexts.back().BindingStrength + Increase,
663                                    P.Contexts.back().IsExpression));
664     }
665 
666     ~ScopedContextCreator() { P.Contexts.pop_back(); }
667   };
668 
669   void determineTokenType(FormatToken &Current) {
670     if (Current.getPrecedence() == prec::Assignment &&
671         !Line.First->isOneOf(tok::kw_template, tok::kw_using) &&
672         (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
673       Contexts.back().IsExpression = true;
674       for (FormatToken *Previous = Current.Previous;
675            Previous && !Previous->isOneOf(tok::comma, tok::semi);
676            Previous = Previous->Previous) {
677         if (Previous->is(tok::r_square))
678           Previous = Previous->MatchingParen;
679         if (Previous->Type == TT_BinaryOperator &&
680             Previous->isOneOf(tok::star, tok::amp)) {
681           Previous->Type = TT_PointerOrReference;
682         }
683       }
684     } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
685       Contexts.back().IsExpression = true;
686     } else if (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
687                !Line.InPPDirective) {
688       bool ParametersOfFunctionType =
689           Current.Previous && Current.Previous->is(tok::r_paren) &&
690           Current.Previous->MatchingParen &&
691           Current.Previous->MatchingParen->Type == TT_FunctionTypeLParen;
692       bool IsForOrCatch = Current.Previous &&
693                           Current.Previous->isOneOf(tok::kw_for, tok::kw_catch);
694       Contexts.back().IsExpression = !ParametersOfFunctionType && !IsForOrCatch;
695     } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
696       for (FormatToken *Previous = Current.Previous;
697            Previous && Previous->isOneOf(tok::star, tok::amp);
698            Previous = Previous->Previous)
699         Previous->Type = TT_PointerOrReference;
700     } else if (Current.Previous &&
701                Current.Previous->Type == TT_CtorInitializerColon) {
702       Contexts.back().IsExpression = true;
703       Contexts.back().InCtorInitializer = true;
704     } else if (Current.is(tok::kw_new)) {
705       Contexts.back().CanBeExpression = false;
706     } else if (Current.is(tok::semi) || Current.is(tok::exclaim)) {
707       // This should be the condition or increment in a for-loop.
708       Contexts.back().IsExpression = true;
709     }
710 
711     if (Current.Type == TT_Unknown) {
712       // Line.MightBeFunctionDecl can only be true after the parentheses of a
713       // function declaration have been found. In this case, 'Current' is a
714       // trailing token of this declaration and thus cannot be a name.
715       if (isStartOfName(Current) && !Line.MightBeFunctionDecl) {
716         Contexts.back().FirstStartOfName = &Current;
717         Current.Type = TT_StartOfName;
718       } else if (Current.is(tok::kw_auto)) {
719         AutoFound = true;
720       } else if (Current.is(tok::arrow) && AutoFound &&
721                  Line.MustBeDeclaration) {
722         Current.Type = TT_TrailingReturnArrow;
723       } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
724         Current.Type =
725             determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
726                                                Contexts.back().IsExpression,
727                                   Contexts.back().InTemplateArgument);
728       } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
729         Current.Type = determinePlusMinusCaretUsage(Current);
730         if (Current.Type == TT_UnaryOperator) {
731           ++Contexts.back().NumBlockParameters;
732           if (Current.is(tok::caret))
733             Contexts.back().CaretFound = true;
734         }
735       } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
736         Current.Type = determineIncrementUsage(Current);
737       } else if (Current.is(tok::exclaim)) {
738         Current.Type = TT_UnaryOperator;
739       } else if (Current.isBinaryOperator() &&
740                  (!Current.Previous ||
741                   Current.Previous->isNot(tok::l_square))) {
742         Current.Type = TT_BinaryOperator;
743       } else if (Current.is(tok::comment)) {
744         if (Current.TokenText.startswith("//"))
745           Current.Type = TT_LineComment;
746         else
747           Current.Type = TT_BlockComment;
748       } else if (Current.is(tok::r_paren)) {
749         FormatToken *LeftOfParens = NULL;
750         if (Current.MatchingParen)
751           LeftOfParens = Current.MatchingParen->getPreviousNonComment();
752         bool IsCast = false;
753         bool ParensAreEmpty = Current.Previous == Current.MatchingParen;
754         bool ParensAreType = !Current.Previous ||
755                              Current.Previous->Type == TT_PointerOrReference ||
756                              Current.Previous->Type == TT_TemplateCloser ||
757                              Current.Previous->isSimpleTypeSpecifier();
758         bool ParensCouldEndDecl =
759             Current.Next &&
760             Current.Next->isOneOf(tok::equal, tok::semi, tok::l_brace);
761         bool IsSizeOfOrAlignOf =
762             LeftOfParens &&
763             LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof);
764         if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
765             ((Contexts.size() > 1 &&
766               Contexts[Contexts.size() - 2].IsExpression) ||
767              (Current.Next && Current.Next->isBinaryOperator())))
768           IsCast = true;
769         if (Current.Next && Current.Next->isNot(tok::string_literal) &&
770             (Current.Next->Tok.isLiteral() ||
771              Current.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
772           IsCast = true;
773         // If there is an identifier after the (), it is likely a cast, unless
774         // there is also an identifier before the ().
775         if (LeftOfParens && (LeftOfParens->Tok.getIdentifierInfo() == NULL ||
776                              LeftOfParens->is(tok::kw_return)) &&
777             LeftOfParens->Type != TT_OverloadedOperator &&
778             LeftOfParens->isNot(tok::at) &&
779             LeftOfParens->Type != TT_TemplateCloser && Current.Next &&
780             Current.Next->is(tok::identifier))
781           IsCast = true;
782         if (IsCast && !ParensAreEmpty)
783           Current.Type = TT_CastRParen;
784       } else if (Current.is(tok::at) && Current.Next) {
785         switch (Current.Next->Tok.getObjCKeywordID()) {
786         case tok::objc_interface:
787         case tok::objc_implementation:
788         case tok::objc_protocol:
789           Current.Type = TT_ObjCDecl;
790           break;
791         case tok::objc_property:
792           Current.Type = TT_ObjCProperty;
793           break;
794         default:
795           break;
796         }
797       } else if (Current.is(tok::period)) {
798         FormatToken *PreviousNoComment = Current.getPreviousNonComment();
799         if (PreviousNoComment &&
800             PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
801           Current.Type = TT_DesignatedInitializerPeriod;
802       } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
803                  Line.MightBeFunctionDecl && Contexts.size() == 1) {
804         // Line.MightBeFunctionDecl can only be true after the parentheses of a
805         // function declaration have been found.
806         Current.Type = TT_TrailingAnnotation;
807       }
808     }
809   }
810 
811   /// \brief Take a guess at whether \p Tok starts a name of a function or
812   /// variable declaration.
813   ///
814   /// This is a heuristic based on whether \p Tok is an identifier following
815   /// something that is likely a type.
816   bool isStartOfName(const FormatToken &Tok) {
817     if (Tok.isNot(tok::identifier) || Tok.Previous == NULL)
818       return false;
819 
820     // Skip "const" as it does not have an influence on whether this is a name.
821     FormatToken *PreviousNotConst = Tok.Previous;
822     while (PreviousNotConst != NULL && PreviousNotConst->is(tok::kw_const))
823       PreviousNotConst = PreviousNotConst->Previous;
824 
825     if (PreviousNotConst == NULL)
826       return false;
827 
828     bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
829                        PreviousNotConst->Previous &&
830                        PreviousNotConst->Previous->is(tok::hash);
831 
832     if (PreviousNotConst->Type == TT_TemplateCloser)
833       return PreviousNotConst && PreviousNotConst->MatchingParen &&
834              PreviousNotConst->MatchingParen->Previous &&
835              PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
836 
837     return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) ||
838            PreviousNotConst->Type == TT_PointerOrReference ||
839            PreviousNotConst->isSimpleTypeSpecifier();
840   }
841 
842   /// \brief Return the type of the given token assuming it is * or &.
843   TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
844                                   bool InTemplateArgument) {
845     const FormatToken *PrevToken = Tok.getPreviousNonComment();
846     if (PrevToken == NULL)
847       return TT_UnaryOperator;
848 
849     const FormatToken *NextToken = Tok.getNextNonComment();
850     if (NextToken == NULL)
851       return TT_Unknown;
852 
853     if (PrevToken->is(tok::coloncolon) ||
854         (PrevToken->is(tok::l_paren) && !IsExpression))
855       return TT_PointerOrReference;
856 
857     if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
858                            tok::comma, tok::semi, tok::kw_return, tok::colon,
859                            tok::equal, tok::kw_delete, tok::kw_sizeof) ||
860         PrevToken->Type == TT_BinaryOperator ||
861         PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
862       return TT_UnaryOperator;
863 
864     if (NextToken->is(tok::l_square))
865       return TT_PointerOrReference;
866 
867     if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&
868         PrevToken->MatchingParen->Previous &&
869         PrevToken->MatchingParen->Previous->is(tok::kw_typeof))
870       return TT_PointerOrReference;
871 
872     if (PrevToken->Tok.isLiteral() ||
873         PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
874                            tok::kw_false) ||
875         NextToken->Tok.isLiteral() ||
876         NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
877         NextToken->isUnaryOperator() ||
878         // If we know we're in a template argument, there are no named
879         // declarations. Thus, having an identifier on the right-hand side
880         // indicates a binary operator.
881         (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
882       return TT_BinaryOperator;
883 
884     // It is very unlikely that we are going to find a pointer or reference type
885     // definition on the RHS of an assignment.
886     if (IsExpression)
887       return TT_BinaryOperator;
888 
889     return TT_PointerOrReference;
890   }
891 
892   TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
893     const FormatToken *PrevToken = Tok.getPreviousNonComment();
894     if (PrevToken == NULL || PrevToken->Type == TT_CastRParen)
895       return TT_UnaryOperator;
896 
897     // Use heuristics to recognize unary operators.
898     if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
899                            tok::question, tok::colon, tok::kw_return,
900                            tok::kw_case, tok::at, tok::l_brace))
901       return TT_UnaryOperator;
902 
903     // There can't be two consecutive binary operators.
904     if (PrevToken->Type == TT_BinaryOperator)
905       return TT_UnaryOperator;
906 
907     // Fall back to marking the token as binary operator.
908     return TT_BinaryOperator;
909   }
910 
911   /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
912   TokenType determineIncrementUsage(const FormatToken &Tok) {
913     const FormatToken *PrevToken = Tok.getPreviousNonComment();
914     if (PrevToken == NULL || PrevToken->Type == TT_CastRParen)
915       return TT_UnaryOperator;
916     if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
917       return TT_TrailingUnaryOperator;
918 
919     return TT_UnaryOperator;
920   }
921 
922 
923   SmallVector<Context, 8> Contexts;
924 
925   const FormatStyle &Style;
926   AnnotatedLine &Line;
927   FormatToken *CurrentToken;
928   bool KeywordVirtualFound;
929   bool AutoFound;
930   IdentifierInfo &Ident_in;
931 };
932 
933 static int PrecedenceUnaryOperator = prec::PointerToMember + 1;
934 static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
935 
936 /// \brief Parses binary expressions by inserting fake parenthesis based on
937 /// operator precedence.
938 class ExpressionParser {
939 public:
940   ExpressionParser(AnnotatedLine &Line) : Current(Line.First) {
941     // Skip leading "}", e.g. in "} else if (...) {".
942     if (Current->is(tok::r_brace))
943       next();
944   }
945 
946   /// \brief Parse expressions with the given operatore precedence.
947   void parse(int Precedence = 0) {
948     // Skip 'return' and ObjC selector colons as they are not part of a binary
949     // expression.
950     while (Current &&
951            (Current->is(tok::kw_return) ||
952             (Current->is(tok::colon) && Current->Type == TT_ObjCMethodExpr)))
953       next();
954 
955     if (Current == NULL || Precedence > PrecedenceArrowAndPeriod)
956       return;
957 
958     // Conditional expressions need to be parsed separately for proper nesting.
959     if (Precedence == prec::Conditional) {
960       parseConditionalExpr();
961       return;
962     }
963 
964     // Parse unary operators, which all have a higher precedence than binary
965     // operators.
966     if (Precedence == PrecedenceUnaryOperator) {
967       parseUnaryOperator();
968       return;
969     }
970 
971     FormatToken *Start = Current;
972     FormatToken *LatestOperator = NULL;
973 
974     while (Current) {
975       // Consume operators with higher precedence.
976       parse(Precedence + 1);
977 
978       int CurrentPrecedence = getCurrentPrecedence();
979 
980       if (Current && Current->Type == TT_ObjCSelectorName &&
981           Precedence == CurrentPrecedence) {
982         if (LatestOperator)
983           addFakeParenthesis(Start, prec::Level(Precedence));
984         Start = Current;
985       }
986 
987       // At the end of the line or when an operator with higher precedence is
988       // found, insert fake parenthesis and return.
989       if (Current == NULL || Current->closesScope() ||
990           (CurrentPrecedence != -1 && CurrentPrecedence < Precedence)) {
991         if (LatestOperator) {
992           if (Precedence == PrecedenceArrowAndPeriod) {
993             LatestOperator->LastInChainOfCalls = true;
994             // Call expressions don't have a binary operator precedence.
995             addFakeParenthesis(Start, prec::Unknown);
996           } else {
997             addFakeParenthesis(Start, prec::Level(Precedence));
998           }
999         }
1000         return;
1001       }
1002 
1003       // Consume scopes: (), [], <> and {}
1004       if (Current->opensScope()) {
1005         while (Current && !Current->closesScope()) {
1006           next();
1007           parse();
1008         }
1009         next();
1010       } else {
1011         // Operator found.
1012         if (CurrentPrecedence == Precedence)
1013           LatestOperator = Current;
1014 
1015         next();
1016       }
1017     }
1018   }
1019 
1020 private:
1021   /// \brief Gets the precedence (+1) of the given token for binary operators
1022   /// and other tokens that we treat like binary operators.
1023   int getCurrentPrecedence() {
1024     if (Current) {
1025       if (Current->Type == TT_ConditionalExpr)
1026         return prec::Conditional;
1027       else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon ||
1028                Current->Type == TT_ObjCSelectorName)
1029         return 0;
1030       else if (Current->Type == TT_RangeBasedForLoopColon)
1031         return prec::Comma;
1032       else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
1033         return Current->getPrecedence();
1034       else if (Current->isOneOf(tok::period, tok::arrow))
1035         return PrecedenceArrowAndPeriod;
1036     }
1037     return -1;
1038   }
1039 
1040   void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
1041     Start->FakeLParens.push_back(Precedence);
1042     if (Precedence > prec::Unknown)
1043       Start->StartsBinaryExpression = true;
1044     if (Current) {
1045       ++Current->Previous->FakeRParens;
1046       if (Precedence > prec::Unknown)
1047         Current->Previous->EndsBinaryExpression = true;
1048     }
1049   }
1050 
1051   /// \brief Parse unary operator expressions and surround them with fake
1052   /// parentheses if appropriate.
1053   void parseUnaryOperator() {
1054     if (Current == NULL || Current->Type != TT_UnaryOperator) {
1055       parse(PrecedenceArrowAndPeriod);
1056       return;
1057     }
1058 
1059     FormatToken *Start = Current;
1060     next();
1061     parseUnaryOperator();
1062 
1063     // The actual precedence doesn't matter.
1064     addFakeParenthesis(Start, prec::Unknown);
1065   }
1066 
1067   void parseConditionalExpr() {
1068     FormatToken *Start = Current;
1069     parse(prec::LogicalOr);
1070     if (!Current || !Current->is(tok::question))
1071       return;
1072     next();
1073     parse(prec::LogicalOr);
1074     if (!Current || Current->Type != TT_ConditionalExpr)
1075       return;
1076     next();
1077     parseConditionalExpr();
1078     addFakeParenthesis(Start, prec::Conditional);
1079   }
1080 
1081   void next() {
1082     if (Current)
1083       Current = Current->Next;
1084     while (Current && Current->isTrailingComment())
1085       Current = Current->Next;
1086   }
1087 
1088   FormatToken *Current;
1089 };
1090 
1091 } // end anonymous namespace
1092 
1093 void
1094 TokenAnnotator::setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) {
1095   const AnnotatedLine *NextNonCommentLine = NULL;
1096   for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
1097                                                           E = Lines.rend();
1098        I != E; ++I) {
1099     if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
1100         (*I)->First->Next == NULL)
1101       (*I)->Level = NextNonCommentLine->Level;
1102     else
1103       NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : NULL;
1104 
1105     setCommentLineLevels((*I)->Children);
1106   }
1107 }
1108 
1109 void TokenAnnotator::annotate(AnnotatedLine &Line) {
1110   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1111                                                   E = Line.Children.end();
1112        I != E; ++I) {
1113     annotate(**I);
1114   }
1115   AnnotatingParser Parser(Style, Line, Ident_in);
1116   Line.Type = Parser.parseLine();
1117   if (Line.Type == LT_Invalid)
1118     return;
1119 
1120   ExpressionParser ExprParser(Line);
1121   ExprParser.parse();
1122 
1123   if (Line.First->Type == TT_ObjCMethodSpecifier)
1124     Line.Type = LT_ObjCMethodDecl;
1125   else if (Line.First->Type == TT_ObjCDecl)
1126     Line.Type = LT_ObjCDecl;
1127   else if (Line.First->Type == TT_ObjCProperty)
1128     Line.Type = LT_ObjCProperty;
1129 
1130   Line.First->SpacesRequiredBefore = 1;
1131   Line.First->CanBreakBefore = Line.First->MustBreakBefore;
1132 }
1133 
1134 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
1135   Line.First->TotalLength =
1136       Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
1137   if (!Line.First->Next)
1138     return;
1139   FormatToken *Current = Line.First->Next;
1140   bool InFunctionDecl = Line.MightBeFunctionDecl;
1141   while (Current != NULL) {
1142     if (Current->Type == TT_LineComment) {
1143       if (Current->Previous->BlockKind == BK_BracedInit &&
1144           Current->Previous->opensScope())
1145         Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
1146       else
1147         Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
1148 
1149       // If we find a trailing comment, iterate backwards to determine whether
1150       // it seems to relate to a specific parameter. If so, break before that
1151       // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
1152       // to the previous line in:
1153       //   SomeFunction(a,
1154       //                b, // comment
1155       //                c);
1156       if (!Current->HasUnescapedNewline) {
1157         for (FormatToken *Parameter = Current->Previous; Parameter;
1158              Parameter = Parameter->Previous) {
1159           if (Parameter->isOneOf(tok::comment, tok::r_brace))
1160             break;
1161           if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
1162             if (Parameter->Previous->Type != TT_CtorInitializerComma &&
1163                 Parameter->HasUnescapedNewline)
1164               Parameter->MustBreakBefore = true;
1165             break;
1166           }
1167         }
1168       }
1169     } else if (Current->SpacesRequiredBefore == 0 &&
1170              spaceRequiredBefore(Line, *Current)) {
1171       Current->SpacesRequiredBefore = 1;
1172     }
1173 
1174     Current->MustBreakBefore =
1175         Current->MustBreakBefore || mustBreakBefore(Line, *Current);
1176 
1177     Current->CanBreakBefore =
1178         Current->MustBreakBefore || canBreakBefore(Line, *Current);
1179     if (Current->MustBreakBefore || !Current->Children.empty() ||
1180         Current->IsMultiline)
1181       Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
1182     else
1183       Current->TotalLength = Current->Previous->TotalLength +
1184                              Current->ColumnWidth +
1185                              Current->SpacesRequiredBefore;
1186 
1187     if (Current->Type == TT_CtorInitializerColon)
1188       InFunctionDecl = false;
1189 
1190     // FIXME: Only calculate this if CanBreakBefore is true once static
1191     // initializers etc. are sorted out.
1192     // FIXME: Move magic numbers to a better place.
1193     Current->SplitPenalty = 20 * Current->BindingStrength +
1194                             splitPenalty(Line, *Current, InFunctionDecl);
1195 
1196     Current = Current->Next;
1197   }
1198 
1199   calculateUnbreakableTailLengths(Line);
1200   for (Current = Line.First; Current != NULL; Current = Current->Next) {
1201     if (Current->Role)
1202       Current->Role->precomputeFormattingInfos(Current);
1203   }
1204 
1205   DEBUG({ printDebugInfo(Line); });
1206 
1207   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1208                                                   E = Line.Children.end();
1209        I != E; ++I) {
1210     calculateFormattingInformation(**I);
1211   }
1212 }
1213 
1214 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
1215   unsigned UnbreakableTailLength = 0;
1216   FormatToken *Current = Line.Last;
1217   while (Current != NULL) {
1218     Current->UnbreakableTailLength = UnbreakableTailLength;
1219     if (Current->CanBreakBefore ||
1220         Current->isOneOf(tok::comment, tok::string_literal)) {
1221       UnbreakableTailLength = 0;
1222     } else {
1223       UnbreakableTailLength +=
1224           Current->ColumnWidth + Current->SpacesRequiredBefore;
1225     }
1226     Current = Current->Previous;
1227   }
1228 }
1229 
1230 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
1231                                       const FormatToken &Tok,
1232                                       bool InFunctionDecl) {
1233   const FormatToken &Left = *Tok.Previous;
1234   const FormatToken &Right = Tok;
1235 
1236   if (Left.is(tok::semi))
1237     return 0;
1238   if (Left.is(tok::comma))
1239     return 1;
1240   if (Right.is(tok::l_square)) {
1241     if (Style.Language == FormatStyle::LK_Proto)
1242       return 1;
1243     if (Right.Type != TT_ObjCMethodExpr)
1244       return 250;
1245   }
1246   if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator)) {
1247     if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
1248       return 3;
1249     if (Left.Type == TT_StartOfName)
1250       return 20;
1251     if (InFunctionDecl && Right.NestingLevel == 0)
1252       return Style.PenaltyReturnTypeOnItsOwnLine;
1253     return 200;
1254   }
1255   if (Left.is(tok::equal) && Right.is(tok::l_brace))
1256     return 150;
1257   if (Left.Type == TT_CastRParen)
1258     return 100;
1259   if (Left.is(tok::coloncolon) ||
1260       (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
1261     return 500;
1262   if (Left.isOneOf(tok::kw_class, tok::kw_struct))
1263     return 5000;
1264 
1265   if (Left.Type == TT_RangeBasedForLoopColon ||
1266       Left.Type == TT_InheritanceColon)
1267     return 2;
1268 
1269   if (Right.isMemberAccess()) {
1270     if (Left.is(tok::r_paren) && Left.MatchingParen &&
1271         Left.MatchingParen->ParameterCount > 0)
1272       return 20; // Should be smaller than breaking at a nested comma.
1273     return 150;
1274   }
1275 
1276   if (Right.Type == TT_TrailingAnnotation && Right.Next &&
1277       Right.Next->isNot(tok::l_paren)) {
1278     // Generally, breaking before a trailing annotation is bad unless it is
1279     // function-like. It seems to be especially preferable to keep standard
1280     // annotations (i.e. "const", "final" and "override") on the same line.
1281     // Use a slightly higher penalty after ")" so that annotations like
1282     // "const override" are kept together.
1283     bool is_standard_annotation = Right.is(tok::kw_const) ||
1284                                   Right.TokenText == "override" ||
1285                                   Right.TokenText == "final";
1286     return (Left.is(tok::r_paren) ? 100 : 120) +
1287            (is_standard_annotation ? 50 : 0);
1288   }
1289 
1290   // In for-loops, prefer breaking at ',' and ';'.
1291   if (Line.First->is(tok::kw_for) && Left.is(tok::equal))
1292     return 4;
1293 
1294   // In Objective-C method expressions, prefer breaking before "param:" over
1295   // breaking after it.
1296   if (Right.Type == TT_ObjCSelectorName)
1297     return 0;
1298   if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
1299     return Line.MightBeFunctionDecl ? 50 : 500;
1300 
1301   if (Left.is(tok::l_paren) && InFunctionDecl)
1302     return 100;
1303   if (Left.is(tok::equal) && InFunctionDecl)
1304     return 110;
1305   if (Left.opensScope())
1306     return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
1307                                    : 19;
1308 
1309   if (Right.is(tok::lessless)) {
1310     if (Left.is(tok::string_literal)) {
1311       StringRef Content = Left.TokenText;
1312       if (Content.startswith("\""))
1313         Content = Content.drop_front(1);
1314       if (Content.endswith("\""))
1315         Content = Content.drop_back(1);
1316       Content = Content.trim();
1317       if (Content.size() > 1 &&
1318           (Content.back() == ':' || Content.back() == '='))
1319         return 25;
1320     }
1321     return 1; // Breaking at a << is really cheap.
1322   }
1323   if (Left.Type == TT_ConditionalExpr)
1324     return prec::Conditional;
1325   prec::Level Level = Left.getPrecedence();
1326 
1327   if (Level != prec::Unknown)
1328     return Level;
1329 
1330   return 3;
1331 }
1332 
1333 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
1334                                           const FormatToken &Left,
1335                                           const FormatToken &Right) {
1336   if (Style.Language == FormatStyle::LK_Proto) {
1337     if (Right.is(tok::l_paren) &&
1338         (Left.TokenText == "returns" || Left.TokenText == "option"))
1339       return true;
1340   }
1341   if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
1342       Left.Tok.getObjCKeywordID() == tok::objc_property)
1343     return true;
1344   if (Right.is(tok::hashhash))
1345     return Left.is(tok::hash);
1346   if (Left.isOneOf(tok::hashhash, tok::hash))
1347     return Right.is(tok::hash);
1348   if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
1349     return Style.SpaceInEmptyParentheses;
1350   if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
1351     return (Right.Type == TT_CastRParen ||
1352             (Left.MatchingParen && Left.MatchingParen->Type == TT_CastRParen))
1353                ? Style.SpacesInCStyleCastParentheses
1354                : Style.SpacesInParentheses;
1355   if (Style.SpacesInAngles &&
1356       ((Left.Type == TT_TemplateOpener) != (Right.Type == TT_TemplateCloser)))
1357     return true;
1358   if (Right.isOneOf(tok::semi, tok::comma))
1359     return false;
1360   if (Right.is(tok::less) &&
1361       (Left.is(tok::kw_template) ||
1362        (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
1363     return true;
1364   if (Left.is(tok::arrow) || Right.is(tok::arrow))
1365     return false;
1366   if (Left.isOneOf(tok::exclaim, tok::tilde))
1367     return false;
1368   if (Left.is(tok::at) &&
1369       Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
1370                     tok::numeric_constant, tok::l_paren, tok::l_brace,
1371                     tok::kw_true, tok::kw_false))
1372     return false;
1373   if (Left.is(tok::coloncolon))
1374     return false;
1375   if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace))
1376     return (Left.is(tok::less) && Style.Standard == FormatStyle::LS_Cpp03) ||
1377            !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren,
1378                          tok::r_paren, tok::less);
1379   if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
1380     return false;
1381   if (Right.is(tok::ellipsis))
1382     return Left.Tok.isLiteral();
1383   if (Left.is(tok::l_square) && Right.is(tok::amp))
1384     return false;
1385   if (Right.Type == TT_PointerOrReference)
1386     return Left.Tok.isLiteral() ||
1387            ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) &&
1388             !Style.PointerBindsToType);
1389   if (Right.Type == TT_FunctionTypeLParen && Left.isNot(tok::l_paren) &&
1390       (Left.Type != TT_PointerOrReference || Style.PointerBindsToType))
1391     return true;
1392   if (Left.Type == TT_PointerOrReference)
1393     return Right.Tok.isLiteral() || Right.Type == TT_BlockComment ||
1394            ((Right.Type != TT_PointerOrReference) &&
1395             Right.isNot(tok::l_paren) && Style.PointerBindsToType &&
1396             Left.Previous &&
1397             !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
1398   if (Right.is(tok::star) && Left.is(tok::l_paren))
1399     return false;
1400   if (Left.is(tok::l_square))
1401     return Left.Type == TT_ArrayInitializerLSquare &&
1402            Style.SpacesInContainerLiterals && Right.isNot(tok::r_square);
1403   if (Right.is(tok::r_square))
1404     return Right.MatchingParen && Style.SpacesInContainerLiterals &&
1405            Right.MatchingParen->Type == TT_ArrayInitializerLSquare;
1406   if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr &&
1407       Right.Type != TT_LambdaLSquare && Left.isNot(tok::numeric_constant))
1408     return false;
1409   if (Left.is(tok::colon))
1410     return Left.Type != TT_ObjCMethodExpr;
1411   if (Right.is(tok::l_paren)) {
1412     if (Left.is(tok::r_paren) && Left.Type == TT_AttributeParen)
1413       return true;
1414     return Line.Type == LT_ObjCDecl ||
1415            Left.isOneOf(tok::kw_return, tok::kw_new, tok::kw_delete,
1416                         tok::semi) ||
1417            (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
1418             (Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while,
1419                           tok::kw_switch, tok::kw_catch) ||
1420              Left.IsForEachMacro)) ||
1421            (Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
1422             Left.isOneOf(tok::identifier, tok::kw___attribute) &&
1423             Line.Type != LT_PreprocessorDirective);
1424   }
1425   if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1426     return false;
1427   if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1428     return !Left.Children.empty(); // No spaces in "{}".
1429   if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
1430       (Right.is(tok::r_brace) && Right.MatchingParen &&
1431        Right.MatchingParen->BlockKind != BK_Block))
1432     return !Style.Cpp11BracedListStyle;
1433   if (Left.Type == TT_BlockComment && Left.TokenText.endswith("=*/"))
1434     return false;
1435   if (Right.Type == TT_UnaryOperator)
1436     return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
1437            (Left.isNot(tok::colon) || Left.Type != TT_ObjCMethodExpr);
1438   if (Left.isOneOf(tok::identifier, tok::greater, tok::r_square) &&
1439       Right.is(tok::l_brace) && Right.getNextNonComment() &&
1440       Right.BlockKind != BK_Block)
1441     return false;
1442   if (Left.is(tok::period) || Right.is(tok::period))
1443     return false;
1444   if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
1445     return false;
1446   return true;
1447 }
1448 
1449 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
1450                                          const FormatToken &Tok) {
1451   if (Tok.Tok.getIdentifierInfo() && Tok.Previous->Tok.getIdentifierInfo())
1452     return true; // Never ever merge two identifiers.
1453   if (Tok.Previous->Type == TT_ImplicitStringLiteral)
1454     return Tok.WhitespaceRange.getBegin() != Tok.WhitespaceRange.getEnd();
1455   if (Line.Type == LT_ObjCMethodDecl) {
1456     if (Tok.Previous->Type == TT_ObjCMethodSpecifier)
1457       return true;
1458     if (Tok.Previous->is(tok::r_paren) && Tok.is(tok::identifier))
1459       // Don't space between ')' and <id>
1460       return false;
1461   }
1462   if (Line.Type == LT_ObjCProperty &&
1463       (Tok.is(tok::equal) || Tok.Previous->is(tok::equal)))
1464     return false;
1465 
1466   if (Tok.Type == TT_TrailingReturnArrow ||
1467       Tok.Previous->Type == TT_TrailingReturnArrow)
1468     return true;
1469   if (Tok.Previous->is(tok::comma))
1470     return true;
1471   if (Tok.is(tok::comma))
1472     return false;
1473   if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
1474     return true;
1475   if (Tok.Previous->Tok.is(tok::kw_operator))
1476     return Tok.is(tok::coloncolon);
1477   if (Tok.Type == TT_OverloadedOperatorLParen)
1478     return false;
1479   if (Tok.is(tok::colon))
1480     return !Line.First->isOneOf(tok::kw_case, tok::kw_default) &&
1481            Tok.getNextNonComment() != NULL && Tok.Type != TT_ObjCMethodExpr &&
1482            !Tok.Previous->is(tok::question) &&
1483            (Tok.Type != TT_DictLiteral || Style.SpacesInContainerLiterals);
1484   if (Tok.Previous->Type == TT_UnaryOperator ||
1485       Tok.Previous->Type == TT_CastRParen)
1486     return Tok.Type == TT_BinaryOperator;
1487   if (Tok.Previous->is(tok::greater) && Tok.is(tok::greater)) {
1488     return Tok.Type == TT_TemplateCloser &&
1489            Tok.Previous->Type == TT_TemplateCloser &&
1490            (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
1491   }
1492   if (Tok.isOneOf(tok::arrowstar, tok::periodstar) ||
1493       Tok.Previous->isOneOf(tok::arrowstar, tok::periodstar))
1494     return false;
1495   if (!Style.SpaceBeforeAssignmentOperators &&
1496       Tok.getPrecedence() == prec::Assignment)
1497     return false;
1498   if ((Tok.Type == TT_BinaryOperator && !Tok.Previous->is(tok::l_paren)) ||
1499       Tok.Previous->Type == TT_BinaryOperator)
1500     return true;
1501   if (Tok.Previous->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
1502     return false;
1503   if (Tok.is(tok::less) && Tok.Previous->isNot(tok::l_paren) &&
1504       Line.First->is(tok::hash))
1505     return true;
1506   if (Tok.Type == TT_TrailingUnaryOperator)
1507     return false;
1508   return spaceRequiredBetween(Line, *Tok.Previous, Tok);
1509 }
1510 
1511 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
1512                                      const FormatToken &Right) {
1513   const FormatToken &Left = *Right.Previous;
1514   if (Right.is(tok::comment)) {
1515     return Right.Previous->BlockKind != BK_BracedInit &&
1516            Right.Previous->Type != TT_CtorInitializerColon &&
1517            Right.NewlinesBefore > 0;
1518   } else if (Right.Previous->isTrailingComment() ||
1519              (Right.isStringLiteral() && Right.Previous->isStringLiteral())) {
1520     return true;
1521   } else if (Right.Previous->IsUnterminatedLiteral) {
1522     return true;
1523   } else if (Right.is(tok::lessless) && Right.Next &&
1524              Right.Previous->is(tok::string_literal) &&
1525              Right.Next->is(tok::string_literal)) {
1526     return true;
1527   } else if (Right.Previous->ClosesTemplateDeclaration &&
1528              Right.Previous->MatchingParen &&
1529              Right.Previous->MatchingParen->NestingLevel == 0 &&
1530              Style.AlwaysBreakTemplateDeclarations) {
1531     return true;
1532   } else if ((Right.Type == TT_CtorInitializerComma ||
1533               Right.Type == TT_CtorInitializerColon) &&
1534              Style.BreakConstructorInitializersBeforeComma &&
1535              !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) {
1536     return true;
1537   } else if (Right.is(tok::l_brace) && (Right.BlockKind == BK_Block)) {
1538     return Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1539            Style.BreakBeforeBraces == FormatStyle::BS_GNU;
1540   } else if (Right.is(tok::string_literal) &&
1541              Right.TokenText.startswith("R\"")) {
1542     // Raw string literals are special wrt. line breaks. The author has made a
1543     // deliberate choice and might have aligned the contents of the string
1544     // literal accordingly. Thus, we try keep existing line breaks.
1545     return Right.NewlinesBefore > 0;
1546   } else if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 &&
1547              Style.Language == FormatStyle::LK_Proto) {
1548     // Don't enums onto single lines in protocol buffers.
1549     return true;
1550   } else if ((Left.is(tok::l_brace) && Left.MatchingParen &&
1551               Left.MatchingParen->Previous &&
1552               Left.MatchingParen->Previous->is(tok::comma)) ||
1553              (Right.is(tok::r_brace) && Left.is(tok::comma))) {
1554     // If the last token before a '}' is a comma, the intention is to insert a
1555     // line break after it in order to make shuffling around entries easier.
1556     return true;
1557   }
1558   return false;
1559 }
1560 
1561 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
1562                                     const FormatToken &Right) {
1563   const FormatToken &Left = *Right.Previous;
1564   if (Left.is(tok::at))
1565     return false;
1566   if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator))
1567     return true;
1568   if (Right.isTrailingComment())
1569     // We rely on MustBreakBefore being set correctly here as we should not
1570     // change the "binding" behavior of a comment.
1571     // The first comment in a braced lists is always interpreted as belonging to
1572     // the first list element. Otherwise, it should be placed outside of the
1573     // list.
1574     return Left.BlockKind == BK_BracedInit;
1575   if (Left.is(tok::question) && Right.is(tok::colon))
1576     return false;
1577   if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
1578     return Style.BreakBeforeTernaryOperators;
1579   if (Left.Type == TT_ConditionalExpr || Left.is(tok::question))
1580     return !Style.BreakBeforeTernaryOperators;
1581   if (Right.is(tok::colon) &&
1582       (Right.Type == TT_DictLiteral || Right.Type == TT_ObjCMethodExpr))
1583     return false;
1584   if (Right.Type == TT_InheritanceColon)
1585     return true;
1586   if (Left.is(tok::colon) &&
1587       (Left.Type == TT_DictLiteral || Left.Type == TT_ObjCMethodExpr))
1588     return true;
1589   if (Right.Type == TT_ObjCSelectorName)
1590     return true;
1591   if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
1592     return true;
1593   if (Left.ClosesTemplateDeclaration)
1594     return true;
1595   if (Right.Type == TT_RangeBasedForLoopColon ||
1596       Right.Type == TT_OverloadedOperatorLParen ||
1597       Right.Type == TT_OverloadedOperator)
1598     return false;
1599   if (Left.Type == TT_RangeBasedForLoopColon)
1600     return true;
1601   if (Right.Type == TT_RangeBasedForLoopColon)
1602     return false;
1603   if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
1604       Left.Type == TT_UnaryOperator || Left.is(tok::kw_operator))
1605     return false;
1606   if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
1607     return false;
1608   if (Left.is(tok::l_paren) && Left.Type == TT_AttributeParen)
1609     return false;
1610   if (Left.is(tok::l_paren) && Left.Previous &&
1611       (Left.Previous->Type == TT_BinaryOperator ||
1612        Left.Previous->Type == TT_CastRParen))
1613     return false;
1614   if (Right.Type == TT_ImplicitStringLiteral)
1615     return false;
1616 
1617   if (Right.is(tok::r_paren) || Right.Type == TT_TemplateCloser)
1618     return false;
1619 
1620   // We only break before r_brace if there was a corresponding break before
1621   // the l_brace, which is tracked by BreakBeforeClosingBrace.
1622   if (Right.is(tok::r_brace))
1623     return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
1624 
1625   // Allow breaking after a trailing 'const', e.g. after a method declaration,
1626   // unless it is follow by ';', '{' or '='.
1627   if (Left.is(tok::kw_const) && Left.Previous != NULL &&
1628       Left.Previous->is(tok::r_paren))
1629     return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal);
1630 
1631   if (Right.is(tok::kw___attribute))
1632     return true;
1633 
1634   if (Left.is(tok::identifier) && Right.is(tok::string_literal))
1635     return true;
1636 
1637   if (Left.Type == TT_CtorInitializerComma &&
1638       Style.BreakConstructorInitializersBeforeComma)
1639     return false;
1640   if (Right.Type == TT_CtorInitializerComma &&
1641       Style.BreakConstructorInitializersBeforeComma)
1642     return true;
1643   if (Right.Type == TT_BinaryOperator && Style.BreakBeforeBinaryOperators)
1644     return true;
1645   if (Left.is(tok::greater) && Right.is(tok::greater) &&
1646       Left.Type != TT_TemplateCloser)
1647     return false;
1648   if (Left.Type == TT_ArrayInitializerLSquare)
1649     return true;
1650   return (Left.isBinaryOperator() && Left.isNot(tok::lessless) &&
1651           !Style.BreakBeforeBinaryOperators) ||
1652          Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
1653                       tok::kw_class, tok::kw_struct) ||
1654          Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon,
1655                        tok::l_square, tok::at) ||
1656          (Left.is(tok::r_paren) &&
1657           Right.isOneOf(tok::identifier, tok::kw_const)) ||
1658          (Left.is(tok::l_paren) && !Right.is(tok::r_paren));
1659 }
1660 
1661 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
1662   llvm::errs() << "AnnotatedTokens:\n";
1663   const FormatToken *Tok = Line.First;
1664   while (Tok) {
1665     llvm::errs() << " M=" << Tok->MustBreakBefore
1666                  << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type
1667                  << " S=" << Tok->SpacesRequiredBefore
1668                  << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()
1669                  << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind
1670                  << " FakeLParens=";
1671     for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
1672       llvm::errs() << Tok->FakeLParens[i] << "/";
1673     llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n";
1674     if (Tok->Next == NULL)
1675       assert(Tok == Line.Last);
1676     Tok = Tok->Next;
1677   }
1678   llvm::errs() << "----\n";
1679 }
1680 
1681 } // namespace format
1682 } // namespace clang
1683