1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "TokenAnnotator.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "llvm/Support/Debug.h"
19 
20 namespace clang {
21 namespace format {
22 
23 namespace {
24 
25 /// \brief A parser that gathers additional information about tokens.
26 ///
27 /// The \c TokenAnnotator tries to match parenthesis and square brakets and
28 /// store a parenthesis levels. It also tries to resolve matching "<" and ">"
29 /// into template parameter lists.
30 class AnnotatingParser {
31 public:
32   AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
33                    IdentifierInfo &Ident_in)
34       : Style(Style), Line(Line), CurrentToken(Line.First),
35         KeywordVirtualFound(false), AutoFound(false), Ident_in(Ident_in) {
36     Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
37   }
38 
39 private:
40   bool parseAngle() {
41     if (CurrentToken == NULL)
42       return false;
43     ScopedContextCreator ContextCreator(*this, tok::less, 10);
44     FormatToken *Left = CurrentToken->Previous;
45     Contexts.back().IsExpression = false;
46     while (CurrentToken != NULL) {
47       if (CurrentToken->is(tok::greater)) {
48         Left->MatchingParen = CurrentToken;
49         CurrentToken->MatchingParen = Left;
50         CurrentToken->Type = TT_TemplateCloser;
51         next();
52         return true;
53       }
54       if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
55                                 tok::question, tok::colon))
56         return false;
57       // If a && or || is found and interpreted as a binary operator, this set
58       // of angles is likely part of something like "a < b && c > d". If the
59       // angles are inside an expression, the ||/&& might also be a binary
60       // operator that was misinterpreted because we are parsing template
61       // parameters.
62       // FIXME: This is getting out of hand, write a decent parser.
63       if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
64           (CurrentToken->Previous->Type == TT_BinaryOperator ||
65            Contexts[Contexts.size() - 2].IsExpression) &&
66           Line.First->isNot(tok::kw_template))
67         return false;
68       updateParameterCount(Left, CurrentToken);
69       if (!consumeToken())
70         return false;
71     }
72     return false;
73   }
74 
75   bool parseParens(bool LookForDecls = false) {
76     if (CurrentToken == NULL)
77       return false;
78     bool AfterCaret = Contexts.back().CaretFound;
79     Contexts.back().CaretFound = false;
80 
81     ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
82 
83     // FIXME: This is a bit of a hack. Do better.
84     Contexts.back().ColonIsForRangeExpr =
85         Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
86 
87     bool StartsObjCMethodExpr = false;
88     FormatToken *Left = CurrentToken->Previous;
89     if (CurrentToken->is(tok::caret)) {
90       // (^ can start a block type.
91       Left->Type = TT_ObjCBlockLParen;
92     } else if (FormatToken *MaybeSel = Left->Previous) {
93       // @selector( starts a selector.
94       if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
95           MaybeSel->Previous->is(tok::at)) {
96         StartsObjCMethodExpr = true;
97       }
98     }
99 
100     if (Left->Previous && Left->Previous->isOneOf(tok::kw_static_assert,
101                                                   tok::kw_if, tok::kw_while)) {
102       // static_assert, if and while usually contain expressions.
103       Contexts.back().IsExpression = true;
104     } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
105                Left->Previous->MatchingParen &&
106                Left->Previous->MatchingParen->Type == TT_LambdaLSquare) {
107       // This is a parameter list of a lambda expression.
108       Contexts.back().IsExpression = false;
109     } else if (AfterCaret) {
110       // This is the parameter list of an ObjC block.
111       Contexts.back().IsExpression = false;
112     } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
113       Left->Type = TT_AttributeParen;
114     }
115 
116     if (StartsObjCMethodExpr) {
117       Contexts.back().ColonIsObjCMethodExpr = true;
118       Left->Type = TT_ObjCMethodExpr;
119     }
120 
121     bool MightBeFunctionType = CurrentToken->is(tok::star);
122     bool HasMultipleLines = false;
123     bool HasMultipleParametersOnALine = false;
124     while (CurrentToken != NULL) {
125       // LookForDecls is set when "if (" has been seen. Check for
126       // 'identifier' '*' 'identifier' followed by not '=' -- this
127       // '*' has to be a binary operator but determineStarAmpUsage() will
128       // categorize it as an unary operator, so set the right type here.
129       if (LookForDecls && CurrentToken->Next) {
130         FormatToken *Prev = CurrentToken->getPreviousNonComment();
131         if (Prev) {
132           FormatToken *PrevPrev = Prev->getPreviousNonComment();
133           FormatToken *Next = CurrentToken->Next;
134           if (PrevPrev && PrevPrev->is(tok::identifier) &&
135               Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
136               CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
137             Prev->Type = TT_BinaryOperator;
138             LookForDecls = false;
139           }
140         }
141       }
142 
143       if (CurrentToken->Previous->Type == TT_PointerOrReference &&
144           CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
145                                                     tok::coloncolon))
146         MightBeFunctionType = true;
147       if (CurrentToken->is(tok::r_paren)) {
148         if (MightBeFunctionType && CurrentToken->Next &&
149             (CurrentToken->Next->is(tok::l_paren) ||
150              (CurrentToken->Next->is(tok::l_square) &&
151               !Contexts.back().IsExpression)))
152           Left->Type = TT_FunctionTypeLParen;
153         Left->MatchingParen = CurrentToken;
154         CurrentToken->MatchingParen = Left;
155 
156         if (StartsObjCMethodExpr) {
157           CurrentToken->Type = TT_ObjCMethodExpr;
158           if (Contexts.back().FirstObjCSelectorName != NULL) {
159             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
160                 Contexts.back().LongestObjCSelectorName;
161           }
162         }
163 
164         if (Left->Type == TT_AttributeParen)
165           CurrentToken->Type = TT_AttributeParen;
166 
167         if (!HasMultipleLines)
168           Left->PackingKind = PPK_Inconclusive;
169         else if (HasMultipleParametersOnALine)
170           Left->PackingKind = PPK_BinPacked;
171         else
172           Left->PackingKind = PPK_OnePerLine;
173 
174         next();
175         return true;
176       }
177       if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
178         return false;
179       else if (CurrentToken->is(tok::l_brace))
180         Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
181       updateParameterCount(Left, CurrentToken);
182       if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
183           !CurrentToken->Next->HasUnescapedNewline &&
184           !CurrentToken->Next->isTrailingComment())
185         HasMultipleParametersOnALine = true;
186       if (!consumeToken())
187         return false;
188       if (CurrentToken && CurrentToken->HasUnescapedNewline)
189         HasMultipleLines = true;
190     }
191     return false;
192   }
193 
194   bool parseSquare() {
195     if (!CurrentToken)
196       return false;
197 
198     // A '[' could be an index subscript (after an identifier or after
199     // ')' or ']'), it could be the start of an Objective-C method
200     // expression, or it could the the start of an Objective-C array literal.
201     FormatToken *Left = CurrentToken->Previous;
202     FormatToken *Parent = Left->getPreviousNonComment();
203     bool StartsObjCMethodExpr =
204         Contexts.back().CanBeExpression && Left->Type != TT_LambdaLSquare &&
205         (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
206                                     tok::kw_return, tok::kw_throw) ||
207          Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn ||
208          Parent->Type == TT_CastRParen ||
209          getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
210     ScopedContextCreator ContextCreator(*this, tok::l_square, 10);
211     Contexts.back().IsExpression = true;
212     bool ColonFound = false;
213 
214     if (StartsObjCMethodExpr) {
215       Contexts.back().ColonIsObjCMethodExpr = true;
216       Left->Type = TT_ObjCMethodExpr;
217     } else if (Parent && Parent->is(tok::at)) {
218       Left->Type = TT_ArrayInitializerLSquare;
219     } else if (Left->Type == TT_Unknown) {
220       Left->Type = TT_ArraySubscriptLSquare;
221     }
222 
223     while (CurrentToken != NULL) {
224       if (CurrentToken->is(tok::r_square)) {
225         if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
226             Left->Type == TT_ObjCMethodExpr) {
227           // An ObjC method call is rarely followed by an open parenthesis.
228           // FIXME: Do we incorrectly label ":" with this?
229           StartsObjCMethodExpr = false;
230           Left->Type = TT_Unknown;
231         }
232         if (StartsObjCMethodExpr) {
233           CurrentToken->Type = TT_ObjCMethodExpr;
234           // determineStarAmpUsage() thinks that '*' '[' is allocating an
235           // array of pointers, but if '[' starts a selector then '*' is a
236           // binary operator.
237           if (Parent != NULL && Parent->Type == TT_PointerOrReference)
238             Parent->Type = TT_BinaryOperator;
239         }
240         Left->MatchingParen = CurrentToken;
241         CurrentToken->MatchingParen = Left;
242         if (Contexts.back().FirstObjCSelectorName != NULL) {
243           Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
244               Contexts.back().LongestObjCSelectorName;
245           if (Contexts.back().NumBlockParameters > 1)
246             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
247         }
248         next();
249         return true;
250       }
251       if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
252         return false;
253       if (CurrentToken->is(tok::colon))
254         ColonFound = true;
255       if (CurrentToken->is(tok::comma) &&
256           Style.Language != FormatStyle::LK_Proto &&
257           (Left->Type == TT_ArraySubscriptLSquare ||
258            (Left->Type == TT_ObjCMethodExpr && !ColonFound)))
259         Left->Type = TT_ArrayInitializerLSquare;
260       updateParameterCount(Left, CurrentToken);
261       if (!consumeToken())
262         return false;
263     }
264     return false;
265   }
266 
267   bool parseBrace() {
268     if (CurrentToken != NULL) {
269       FormatToken *Left = CurrentToken->Previous;
270       ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
271       Contexts.back().ColonIsDictLiteral = true;
272 
273       while (CurrentToken != NULL) {
274         if (CurrentToken->is(tok::r_brace)) {
275           Left->MatchingParen = CurrentToken;
276           CurrentToken->MatchingParen = Left;
277           next();
278           return true;
279         }
280         if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
281           return false;
282         updateParameterCount(Left, CurrentToken);
283         if (CurrentToken->is(tok::colon) &&
284             Style.Language != FormatStyle::LK_Proto)
285           Left->Type = TT_DictLiteral;
286         if (!consumeToken())
287           return false;
288       }
289     }
290     // No closing "}" found, this probably starts a definition.
291     Line.StartsDefinition = true;
292     return true;
293   }
294 
295   void updateParameterCount(FormatToken *Left, FormatToken *Current) {
296     if (Current->is(tok::comma)) {
297       ++Left->ParameterCount;
298       if (!Left->Role)
299         Left->Role.reset(new CommaSeparatedList(Style));
300       Left->Role->CommaFound(Current);
301     } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
302       Left->ParameterCount = 1;
303     }
304   }
305 
306   bool parseConditional() {
307     while (CurrentToken != NULL) {
308       if (CurrentToken->is(tok::colon)) {
309         CurrentToken->Type = TT_ConditionalExpr;
310         next();
311         return true;
312       }
313       if (!consumeToken())
314         return false;
315     }
316     return false;
317   }
318 
319   bool parseTemplateDeclaration() {
320     if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
321       CurrentToken->Type = TT_TemplateOpener;
322       next();
323       if (!parseAngle())
324         return false;
325       if (CurrentToken != NULL)
326         CurrentToken->Previous->ClosesTemplateDeclaration = true;
327       return true;
328     }
329     return false;
330   }
331 
332   bool consumeToken() {
333     FormatToken *Tok = CurrentToken;
334     next();
335     switch (Tok->Tok.getKind()) {
336     case tok::plus:
337     case tok::minus:
338       if (Tok->Previous == NULL && Line.MustBeDeclaration)
339         Tok->Type = TT_ObjCMethodSpecifier;
340       break;
341     case tok::colon:
342       if (Tok->Previous == NULL)
343         return false;
344       // Colons from ?: are handled in parseConditional().
345       if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1) {
346         Tok->Type = TT_CtorInitializerColon;
347       } else if (Contexts.back().ColonIsDictLiteral) {
348         Tok->Type = TT_DictLiteral;
349       } else if (Contexts.back().ColonIsObjCMethodExpr ||
350                  Line.First->Type == TT_ObjCMethodSpecifier) {
351         Tok->Type = TT_ObjCMethodExpr;
352         Tok->Previous->Type = TT_ObjCSelectorName;
353         if (Tok->Previous->ColumnWidth >
354             Contexts.back().LongestObjCSelectorName) {
355           Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth;
356         }
357         if (Contexts.back().FirstObjCSelectorName == NULL)
358           Contexts.back().FirstObjCSelectorName = Tok->Previous;
359       } else if (Contexts.back().ColonIsForRangeExpr) {
360         Tok->Type = TT_RangeBasedForLoopColon;
361       } else if (CurrentToken != NULL &&
362                  CurrentToken->is(tok::numeric_constant)) {
363         Tok->Type = TT_BitFieldColon;
364       } else if (Contexts.size() == 1 && Line.First->isNot(tok::kw_enum)) {
365         Tok->Type = TT_InheritanceColon;
366       } else if (Contexts.back().ContextKind == tok::l_paren) {
367         Tok->Type = TT_InlineASMColon;
368       }
369       break;
370     case tok::kw_if:
371     case tok::kw_while:
372       if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
373         next();
374         if (!parseParens(/*LookForDecls=*/true))
375           return false;
376       }
377       break;
378     case tok::kw_for:
379       Contexts.back().ColonIsForRangeExpr = true;
380       next();
381       if (!parseParens())
382         return false;
383       break;
384     case tok::l_paren:
385       if (!parseParens())
386         return false;
387       if (Line.MustBeDeclaration && Contexts.size() == 1 &&
388           !Contexts.back().IsExpression && Line.First->Type != TT_ObjCProperty)
389         Line.MightBeFunctionDecl = true;
390       break;
391     case tok::l_square:
392       if (!parseSquare())
393         return false;
394       break;
395     case tok::l_brace:
396       if (!parseBrace())
397         return false;
398       break;
399     case tok::less:
400       if (Tok->Previous && !Tok->Previous->Tok.isLiteral() && parseAngle())
401         Tok->Type = TT_TemplateOpener;
402       else {
403         Tok->Type = TT_BinaryOperator;
404         CurrentToken = Tok;
405         next();
406       }
407       break;
408     case tok::r_paren:
409     case tok::r_square:
410       return false;
411     case tok::r_brace:
412       // Lines can start with '}'.
413       if (Tok->Previous != NULL)
414         return false;
415       break;
416     case tok::greater:
417       Tok->Type = TT_BinaryOperator;
418       break;
419     case tok::kw_operator:
420       while (CurrentToken &&
421              !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
422         if (CurrentToken->isOneOf(tok::star, tok::amp))
423           CurrentToken->Type = TT_PointerOrReference;
424         consumeToken();
425         if (CurrentToken && CurrentToken->Previous->Type == TT_BinaryOperator)
426           CurrentToken->Previous->Type = TT_OverloadedOperator;
427       }
428       if (CurrentToken) {
429         CurrentToken->Type = TT_OverloadedOperatorLParen;
430         if (CurrentToken->Previous->Type == TT_BinaryOperator)
431           CurrentToken->Previous->Type = TT_OverloadedOperator;
432       }
433       break;
434     case tok::question:
435       parseConditional();
436       break;
437     case tok::kw_template:
438       parseTemplateDeclaration();
439       break;
440     case tok::identifier:
441       if (Line.First->is(tok::kw_for) &&
442           Tok->Tok.getIdentifierInfo() == &Ident_in)
443         Tok->Type = TT_ObjCForIn;
444       break;
445     case tok::comma:
446       if (Contexts.back().FirstStartOfName)
447         Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
448       if (Contexts.back().InCtorInitializer)
449         Tok->Type = TT_CtorInitializerComma;
450       break;
451     default:
452       break;
453     }
454     return true;
455   }
456 
457   void parseIncludeDirective() {
458     next();
459     if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
460       next();
461       while (CurrentToken != NULL) {
462         if (CurrentToken->isNot(tok::comment) || CurrentToken->Next)
463           CurrentToken->Type = TT_ImplicitStringLiteral;
464         next();
465       }
466     } else {
467       while (CurrentToken != NULL) {
468         if (CurrentToken->is(tok::string_literal))
469           // Mark these string literals as "implicit" literals, too, so that
470           // they are not split or line-wrapped.
471           CurrentToken->Type = TT_ImplicitStringLiteral;
472         next();
473       }
474     }
475   }
476 
477   void parseWarningOrError() {
478     next();
479     // We still want to format the whitespace left of the first token of the
480     // warning or error.
481     next();
482     while (CurrentToken != NULL) {
483       CurrentToken->Type = TT_ImplicitStringLiteral;
484       next();
485     }
486   }
487 
488   void parsePragma() {
489     next(); // Consume "pragma".
490     if (CurrentToken && CurrentToken->TokenText == "mark") {
491       next(); // Consume "mark".
492       next(); // Consume first token (so we fix leading whitespace).
493       while (CurrentToken != NULL) {
494         CurrentToken->Type = TT_ImplicitStringLiteral;
495         next();
496       }
497     }
498   }
499 
500   void parsePreprocessorDirective() {
501     next();
502     if (CurrentToken == NULL)
503       return;
504     if (CurrentToken->Tok.is(tok::numeric_constant)) {
505       CurrentToken->SpacesRequiredBefore = 1;
506       return;
507     }
508     // Hashes in the middle of a line can lead to any strange token
509     // sequence.
510     if (CurrentToken->Tok.getIdentifierInfo() == NULL)
511       return;
512     switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
513     case tok::pp_include:
514     case tok::pp_import:
515       parseIncludeDirective();
516       break;
517     case tok::pp_error:
518     case tok::pp_warning:
519       parseWarningOrError();
520       break;
521     case tok::pp_pragma:
522       parsePragma();
523       break;
524     case tok::pp_if:
525     case tok::pp_elif:
526       Contexts.back().IsExpression = true;
527       parseLine();
528       break;
529     default:
530       break;
531     }
532     while (CurrentToken != NULL)
533       next();
534   }
535 
536 public:
537   LineType parseLine() {
538     if (CurrentToken->is(tok::hash)) {
539       parsePreprocessorDirective();
540       return LT_PreprocessorDirective;
541     }
542 
543     // Directly allow to 'import <string-literal>' to support protocol buffer
544     // definitions (code.google.com/p/protobuf) or missing "#" (either way we
545     // should not break the line).
546     IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
547     if (Info && Info->getPPKeywordID() == tok::pp_import &&
548         CurrentToken->Next && CurrentToken->Next->is(tok::string_literal))
549       parseIncludeDirective();
550 
551     while (CurrentToken != NULL) {
552       if (CurrentToken->is(tok::kw_virtual))
553         KeywordVirtualFound = true;
554       if (!consumeToken())
555         return LT_Invalid;
556     }
557     if (KeywordVirtualFound)
558       return LT_VirtualFunctionDecl;
559 
560     if (Line.First->Type == TT_ObjCMethodSpecifier) {
561       if (Contexts.back().FirstObjCSelectorName != NULL)
562         Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
563             Contexts.back().LongestObjCSelectorName;
564       return LT_ObjCMethodDecl;
565     }
566 
567     return LT_Other;
568   }
569 
570 private:
571   void next() {
572     if (CurrentToken != NULL) {
573       determineTokenType(*CurrentToken);
574       CurrentToken->BindingStrength = Contexts.back().BindingStrength;
575       CurrentToken->NestingLevel = Contexts.size() - 1;
576     }
577 
578     if (CurrentToken != NULL)
579       CurrentToken = CurrentToken->Next;
580 
581     if (CurrentToken != NULL) {
582       // Reset token type in case we have already looked at it and then
583       // recovered from an error (e.g. failure to find the matching >).
584       if (CurrentToken->Type != TT_LambdaLSquare &&
585           CurrentToken->Type != TT_FunctionLBrace &&
586           CurrentToken->Type != TT_ImplicitStringLiteral)
587         CurrentToken->Type = TT_Unknown;
588       if (CurrentToken->Role)
589         CurrentToken->Role.reset(NULL);
590       CurrentToken->FakeLParens.clear();
591       CurrentToken->FakeRParens = 0;
592     }
593   }
594 
595   /// \brief A struct to hold information valid in a specific context, e.g.
596   /// a pair of parenthesis.
597   struct Context {
598     Context(tok::TokenKind ContextKind, unsigned BindingStrength,
599             bool IsExpression)
600         : ContextKind(ContextKind), BindingStrength(BindingStrength),
601           LongestObjCSelectorName(0), NumBlockParameters(0),
602           ColonIsForRangeExpr(false), ColonIsDictLiteral(false),
603           ColonIsObjCMethodExpr(false), FirstObjCSelectorName(NULL),
604           FirstStartOfName(NULL), IsExpression(IsExpression),
605           CanBeExpression(true), InCtorInitializer(false), CaretFound(false) {}
606 
607     tok::TokenKind ContextKind;
608     unsigned BindingStrength;
609     unsigned LongestObjCSelectorName;
610     unsigned NumBlockParameters;
611     bool ColonIsForRangeExpr;
612     bool ColonIsDictLiteral;
613     bool ColonIsObjCMethodExpr;
614     FormatToken *FirstObjCSelectorName;
615     FormatToken *FirstStartOfName;
616     bool IsExpression;
617     bool CanBeExpression;
618     bool InCtorInitializer;
619     bool CaretFound;
620   };
621 
622   /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
623   /// of each instance.
624   struct ScopedContextCreator {
625     AnnotatingParser &P;
626 
627     ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
628                          unsigned Increase)
629         : P(P) {
630       P.Contexts.push_back(Context(ContextKind,
631                                    P.Contexts.back().BindingStrength + Increase,
632                                    P.Contexts.back().IsExpression));
633     }
634 
635     ~ScopedContextCreator() { P.Contexts.pop_back(); }
636   };
637 
638   void determineTokenType(FormatToken &Current) {
639     if (Current.getPrecedence() == prec::Assignment &&
640         !Line.First->isOneOf(tok::kw_template, tok::kw_using) &&
641         (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
642       Contexts.back().IsExpression = true;
643       for (FormatToken *Previous = Current.Previous;
644            Previous && !Previous->isOneOf(tok::comma, tok::semi);
645            Previous = Previous->Previous) {
646         if (Previous->is(tok::r_square))
647           Previous = Previous->MatchingParen;
648         if (Previous->Type == TT_BinaryOperator &&
649             Previous->isOneOf(tok::star, tok::amp)) {
650           Previous->Type = TT_PointerOrReference;
651         }
652       }
653     } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
654       Contexts.back().IsExpression = true;
655     } else if (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
656                !Line.InPPDirective) {
657       bool ParametersOfFunctionType =
658           Current.Previous && Current.Previous->is(tok::r_paren) &&
659           Current.Previous->MatchingParen &&
660           Current.Previous->MatchingParen->Type == TT_FunctionTypeLParen;
661       bool IsForOrCatch = Current.Previous &&
662                           Current.Previous->isOneOf(tok::kw_for, tok::kw_catch);
663       Contexts.back().IsExpression = !ParametersOfFunctionType && !IsForOrCatch;
664     } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
665       for (FormatToken *Previous = Current.Previous;
666            Previous && Previous->isOneOf(tok::star, tok::amp);
667            Previous = Previous->Previous)
668         Previous->Type = TT_PointerOrReference;
669     } else if (Current.Previous &&
670                Current.Previous->Type == TT_CtorInitializerColon) {
671       Contexts.back().IsExpression = true;
672       Contexts.back().InCtorInitializer = true;
673     } else if (Current.is(tok::kw_new)) {
674       Contexts.back().CanBeExpression = false;
675     } else if (Current.is(tok::semi) || Current.is(tok::exclaim)) {
676       // This should be the condition or increment in a for-loop.
677       Contexts.back().IsExpression = true;
678     }
679 
680     if (Current.Type == TT_Unknown) {
681       // Line.MightBeFunctionDecl can only be true after the parentheses of a
682       // function declaration have been found. In this case, 'Current' is a
683       // trailing token of this declaration and thus cannot be a name.
684       if (isStartOfName(Current) && !Line.MightBeFunctionDecl) {
685         Contexts.back().FirstStartOfName = &Current;
686         Current.Type = TT_StartOfName;
687       } else if (Current.is(tok::kw_auto)) {
688         AutoFound = true;
689       } else if (Current.is(tok::arrow) && AutoFound &&
690                  Line.MustBeDeclaration) {
691         Current.Type = TT_TrailingReturnArrow;
692       } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
693         Current.Type =
694             determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
695                                                Contexts.back().IsExpression);
696       } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
697         Current.Type = determinePlusMinusCaretUsage(Current);
698         if (Current.Type == TT_UnaryOperator) {
699           ++Contexts.back().NumBlockParameters;
700           if (Current.is(tok::caret))
701             Contexts.back().CaretFound = true;
702         }
703       } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
704         Current.Type = determineIncrementUsage(Current);
705       } else if (Current.is(tok::exclaim)) {
706         Current.Type = TT_UnaryOperator;
707       } else if (Current.isBinaryOperator() &&
708                  (!Current.Previous ||
709                   Current.Previous->isNot(tok::l_square))) {
710         Current.Type = TT_BinaryOperator;
711       } else if (Current.is(tok::comment)) {
712         if (Current.TokenText.startswith("//"))
713           Current.Type = TT_LineComment;
714         else
715           Current.Type = TT_BlockComment;
716       } else if (Current.is(tok::r_paren)) {
717         FormatToken *LeftOfParens = NULL;
718         if (Current.MatchingParen)
719           LeftOfParens = Current.MatchingParen->getPreviousNonComment();
720         bool IsCast = false;
721         bool ParensAreEmpty = Current.Previous == Current.MatchingParen;
722         bool ParensAreType = !Current.Previous ||
723                              Current.Previous->Type == TT_PointerOrReference ||
724                              Current.Previous->Type == TT_TemplateCloser ||
725                              Current.Previous->isSimpleTypeSpecifier();
726         bool ParensCouldEndDecl =
727             Current.Next &&
728             Current.Next->isOneOf(tok::equal, tok::semi, tok::l_brace);
729         bool IsSizeOfOrAlignOf =
730             LeftOfParens &&
731             LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof);
732         if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
733             (Contexts.back().IsExpression ||
734              (Current.Next && Current.Next->isBinaryOperator())))
735           IsCast = true;
736         if (Current.Next && Current.Next->isNot(tok::string_literal) &&
737             (Current.Next->Tok.isLiteral() ||
738              Current.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
739           IsCast = true;
740         // If there is an identifier after the (), it is likely a cast, unless
741         // there is also an identifier before the ().
742         if (LeftOfParens && (LeftOfParens->Tok.getIdentifierInfo() == NULL ||
743                              LeftOfParens->is(tok::kw_return)) &&
744             LeftOfParens->Type != TT_OverloadedOperator &&
745             LeftOfParens->isNot(tok::at) &&
746             LeftOfParens->Type != TT_TemplateCloser && Current.Next &&
747             Current.Next->is(tok::identifier))
748           IsCast = true;
749         if (IsCast && !ParensAreEmpty)
750           Current.Type = TT_CastRParen;
751       } else if (Current.is(tok::at) && Current.Next) {
752         switch (Current.Next->Tok.getObjCKeywordID()) {
753         case tok::objc_interface:
754         case tok::objc_implementation:
755         case tok::objc_protocol:
756           Current.Type = TT_ObjCDecl;
757           break;
758         case tok::objc_property:
759           Current.Type = TT_ObjCProperty;
760           break;
761         default:
762           break;
763         }
764       } else if (Current.is(tok::period)) {
765         FormatToken *PreviousNoComment = Current.getPreviousNonComment();
766         if (PreviousNoComment &&
767             PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
768           Current.Type = TT_DesignatedInitializerPeriod;
769       } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
770                  Line.MightBeFunctionDecl && Contexts.size() == 1) {
771         // Line.MightBeFunctionDecl can only be true after the parentheses of a
772         // function declaration have been found.
773         Current.Type = TT_TrailingAnnotation;
774       }
775     }
776   }
777 
778   /// \brief Take a guess at whether \p Tok starts a name of a function or
779   /// variable declaration.
780   ///
781   /// This is a heuristic based on whether \p Tok is an identifier following
782   /// something that is likely a type.
783   bool isStartOfName(const FormatToken &Tok) {
784     if (Tok.isNot(tok::identifier) || Tok.Previous == NULL)
785       return false;
786 
787     // Skip "const" as it does not have an influence on whether this is a name.
788     FormatToken *PreviousNotConst = Tok.Previous;
789     while (PreviousNotConst != NULL && PreviousNotConst->is(tok::kw_const))
790       PreviousNotConst = PreviousNotConst->Previous;
791 
792     if (PreviousNotConst == NULL)
793       return false;
794 
795     bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
796                        PreviousNotConst->Previous &&
797                        PreviousNotConst->Previous->is(tok::hash);
798 
799     if (PreviousNotConst->Type == TT_TemplateCloser)
800       return PreviousNotConst && PreviousNotConst->MatchingParen &&
801              PreviousNotConst->MatchingParen->Previous &&
802              PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
803 
804     return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) ||
805            PreviousNotConst->Type == TT_PointerOrReference ||
806            PreviousNotConst->isSimpleTypeSpecifier();
807   }
808 
809   /// \brief Return the type of the given token assuming it is * or &.
810   TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression) {
811     const FormatToken *PrevToken = Tok.getPreviousNonComment();
812     if (PrevToken == NULL)
813       return TT_UnaryOperator;
814 
815     const FormatToken *NextToken = Tok.getNextNonComment();
816     if (NextToken == NULL)
817       return TT_Unknown;
818 
819     if (PrevToken->is(tok::coloncolon) ||
820         (PrevToken->is(tok::l_paren) && !IsExpression))
821       return TT_PointerOrReference;
822 
823     if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
824                            tok::comma, tok::semi, tok::kw_return, tok::colon,
825                            tok::equal, tok::kw_delete, tok::kw_sizeof) ||
826         PrevToken->Type == TT_BinaryOperator ||
827         PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
828       return TT_UnaryOperator;
829 
830     if (NextToken->is(tok::l_square))
831       return TT_PointerOrReference;
832 
833     if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&
834         PrevToken->MatchingParen->Previous &&
835         PrevToken->MatchingParen->Previous->is(tok::kw_typeof))
836       return TT_PointerOrReference;
837 
838     if (PrevToken->Tok.isLiteral() ||
839         PrevToken->isOneOf(tok::r_paren, tok::r_square) ||
840         NextToken->Tok.isLiteral() || NextToken->isUnaryOperator())
841       return TT_BinaryOperator;
842 
843     // It is very unlikely that we are going to find a pointer or reference type
844     // definition on the RHS of an assignment.
845     if (IsExpression)
846       return TT_BinaryOperator;
847 
848     return TT_PointerOrReference;
849   }
850 
851   TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
852     const FormatToken *PrevToken = Tok.getPreviousNonComment();
853     if (PrevToken == NULL || PrevToken->Type == TT_CastRParen)
854       return TT_UnaryOperator;
855 
856     // Use heuristics to recognize unary operators.
857     if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
858                            tok::question, tok::colon, tok::kw_return,
859                            tok::kw_case, tok::at, tok::l_brace))
860       return TT_UnaryOperator;
861 
862     // There can't be two consecutive binary operators.
863     if (PrevToken->Type == TT_BinaryOperator)
864       return TT_UnaryOperator;
865 
866     // Fall back to marking the token as binary operator.
867     return TT_BinaryOperator;
868   }
869 
870   /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
871   TokenType determineIncrementUsage(const FormatToken &Tok) {
872     const FormatToken *PrevToken = Tok.getPreviousNonComment();
873     if (PrevToken == NULL || PrevToken->Type == TT_CastRParen)
874       return TT_UnaryOperator;
875     if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
876       return TT_TrailingUnaryOperator;
877 
878     return TT_UnaryOperator;
879   }
880 
881 
882   SmallVector<Context, 8> Contexts;
883 
884   const FormatStyle &Style;
885   AnnotatedLine &Line;
886   FormatToken *CurrentToken;
887   bool KeywordVirtualFound;
888   bool AutoFound;
889   IdentifierInfo &Ident_in;
890 };
891 
892 static int PrecedenceUnaryOperator = prec::PointerToMember + 1;
893 static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
894 
895 /// \brief Parses binary expressions by inserting fake parenthesis based on
896 /// operator precedence.
897 class ExpressionParser {
898 public:
899   ExpressionParser(AnnotatedLine &Line) : Current(Line.First) {
900     // Skip leading "}", e.g. in "} else if (...) {".
901     if (Current->is(tok::r_brace))
902       next();
903   }
904 
905   /// \brief Parse expressions with the given operatore precedence.
906   void parse(int Precedence = 0) {
907     // Skip 'return' and ObjC selector colons as they are not part of a binary
908     // expression.
909     while (Current &&
910            (Current->is(tok::kw_return) ||
911             (Current->is(tok::colon) && Current->Type == TT_ObjCMethodExpr)))
912       next();
913 
914     if (Current == NULL || Precedence > PrecedenceArrowAndPeriod)
915       return;
916 
917     // Conditional expressions need to be parsed separately for proper nesting.
918     if (Precedence == prec::Conditional) {
919       parseConditionalExpr();
920       return;
921     }
922 
923     // Parse unary operators, which all have a higher precedence than binary
924     // operators.
925     if (Precedence == PrecedenceUnaryOperator) {
926       parseUnaryOperator();
927       return;
928     }
929 
930     FormatToken *Start = Current;
931     FormatToken *LatestOperator = NULL;
932 
933     while (Current) {
934       // Consume operators with higher precedence.
935       parse(Precedence + 1);
936 
937       int CurrentPrecedence = getCurrentPrecedence();
938 
939       if (Current && Current->Type == TT_ObjCSelectorName &&
940           Precedence == CurrentPrecedence) {
941         if (LatestOperator)
942           addFakeParenthesis(Start, prec::Level(Precedence));
943         Start = Current;
944       }
945 
946       // At the end of the line or when an operator with higher precedence is
947       // found, insert fake parenthesis and return.
948       if (Current == NULL || Current->closesScope() ||
949           (CurrentPrecedence != -1 && CurrentPrecedence < Precedence)) {
950         if (LatestOperator) {
951           if (Precedence == PrecedenceArrowAndPeriod) {
952             LatestOperator->LastInChainOfCalls = true;
953             // Call expressions don't have a binary operator precedence.
954             addFakeParenthesis(Start, prec::Unknown);
955           } else {
956             addFakeParenthesis(Start, prec::Level(Precedence));
957           }
958         }
959         return;
960       }
961 
962       // Consume scopes: (), [], <> and {}
963       if (Current->opensScope()) {
964         while (Current && !Current->closesScope()) {
965           next();
966           parse();
967         }
968         next();
969       } else {
970         // Operator found.
971         if (CurrentPrecedence == Precedence)
972           LatestOperator = Current;
973 
974         next();
975       }
976     }
977   }
978 
979 private:
980   /// \brief Gets the precedence (+1) of the given token for binary operators
981   /// and other tokens that we treat like binary operators.
982   int getCurrentPrecedence() {
983     if (Current) {
984       if (Current->Type == TT_ConditionalExpr)
985         return prec::Conditional;
986       else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon ||
987                Current->Type == TT_ObjCSelectorName)
988         return 0;
989       else if (Current->Type == TT_RangeBasedForLoopColon)
990         return prec::Comma;
991       else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
992         return Current->getPrecedence();
993       else if (Current->isOneOf(tok::period, tok::arrow))
994         return PrecedenceArrowAndPeriod;
995     }
996     return -1;
997   }
998 
999   void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
1000     Start->FakeLParens.push_back(Precedence);
1001     if (Precedence > prec::Unknown)
1002       Start->StartsBinaryExpression = true;
1003     if (Current) {
1004       ++Current->Previous->FakeRParens;
1005       if (Precedence > prec::Unknown)
1006         Current->Previous->EndsBinaryExpression = true;
1007     }
1008   }
1009 
1010   /// \brief Parse unary operator expressions and surround them with fake
1011   /// parentheses if appropriate.
1012   void parseUnaryOperator() {
1013     if (Current == NULL || Current->Type != TT_UnaryOperator) {
1014       parse(PrecedenceArrowAndPeriod);
1015       return;
1016     }
1017 
1018     FormatToken *Start = Current;
1019     next();
1020     parseUnaryOperator();
1021 
1022     // The actual precedence doesn't matter.
1023     addFakeParenthesis(Start, prec::Unknown);
1024   }
1025 
1026   void parseConditionalExpr() {
1027     FormatToken *Start = Current;
1028     parse(prec::LogicalOr);
1029     if (!Current || !Current->is(tok::question))
1030       return;
1031     next();
1032     parse(prec::LogicalOr);
1033     if (!Current || Current->Type != TT_ConditionalExpr)
1034       return;
1035     next();
1036     parseConditionalExpr();
1037     addFakeParenthesis(Start, prec::Conditional);
1038   }
1039 
1040   void next() {
1041     if (Current)
1042       Current = Current->Next;
1043     while (Current && Current->isTrailingComment())
1044       Current = Current->Next;
1045   }
1046 
1047   FormatToken *Current;
1048 };
1049 
1050 } // end anonymous namespace
1051 
1052 void
1053 TokenAnnotator::setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) {
1054   const AnnotatedLine *NextNonCommentLine = NULL;
1055   for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
1056                                                           E = Lines.rend();
1057        I != E; ++I) {
1058     if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
1059         (*I)->First->Next == NULL)
1060       (*I)->Level = NextNonCommentLine->Level;
1061     else
1062       NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : NULL;
1063 
1064     setCommentLineLevels((*I)->Children);
1065   }
1066 }
1067 
1068 void TokenAnnotator::annotate(AnnotatedLine &Line) {
1069   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1070                                                   E = Line.Children.end();
1071        I != E; ++I) {
1072     annotate(**I);
1073   }
1074   AnnotatingParser Parser(Style, Line, Ident_in);
1075   Line.Type = Parser.parseLine();
1076   if (Line.Type == LT_Invalid)
1077     return;
1078 
1079   ExpressionParser ExprParser(Line);
1080   ExprParser.parse();
1081 
1082   if (Line.First->Type == TT_ObjCMethodSpecifier)
1083     Line.Type = LT_ObjCMethodDecl;
1084   else if (Line.First->Type == TT_ObjCDecl)
1085     Line.Type = LT_ObjCDecl;
1086   else if (Line.First->Type == TT_ObjCProperty)
1087     Line.Type = LT_ObjCProperty;
1088 
1089   Line.First->SpacesRequiredBefore = 1;
1090   Line.First->CanBreakBefore = Line.First->MustBreakBefore;
1091 }
1092 
1093 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
1094   Line.First->TotalLength =
1095       Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
1096   if (!Line.First->Next)
1097     return;
1098   FormatToken *Current = Line.First->Next;
1099   bool InFunctionDecl = Line.MightBeFunctionDecl;
1100   while (Current != NULL) {
1101     if (Current->Type == TT_LineComment) {
1102       if (Current->Previous->BlockKind == BK_BracedInit)
1103         Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
1104       else
1105         Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
1106     } else if (Current->SpacesRequiredBefore == 0 &&
1107              spaceRequiredBefore(Line, *Current)) {
1108       Current->SpacesRequiredBefore = 1;
1109     }
1110 
1111     Current->MustBreakBefore =
1112         Current->MustBreakBefore || mustBreakBefore(Line, *Current);
1113 
1114     Current->CanBreakBefore =
1115         Current->MustBreakBefore || canBreakBefore(Line, *Current);
1116     if (Current->MustBreakBefore || !Current->Children.empty() ||
1117         Current->IsMultiline)
1118       Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
1119     else
1120       Current->TotalLength = Current->Previous->TotalLength +
1121                              Current->ColumnWidth +
1122                              Current->SpacesRequiredBefore;
1123 
1124     if (Current->Type == TT_CtorInitializerColon)
1125       InFunctionDecl = false;
1126 
1127     // FIXME: Only calculate this if CanBreakBefore is true once static
1128     // initializers etc. are sorted out.
1129     // FIXME: Move magic numbers to a better place.
1130     Current->SplitPenalty = 20 * Current->BindingStrength +
1131                             splitPenalty(Line, *Current, InFunctionDecl);
1132 
1133     Current = Current->Next;
1134   }
1135 
1136   calculateUnbreakableTailLengths(Line);
1137   for (Current = Line.First; Current != NULL; Current = Current->Next) {
1138     if (Current->Role)
1139       Current->Role->precomputeFormattingInfos(Current);
1140   }
1141 
1142   DEBUG({ printDebugInfo(Line); });
1143 
1144   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1145                                                   E = Line.Children.end();
1146        I != E; ++I) {
1147     calculateFormattingInformation(**I);
1148   }
1149 }
1150 
1151 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
1152   unsigned UnbreakableTailLength = 0;
1153   FormatToken *Current = Line.Last;
1154   while (Current != NULL) {
1155     Current->UnbreakableTailLength = UnbreakableTailLength;
1156     if (Current->CanBreakBefore ||
1157         Current->isOneOf(tok::comment, tok::string_literal)) {
1158       UnbreakableTailLength = 0;
1159     } else {
1160       UnbreakableTailLength +=
1161           Current->ColumnWidth + Current->SpacesRequiredBefore;
1162     }
1163     Current = Current->Previous;
1164   }
1165 }
1166 
1167 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
1168                                       const FormatToken &Tok,
1169                                       bool InFunctionDecl) {
1170   const FormatToken &Left = *Tok.Previous;
1171   const FormatToken &Right = Tok;
1172 
1173   if (Left.is(tok::semi))
1174     return 0;
1175   if (Left.is(tok::comma))
1176     return 1;
1177   if (Right.is(tok::l_square)) {
1178     if (Style.Language == FormatStyle::LK_Proto)
1179       return 1;
1180     if (Right.Type != TT_ObjCMethodExpr)
1181       return 250;
1182   }
1183   if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator)) {
1184     if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
1185       return 3;
1186     if (Left.Type == TT_StartOfName)
1187       return 20;
1188     if (InFunctionDecl && Right.NestingLevel == 0)
1189       return Style.PenaltyReturnTypeOnItsOwnLine;
1190     return 200;
1191   }
1192   if (Left.is(tok::equal) && Right.is(tok::l_brace))
1193     return 150;
1194   if (Left.Type == TT_CastRParen)
1195     return 100;
1196   if (Left.is(tok::coloncolon) ||
1197       (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
1198     return 500;
1199   if (Left.isOneOf(tok::kw_class, tok::kw_struct))
1200     return 5000;
1201 
1202   if (Left.Type == TT_RangeBasedForLoopColon ||
1203       Left.Type == TT_InheritanceColon)
1204     return 2;
1205 
1206   if (Right.isMemberAccess()) {
1207     if (Left.is(tok::r_paren) && Left.MatchingParen &&
1208         Left.MatchingParen->ParameterCount > 0)
1209       return 20; // Should be smaller than breaking at a nested comma.
1210     return 150;
1211   }
1212 
1213   if (Right.Type == TT_TrailingAnnotation && Right.Next &&
1214       Right.Next->isNot(tok::l_paren)) {
1215     // Breaking before a trailing annotation is bad unless it is function-like.
1216     // Use a slightly higher penalty after ")" so that annotations like
1217     // "const override" are kept together.
1218     return Left.is(tok::r_paren) ? 100 : 120;
1219   }
1220 
1221   // In for-loops, prefer breaking at ',' and ';'.
1222   if (Line.First->is(tok::kw_for) && Left.is(tok::equal))
1223     return 4;
1224 
1225   // In Objective-C method expressions, prefer breaking before "param:" over
1226   // breaking after it.
1227   if (Right.Type == TT_ObjCSelectorName)
1228     return 0;
1229   if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
1230     return Line.MightBeFunctionDecl ? 50 : 500;
1231 
1232   if (Left.is(tok::l_paren) && InFunctionDecl)
1233     return 100;
1234   if (Left.is(tok::equal) && InFunctionDecl)
1235     return 110;
1236   if (Left.opensScope())
1237     return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
1238                                    : 19;
1239 
1240   if (Right.is(tok::lessless)) {
1241     if (Left.is(tok::string_literal)) {
1242       StringRef Content = Left.TokenText;
1243       if (Content.startswith("\""))
1244         Content = Content.drop_front(1);
1245       if (Content.endswith("\""))
1246         Content = Content.drop_back(1);
1247       Content = Content.trim();
1248       if (Content.size() > 1 &&
1249           (Content.back() == ':' || Content.back() == '='))
1250         return 25;
1251     }
1252     return 1; // Breaking at a << is really cheap.
1253   }
1254   if (Left.Type == TT_ConditionalExpr)
1255     return prec::Conditional;
1256   prec::Level Level = Left.getPrecedence();
1257 
1258   if (Level != prec::Unknown)
1259     return Level;
1260 
1261   return 3;
1262 }
1263 
1264 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
1265                                           const FormatToken &Left,
1266                                           const FormatToken &Right) {
1267   if (Style.Language == FormatStyle::LK_Proto) {
1268     if (Right.is(tok::l_paren) &&
1269         (Left.TokenText == "returns" || Left.TokenText == "option"))
1270       return true;
1271   }
1272   if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
1273       Left.Tok.getObjCKeywordID() == tok::objc_property)
1274     return true;
1275   if (Right.is(tok::hashhash))
1276     return Left.is(tok::hash);
1277   if (Left.isOneOf(tok::hashhash, tok::hash))
1278     return Right.is(tok::hash);
1279   if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
1280     return Style.SpaceInEmptyParentheses;
1281   if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
1282     return (Right.Type == TT_CastRParen ||
1283             (Left.MatchingParen && Left.MatchingParen->Type == TT_CastRParen))
1284                ? Style.SpacesInCStyleCastParentheses
1285                : Style.SpacesInParentheses;
1286   if (Style.SpacesInAngles &&
1287       ((Left.Type == TT_TemplateOpener) != (Right.Type == TT_TemplateCloser)))
1288     return true;
1289   if (Right.isOneOf(tok::semi, tok::comma))
1290     return false;
1291   if (Right.is(tok::less) &&
1292       (Left.is(tok::kw_template) ||
1293        (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
1294     return true;
1295   if (Left.is(tok::arrow) || Right.is(tok::arrow))
1296     return false;
1297   if (Left.isOneOf(tok::exclaim, tok::tilde))
1298     return false;
1299   if (Left.is(tok::at) &&
1300       Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
1301                     tok::numeric_constant, tok::l_paren, tok::l_brace,
1302                     tok::kw_true, tok::kw_false))
1303     return false;
1304   if (Left.is(tok::coloncolon))
1305     return false;
1306   if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace))
1307     return (Left.is(tok::less) && Style.Standard == FormatStyle::LS_Cpp03) ||
1308            !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren,
1309                          tok::r_paren, tok::less);
1310   if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
1311     return false;
1312   if (Right.is(tok::ellipsis))
1313     return Left.Tok.isLiteral();
1314   if (Left.is(tok::l_square) && Right.is(tok::amp))
1315     return false;
1316   if (Right.Type == TT_PointerOrReference)
1317     return Left.Tok.isLiteral() ||
1318            ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) &&
1319             !Style.PointerBindsToType);
1320   if (Right.Type == TT_FunctionTypeLParen && Left.isNot(tok::l_paren) &&
1321       (Left.Type != TT_PointerOrReference || Style.PointerBindsToType))
1322     return true;
1323   if (Left.Type == TT_PointerOrReference)
1324     return Right.Tok.isLiteral() || Right.Type == TT_BlockComment ||
1325            ((Right.Type != TT_PointerOrReference) &&
1326             Right.isNot(tok::l_paren) && Style.PointerBindsToType &&
1327             Left.Previous &&
1328             !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
1329   if (Right.is(tok::star) && Left.is(tok::l_paren))
1330     return false;
1331   if (Left.is(tok::l_square))
1332     return Left.Type == TT_ArrayInitializerLSquare &&
1333            Style.SpacesInContainerLiterals && Right.isNot(tok::r_square);
1334   if (Right.is(tok::r_square))
1335     return Right.MatchingParen && Style.SpacesInContainerLiterals &&
1336            Right.MatchingParen->Type == TT_ArrayInitializerLSquare;
1337   if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr &&
1338       Right.Type != TT_LambdaLSquare && Left.isNot(tok::numeric_constant))
1339     return false;
1340   if (Left.is(tok::colon))
1341     return Left.Type != TT_ObjCMethodExpr;
1342   if (Right.is(tok::l_paren)) {
1343     if (Left.is(tok::r_paren) && Left.Type == TT_AttributeParen)
1344       return true;
1345     return Line.Type == LT_ObjCDecl ||
1346            Left.isOneOf(tok::kw_return, tok::kw_new, tok::kw_delete,
1347                         tok::semi) ||
1348            (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
1349             Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
1350                          tok::kw_catch)) ||
1351            (Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
1352             Left.isOneOf(tok::identifier, tok::kw___attribute) &&
1353             Line.Type != LT_PreprocessorDirective);
1354   }
1355   if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1356     return false;
1357   if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1358     return !Left.Children.empty(); // No spaces in "{}".
1359   if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
1360       (Right.is(tok::r_brace) && Right.MatchingParen &&
1361        Right.MatchingParen->BlockKind != BK_Block))
1362     return !Style.Cpp11BracedListStyle;
1363   if (Left.Type == TT_BlockComment && Left.TokenText.endswith("=*/"))
1364     return false;
1365   if (Right.Type == TT_UnaryOperator)
1366     return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
1367            (Left.isNot(tok::colon) || Left.Type != TT_ObjCMethodExpr);
1368   if (Left.isOneOf(tok::identifier, tok::greater, tok::r_square) &&
1369       Right.is(tok::l_brace) && Right.getNextNonComment() &&
1370       Right.BlockKind != BK_Block)
1371     return false;
1372   if (Left.is(tok::period) || Right.is(tok::period))
1373     return false;
1374   if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
1375     return false;
1376   return true;
1377 }
1378 
1379 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
1380                                          const FormatToken &Tok) {
1381   if (Tok.Tok.getIdentifierInfo() && Tok.Previous->Tok.getIdentifierInfo())
1382     return true; // Never ever merge two identifiers.
1383   if (Tok.Previous->Type == TT_ImplicitStringLiteral)
1384     return Tok.WhitespaceRange.getBegin() != Tok.WhitespaceRange.getEnd();
1385   if (Line.Type == LT_ObjCMethodDecl) {
1386     if (Tok.Previous->Type == TT_ObjCMethodSpecifier)
1387       return true;
1388     if (Tok.Previous->is(tok::r_paren) && Tok.is(tok::identifier))
1389       // Don't space between ')' and <id>
1390       return false;
1391   }
1392   if (Line.Type == LT_ObjCProperty &&
1393       (Tok.is(tok::equal) || Tok.Previous->is(tok::equal)))
1394     return false;
1395 
1396   if (Tok.Type == TT_TrailingReturnArrow ||
1397       Tok.Previous->Type == TT_TrailingReturnArrow)
1398     return true;
1399   if (Tok.Previous->is(tok::comma))
1400     return true;
1401   if (Tok.is(tok::comma))
1402     return false;
1403   if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
1404     return true;
1405   if (Tok.Previous->Tok.is(tok::kw_operator))
1406     return Tok.is(tok::coloncolon);
1407   if (Tok.Type == TT_OverloadedOperatorLParen)
1408     return false;
1409   if (Tok.is(tok::colon))
1410     return !Line.First->isOneOf(tok::kw_case, tok::kw_default) &&
1411            Tok.getNextNonComment() != NULL && Tok.Type != TT_ObjCMethodExpr &&
1412            !Tok.Previous->is(tok::question) &&
1413            (Tok.Type != TT_DictLiteral || Style.SpacesInContainerLiterals);
1414   if (Tok.Previous->Type == TT_UnaryOperator ||
1415       Tok.Previous->Type == TT_CastRParen)
1416     return Tok.Type == TT_BinaryOperator;
1417   if (Tok.Previous->is(tok::greater) && Tok.is(tok::greater)) {
1418     return Tok.Type == TT_TemplateCloser &&
1419            Tok.Previous->Type == TT_TemplateCloser &&
1420            (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
1421   }
1422   if (Tok.isOneOf(tok::arrowstar, tok::periodstar) ||
1423       Tok.Previous->isOneOf(tok::arrowstar, tok::periodstar))
1424     return false;
1425   if (!Style.SpaceBeforeAssignmentOperators &&
1426       Tok.getPrecedence() == prec::Assignment)
1427     return false;
1428   if ((Tok.Type == TT_BinaryOperator && !Tok.Previous->is(tok::l_paren)) ||
1429       Tok.Previous->Type == TT_BinaryOperator)
1430     return true;
1431   if (Tok.Previous->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
1432     return false;
1433   if (Tok.is(tok::less) && Tok.Previous->isNot(tok::l_paren) &&
1434       Line.First->is(tok::hash))
1435     return true;
1436   if (Tok.Type == TT_TrailingUnaryOperator)
1437     return false;
1438   return spaceRequiredBetween(Line, *Tok.Previous, Tok);
1439 }
1440 
1441 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
1442                                      const FormatToken &Right) {
1443   if (Right.is(tok::comment)) {
1444     return Right.Previous->BlockKind != BK_BracedInit &&
1445            Right.Previous->Type != TT_CtorInitializerColon &&
1446            Right.NewlinesBefore > 0;
1447   } else if (Right.Previous->isTrailingComment() ||
1448              (Right.isStringLiteral() && Right.Previous->isStringLiteral())) {
1449     return true;
1450   } else if (Right.Previous->IsUnterminatedLiteral) {
1451     return true;
1452   } else if (Right.is(tok::lessless) && Right.Next &&
1453              Right.Previous->is(tok::string_literal) &&
1454              Right.Next->is(tok::string_literal)) {
1455     return true;
1456   } else if (Right.Previous->ClosesTemplateDeclaration &&
1457              Right.Previous->MatchingParen &&
1458              Right.Previous->MatchingParen->NestingLevel == 0 &&
1459              Style.AlwaysBreakTemplateDeclarations) {
1460     return true;
1461   } else if ((Right.Type == TT_CtorInitializerComma ||
1462               Right.Type == TT_CtorInitializerColon) &&
1463              Style.BreakConstructorInitializersBeforeComma &&
1464              !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) {
1465     return true;
1466   } else if (Right.is(tok::l_brace) && (Right.BlockKind == BK_Block)) {
1467     return Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1468            Style.BreakBeforeBraces == FormatStyle::BS_GNU;
1469   } else if (Right.is(tok::string_literal) &&
1470              Right.TokenText.startswith("R\"")) {
1471     // Raw string literals are special wrt. line breaks. The author has made a
1472     // deliberate choice and might have aligned the contents of the string
1473     // literal accordingly. Thus, we try keep existing line breaks.
1474     return Right.NewlinesBefore > 0;
1475   } else if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 &&
1476              Style.Language == FormatStyle::LK_Proto) {
1477     // Don't enums onto single lines in protocol buffers.
1478     return true;
1479   }
1480   return false;
1481 }
1482 
1483 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
1484                                     const FormatToken &Right) {
1485   const FormatToken &Left = *Right.Previous;
1486   if (Left.is(tok::at))
1487     return false;
1488   if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator))
1489     return true;
1490   if (Right.isTrailingComment())
1491     // We rely on MustBreakBefore being set correctly here as we should not
1492     // change the "binding" behavior of a comment.
1493     // The first comment in a braced lists is always interpreted as belonging to
1494     // the first list element. Otherwise, it should be placed outside of the
1495     // list.
1496     return Left.BlockKind == BK_BracedInit;
1497   if (Left.is(tok::question) && Right.is(tok::colon))
1498     return false;
1499   if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
1500     return Style.BreakBeforeTernaryOperators;
1501   if (Left.Type == TT_ConditionalExpr || Left.is(tok::question))
1502     return !Style.BreakBeforeTernaryOperators;
1503   if (Right.is(tok::colon) &&
1504       (Right.Type == TT_DictLiteral || Right.Type == TT_ObjCMethodExpr))
1505     return false;
1506   if (Left.is(tok::colon) &&
1507       (Left.Type == TT_DictLiteral || Left.Type == TT_ObjCMethodExpr))
1508     return true;
1509   if (Right.Type == TT_ObjCSelectorName)
1510     return true;
1511   if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
1512     return true;
1513   if (Left.ClosesTemplateDeclaration)
1514     return true;
1515   if (Right.Type == TT_RangeBasedForLoopColon ||
1516       Right.Type == TT_OverloadedOperatorLParen ||
1517       Right.Type == TT_OverloadedOperator)
1518     return false;
1519   if (Left.Type == TT_RangeBasedForLoopColon)
1520     return true;
1521   if (Right.Type == TT_RangeBasedForLoopColon)
1522     return false;
1523   if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
1524       Left.Type == TT_UnaryOperator || Left.is(tok::kw_operator))
1525     return false;
1526   if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
1527     return false;
1528   if (Left.is(tok::l_paren) && Left.Type == TT_AttributeParen)
1529     return false;
1530   if (Left.is(tok::l_paren) && Left.Previous &&
1531       (Left.Previous->Type == TT_BinaryOperator ||
1532        Left.Previous->Type == TT_CastRParen))
1533     return false;
1534   if (Right.Type == TT_ImplicitStringLiteral)
1535     return false;
1536 
1537   if (Right.is(tok::r_paren) || Right.Type == TT_TemplateCloser)
1538     return false;
1539 
1540   // We only break before r_brace if there was a corresponding break before
1541   // the l_brace, which is tracked by BreakBeforeClosingBrace.
1542   if (Right.is(tok::r_brace))
1543     return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
1544 
1545   // Allow breaking after a trailing 'const', e.g. after a method declaration,
1546   // unless it is follow by ';', '{' or '='.
1547   if (Left.is(tok::kw_const) && Left.Previous != NULL &&
1548       Left.Previous->is(tok::r_paren))
1549     return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal);
1550 
1551   if (Right.is(tok::kw___attribute))
1552     return true;
1553 
1554   if (Left.is(tok::identifier) && Right.is(tok::string_literal))
1555     return true;
1556 
1557   if (Left.Type == TT_CtorInitializerComma &&
1558       Style.BreakConstructorInitializersBeforeComma)
1559     return false;
1560   if (Right.Type == TT_CtorInitializerComma &&
1561       Style.BreakConstructorInitializersBeforeComma)
1562     return true;
1563   if (Right.Type == TT_BinaryOperator && Style.BreakBeforeBinaryOperators)
1564     return true;
1565   if (Left.is(tok::greater) && Right.is(tok::greater) &&
1566       Left.Type != TT_TemplateCloser)
1567     return false;
1568   if (Left.Type == TT_ArrayInitializerLSquare)
1569     return true;
1570   return (Left.isBinaryOperator() && Left.isNot(tok::lessless) &&
1571           !Style.BreakBeforeBinaryOperators) ||
1572          Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
1573                       tok::kw_class, tok::kw_struct) ||
1574          Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon,
1575                        tok::l_square, tok::at) ||
1576          (Left.is(tok::r_paren) &&
1577           Right.isOneOf(tok::identifier, tok::kw_const)) ||
1578          (Left.is(tok::l_paren) && !Right.is(tok::r_paren));
1579 }
1580 
1581 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
1582   llvm::errs() << "AnnotatedTokens:\n";
1583   const FormatToken *Tok = Line.First;
1584   while (Tok) {
1585     llvm::errs() << " M=" << Tok->MustBreakBefore
1586                  << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type
1587                  << " S=" << Tok->SpacesRequiredBefore
1588                  << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()
1589                  << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind
1590                  << " FakeLParens=";
1591     for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
1592       llvm::errs() << Tok->FakeLParens[i] << "/";
1593     llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n";
1594     if (Tok->Next == NULL)
1595       assert(Tok == Line.Last);
1596     Tok = Tok->Next;
1597   }
1598   llvm::errs() << "----\n";
1599 }
1600 
1601 } // namespace format
1602 } // namespace clang
1603