1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "TokenAnnotator.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "llvm/Support/Debug.h"
19 
20 namespace clang {
21 namespace format {
22 
23 namespace {
24 
25 /// \brief A parser that gathers additional information about tokens.
26 ///
27 /// The \c TokenAnnotator tries to match parenthesis and square brakets and
28 /// store a parenthesis levels. It also tries to resolve matching "<" and ">"
29 /// into template parameter lists.
30 class AnnotatingParser {
31 public:
32   AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
33                    IdentifierInfo &Ident_in)
34       : Style(Style), Line(Line), CurrentToken(Line.First),
35         KeywordVirtualFound(false), AutoFound(false), Ident_in(Ident_in) {
36     Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
37   }
38 
39 private:
40   bool parseAngle() {
41     if (CurrentToken == NULL)
42       return false;
43     ScopedContextCreator ContextCreator(*this, tok::less, 10);
44     FormatToken *Left = CurrentToken->Previous;
45     Contexts.back().IsExpression = false;
46     while (CurrentToken != NULL) {
47       if (CurrentToken->is(tok::greater)) {
48         Left->MatchingParen = CurrentToken;
49         CurrentToken->MatchingParen = Left;
50         CurrentToken->Type = TT_TemplateCloser;
51         next();
52         return true;
53       }
54       if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
55                                 tok::question, tok::colon))
56         return false;
57       // If a && or || is found and interpreted as a binary operator, this set
58       // of angles is likely part of something like "a < b && c > d". If the
59       // angles are inside an expression, the ||/&& might also be a binary
60       // operator that was misinterpreted because we are parsing template
61       // parameters.
62       // FIXME: This is getting out of hand, write a decent parser.
63       if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
64           (CurrentToken->Previous->Type == TT_BinaryOperator ||
65            Contexts[Contexts.size() - 2].IsExpression) &&
66           Line.First->isNot(tok::kw_template))
67         return false;
68       updateParameterCount(Left, CurrentToken);
69       if (!consumeToken())
70         return false;
71     }
72     return false;
73   }
74 
75   bool parseParens(bool LookForDecls = false) {
76     if (CurrentToken == NULL)
77       return false;
78     bool AfterCaret = Contexts.back().CaretFound;
79     Contexts.back().CaretFound = false;
80 
81     ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
82 
83     // FIXME: This is a bit of a hack. Do better.
84     Contexts.back().ColonIsForRangeExpr =
85         Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
86 
87     bool StartsObjCMethodExpr = false;
88     FormatToken *Left = CurrentToken->Previous;
89     if (CurrentToken->is(tok::caret)) {
90       // (^ can start a block type.
91       Left->Type = TT_ObjCBlockLParen;
92     } else if (FormatToken *MaybeSel = Left->Previous) {
93       // @selector( starts a selector.
94       if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
95           MaybeSel->Previous->is(tok::at)) {
96         StartsObjCMethodExpr = true;
97       }
98     }
99 
100     if (Left->Previous && Left->Previous->isOneOf(tok::kw_static_assert,
101                                                   tok::kw_if, tok::kw_while)) {
102       // static_assert, if and while usually contain expressions.
103       Contexts.back().IsExpression = true;
104     } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
105                Left->Previous->MatchingParen &&
106                Left->Previous->MatchingParen->Type == TT_LambdaLSquare) {
107       // This is a parameter list of a lambda expression.
108       Contexts.back().IsExpression = false;
109     } else if (AfterCaret) {
110       // This is the parameter list of an ObjC block.
111       Contexts.back().IsExpression = false;
112     }
113 
114     if (StartsObjCMethodExpr) {
115       Contexts.back().ColonIsObjCMethodExpr = true;
116       Left->Type = TT_ObjCMethodExpr;
117     }
118 
119     bool MightBeFunctionType = CurrentToken->is(tok::star);
120     bool HasMultipleLines = false;
121     bool HasMultipleParametersOnALine = false;
122     while (CurrentToken != NULL) {
123       // LookForDecls is set when "if (" has been seen. Check for
124       // 'identifier' '*' 'identifier' followed by not '=' -- this
125       // '*' has to be a binary operator but determineStarAmpUsage() will
126       // categorize it as an unary operator, so set the right type here.
127       if (LookForDecls && CurrentToken->Next) {
128         FormatToken *Prev = CurrentToken->getPreviousNonComment();
129         if (Prev) {
130           FormatToken *PrevPrev = Prev->getPreviousNonComment();
131           FormatToken *Next = CurrentToken->Next;
132           if (PrevPrev && PrevPrev->is(tok::identifier) &&
133               Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
134               CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
135             Prev->Type = TT_BinaryOperator;
136             LookForDecls = false;
137           }
138         }
139       }
140 
141       if (CurrentToken->Previous->Type == TT_PointerOrReference &&
142           CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
143                                                     tok::coloncolon))
144         MightBeFunctionType = true;
145       if (CurrentToken->is(tok::r_paren)) {
146         if (MightBeFunctionType && CurrentToken->Next &&
147             (CurrentToken->Next->is(tok::l_paren) ||
148              (CurrentToken->Next->is(tok::l_square) &&
149               !Contexts.back().IsExpression)))
150           Left->Type = TT_FunctionTypeLParen;
151         Left->MatchingParen = CurrentToken;
152         CurrentToken->MatchingParen = Left;
153 
154         if (StartsObjCMethodExpr) {
155           CurrentToken->Type = TT_ObjCMethodExpr;
156           if (Contexts.back().FirstObjCSelectorName != NULL) {
157             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
158                 Contexts.back().LongestObjCSelectorName;
159           }
160         }
161 
162         if (!HasMultipleLines)
163           Left->PackingKind = PPK_Inconclusive;
164         else if (HasMultipleParametersOnALine)
165           Left->PackingKind = PPK_BinPacked;
166         else
167           Left->PackingKind = PPK_OnePerLine;
168 
169         next();
170         return true;
171       }
172       if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
173         return false;
174       else if (CurrentToken->is(tok::l_brace))
175         Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
176       updateParameterCount(Left, CurrentToken);
177       if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
178           !CurrentToken->Next->HasUnescapedNewline &&
179           !CurrentToken->Next->isTrailingComment())
180         HasMultipleParametersOnALine = true;
181       if (!consumeToken())
182         return false;
183       if (CurrentToken && CurrentToken->HasUnescapedNewline)
184         HasMultipleLines = true;
185     }
186     return false;
187   }
188 
189   bool parseSquare() {
190     if (!CurrentToken)
191       return false;
192 
193     // A '[' could be an index subscript (after an identifier or after
194     // ')' or ']'), it could be the start of an Objective-C method
195     // expression, or it could the the start of an Objective-C array literal.
196     FormatToken *Left = CurrentToken->Previous;
197     FormatToken *Parent = Left->getPreviousNonComment();
198     bool StartsObjCMethodExpr =
199         Contexts.back().CanBeExpression && Left->Type != TT_LambdaLSquare &&
200         (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
201                                     tok::kw_return, tok::kw_throw) ||
202          Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn ||
203          Parent->Type == TT_CastRParen ||
204          getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
205     ScopedContextCreator ContextCreator(*this, tok::l_square, 10);
206     Contexts.back().IsExpression = true;
207     bool ColonFound = false;
208 
209     if (StartsObjCMethodExpr) {
210       Contexts.back().ColonIsObjCMethodExpr = true;
211       Left->Type = TT_ObjCMethodExpr;
212     } else if (Parent && Parent->is(tok::at)) {
213       Left->Type = TT_ArrayInitializerLSquare;
214     } else if (Left->Type == TT_Unknown) {
215       Left->Type = TT_ArraySubscriptLSquare;
216     }
217 
218     while (CurrentToken != NULL) {
219       if (CurrentToken->is(tok::r_square)) {
220         if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
221             Left->Type == TT_ObjCMethodExpr) {
222           // An ObjC method call is rarely followed by an open parenthesis.
223           // FIXME: Do we incorrectly label ":" with this?
224           StartsObjCMethodExpr = false;
225           Left->Type = TT_Unknown;
226         }
227         if (StartsObjCMethodExpr) {
228           CurrentToken->Type = TT_ObjCMethodExpr;
229           // determineStarAmpUsage() thinks that '*' '[' is allocating an
230           // array of pointers, but if '[' starts a selector then '*' is a
231           // binary operator.
232           if (Parent != NULL && Parent->Type == TT_PointerOrReference)
233             Parent->Type = TT_BinaryOperator;
234         }
235         Left->MatchingParen = CurrentToken;
236         CurrentToken->MatchingParen = Left;
237         if (Contexts.back().FirstObjCSelectorName != NULL) {
238           Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
239               Contexts.back().LongestObjCSelectorName;
240           if (Contexts.back().NumBlockParameters > 1)
241             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
242         }
243         next();
244         return true;
245       }
246       if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
247         return false;
248       if (CurrentToken->is(tok::colon))
249         ColonFound = true;
250       if (CurrentToken->is(tok::comma) &&
251           (Left->Type == TT_ArraySubscriptLSquare ||
252            (Left->Type == TT_ObjCMethodExpr && !ColonFound)))
253         Left->Type = TT_ArrayInitializerLSquare;
254       updateParameterCount(Left, CurrentToken);
255       if (!consumeToken())
256         return false;
257     }
258     return false;
259   }
260 
261   bool parseBrace() {
262     if (CurrentToken != NULL) {
263       FormatToken *Left = CurrentToken->Previous;
264       ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
265       Contexts.back().ColonIsDictLiteral = true;
266 
267       while (CurrentToken != NULL) {
268         if (CurrentToken->is(tok::r_brace)) {
269           Left->MatchingParen = CurrentToken;
270           CurrentToken->MatchingParen = Left;
271           next();
272           return true;
273         }
274         if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
275           return false;
276         updateParameterCount(Left, CurrentToken);
277         if (CurrentToken->is(tok::colon))
278           Left->Type = TT_DictLiteral;
279         if (!consumeToken())
280           return false;
281       }
282     }
283     // No closing "}" found, this probably starts a definition.
284     Line.StartsDefinition = true;
285     return true;
286   }
287 
288   void updateParameterCount(FormatToken *Left, FormatToken *Current) {
289     if (Current->is(tok::comma)) {
290       ++Left->ParameterCount;
291       if (!Left->Role)
292         Left->Role.reset(new CommaSeparatedList(Style));
293       Left->Role->CommaFound(Current);
294     } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
295       Left->ParameterCount = 1;
296     }
297   }
298 
299   bool parseConditional() {
300     while (CurrentToken != NULL) {
301       if (CurrentToken->is(tok::colon)) {
302         CurrentToken->Type = TT_ConditionalExpr;
303         next();
304         return true;
305       }
306       if (!consumeToken())
307         return false;
308     }
309     return false;
310   }
311 
312   bool parseTemplateDeclaration() {
313     if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
314       CurrentToken->Type = TT_TemplateOpener;
315       next();
316       if (!parseAngle())
317         return false;
318       if (CurrentToken != NULL)
319         CurrentToken->Previous->ClosesTemplateDeclaration = true;
320       return true;
321     }
322     return false;
323   }
324 
325   bool consumeToken() {
326     FormatToken *Tok = CurrentToken;
327     next();
328     switch (Tok->Tok.getKind()) {
329     case tok::plus:
330     case tok::minus:
331       if (Tok->Previous == NULL && Line.MustBeDeclaration)
332         Tok->Type = TT_ObjCMethodSpecifier;
333       break;
334     case tok::colon:
335       if (Tok->Previous == NULL)
336         return false;
337       // Colons from ?: are handled in parseConditional().
338       if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1) {
339         Tok->Type = TT_CtorInitializerColon;
340       } else if (Contexts.back().ColonIsDictLiteral) {
341         Tok->Type = TT_DictLiteral;
342       } else if (Contexts.back().ColonIsObjCMethodExpr ||
343                  Line.First->Type == TT_ObjCMethodSpecifier) {
344         Tok->Type = TT_ObjCMethodExpr;
345         Tok->Previous->Type = TT_ObjCSelectorName;
346         if (Tok->Previous->ColumnWidth >
347             Contexts.back().LongestObjCSelectorName) {
348           Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth;
349         }
350         if (Contexts.back().FirstObjCSelectorName == NULL)
351           Contexts.back().FirstObjCSelectorName = Tok->Previous;
352       } else if (Contexts.back().ColonIsForRangeExpr) {
353         Tok->Type = TT_RangeBasedForLoopColon;
354       } else if (CurrentToken != NULL &&
355                  CurrentToken->is(tok::numeric_constant)) {
356         Tok->Type = TT_BitFieldColon;
357       } else if (Contexts.size() == 1 && Line.First->isNot(tok::kw_enum)) {
358         Tok->Type = TT_InheritanceColon;
359       } else if (Contexts.back().ContextKind == tok::l_paren) {
360         Tok->Type = TT_InlineASMColon;
361       }
362       break;
363     case tok::kw_if:
364     case tok::kw_while:
365       if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
366         next();
367         if (!parseParens(/*LookForDecls=*/true))
368           return false;
369       }
370       break;
371     case tok::kw_for:
372       Contexts.back().ColonIsForRangeExpr = true;
373       next();
374       if (!parseParens())
375         return false;
376       break;
377     case tok::l_paren:
378       if (!parseParens())
379         return false;
380       if (Line.MustBeDeclaration && Contexts.size() == 1 &&
381           !Contexts.back().IsExpression && Line.First->Type != TT_ObjCProperty)
382         Line.MightBeFunctionDecl = true;
383       break;
384     case tok::l_square:
385       if (!parseSquare())
386         return false;
387       break;
388     case tok::l_brace:
389       if (!parseBrace())
390         return false;
391       break;
392     case tok::less:
393       if (Tok->Previous && !Tok->Previous->Tok.isLiteral() && parseAngle())
394         Tok->Type = TT_TemplateOpener;
395       else {
396         Tok->Type = TT_BinaryOperator;
397         CurrentToken = Tok;
398         next();
399       }
400       break;
401     case tok::r_paren:
402     case tok::r_square:
403       return false;
404     case tok::r_brace:
405       // Lines can start with '}'.
406       if (Tok->Previous != NULL)
407         return false;
408       break;
409     case tok::greater:
410       Tok->Type = TT_BinaryOperator;
411       break;
412     case tok::kw_operator:
413       while (CurrentToken &&
414              !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
415         if (CurrentToken->isOneOf(tok::star, tok::amp))
416           CurrentToken->Type = TT_PointerOrReference;
417         consumeToken();
418         if (CurrentToken && CurrentToken->Previous->Type == TT_BinaryOperator)
419           CurrentToken->Previous->Type = TT_OverloadedOperator;
420       }
421       if (CurrentToken) {
422         CurrentToken->Type = TT_OverloadedOperatorLParen;
423         if (CurrentToken->Previous->Type == TT_BinaryOperator)
424           CurrentToken->Previous->Type = TT_OverloadedOperator;
425       }
426       break;
427     case tok::question:
428       parseConditional();
429       break;
430     case tok::kw_template:
431       parseTemplateDeclaration();
432       break;
433     case tok::identifier:
434       if (Line.First->is(tok::kw_for) &&
435           Tok->Tok.getIdentifierInfo() == &Ident_in)
436         Tok->Type = TT_ObjCForIn;
437       break;
438     case tok::comma:
439       if (Contexts.back().FirstStartOfName)
440         Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
441       if (Contexts.back().InCtorInitializer)
442         Tok->Type = TT_CtorInitializerComma;
443       break;
444     default:
445       break;
446     }
447     return true;
448   }
449 
450   void parseIncludeDirective() {
451     next();
452     if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
453       next();
454       while (CurrentToken != NULL) {
455         if (CurrentToken->isNot(tok::comment) || CurrentToken->Next)
456           CurrentToken->Type = TT_ImplicitStringLiteral;
457         next();
458       }
459     } else {
460       while (CurrentToken != NULL) {
461         if (CurrentToken->is(tok::string_literal))
462           // Mark these string literals as "implicit" literals, too, so that
463           // they are not split or line-wrapped.
464           CurrentToken->Type = TT_ImplicitStringLiteral;
465         next();
466       }
467     }
468   }
469 
470   void parseWarningOrError() {
471     next();
472     // We still want to format the whitespace left of the first token of the
473     // warning or error.
474     next();
475     while (CurrentToken != NULL) {
476       CurrentToken->Type = TT_ImplicitStringLiteral;
477       next();
478     }
479   }
480 
481   void parsePragma() {
482     next(); // Consume "pragma".
483     if (CurrentToken && CurrentToken->TokenText == "mark") {
484       next(); // Consume "mark".
485       next(); // Consume first token (so we fix leading whitespace).
486       while (CurrentToken != NULL) {
487         CurrentToken->Type = TT_ImplicitStringLiteral;
488         next();
489       }
490     }
491   }
492 
493   void parsePreprocessorDirective() {
494     next();
495     if (CurrentToken == NULL)
496       return;
497     if (CurrentToken->Tok.is(tok::numeric_constant)) {
498       CurrentToken->SpacesRequiredBefore = 1;
499       return;
500     }
501     // Hashes in the middle of a line can lead to any strange token
502     // sequence.
503     if (CurrentToken->Tok.getIdentifierInfo() == NULL)
504       return;
505     switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
506     case tok::pp_include:
507     case tok::pp_import:
508       parseIncludeDirective();
509       break;
510     case tok::pp_error:
511     case tok::pp_warning:
512       parseWarningOrError();
513       break;
514     case tok::pp_pragma:
515       parsePragma();
516       break;
517     case tok::pp_if:
518     case tok::pp_elif:
519       parseLine();
520       break;
521     default:
522       break;
523     }
524     while (CurrentToken != NULL)
525       next();
526   }
527 
528 public:
529   LineType parseLine() {
530     if (CurrentToken->is(tok::hash)) {
531       parsePreprocessorDirective();
532       return LT_PreprocessorDirective;
533     }
534 
535     // Directly allow to 'import <string-literal>' to support protocol buffer
536     // definitions (code.google.com/p/protobuf) or missing "#" (either way we
537     // should not break the line).
538     IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
539     if (Info && Info->getPPKeywordID() == tok::pp_import &&
540         CurrentToken->Next && CurrentToken->Next->is(tok::string_literal))
541       parseIncludeDirective();
542 
543     while (CurrentToken != NULL) {
544       if (CurrentToken->is(tok::kw_virtual))
545         KeywordVirtualFound = true;
546       if (!consumeToken())
547         return LT_Invalid;
548     }
549     if (KeywordVirtualFound)
550       return LT_VirtualFunctionDecl;
551 
552     if (Line.First->Type == TT_ObjCMethodSpecifier) {
553       if (Contexts.back().FirstObjCSelectorName != NULL)
554         Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
555             Contexts.back().LongestObjCSelectorName;
556       return LT_ObjCMethodDecl;
557     }
558 
559     return LT_Other;
560   }
561 
562 private:
563   void next() {
564     if (CurrentToken != NULL) {
565       determineTokenType(*CurrentToken);
566       CurrentToken->BindingStrength = Contexts.back().BindingStrength;
567       CurrentToken->NestingLevel = Contexts.size() - 1;
568     }
569 
570     if (CurrentToken != NULL)
571       CurrentToken = CurrentToken->Next;
572 
573     if (CurrentToken != NULL) {
574       // Reset token type in case we have already looked at it and then
575       // recovered from an error (e.g. failure to find the matching >).
576       if (CurrentToken->Type != TT_LambdaLSquare &&
577           CurrentToken->Type != TT_FunctionLBrace &&
578           CurrentToken->Type != TT_ImplicitStringLiteral)
579         CurrentToken->Type = TT_Unknown;
580       if (CurrentToken->Role)
581         CurrentToken->Role.reset(NULL);
582       CurrentToken->FakeLParens.clear();
583       CurrentToken->FakeRParens = 0;
584     }
585   }
586 
587   /// \brief A struct to hold information valid in a specific context, e.g.
588   /// a pair of parenthesis.
589   struct Context {
590     Context(tok::TokenKind ContextKind, unsigned BindingStrength,
591             bool IsExpression)
592         : ContextKind(ContextKind), BindingStrength(BindingStrength),
593           LongestObjCSelectorName(0), NumBlockParameters(0),
594           ColonIsForRangeExpr(false), ColonIsDictLiteral(false),
595           ColonIsObjCMethodExpr(false), FirstObjCSelectorName(NULL),
596           FirstStartOfName(NULL), IsExpression(IsExpression),
597           CanBeExpression(true), InCtorInitializer(false), CaretFound(false) {}
598 
599     tok::TokenKind ContextKind;
600     unsigned BindingStrength;
601     unsigned LongestObjCSelectorName;
602     unsigned NumBlockParameters;
603     bool ColonIsForRangeExpr;
604     bool ColonIsDictLiteral;
605     bool ColonIsObjCMethodExpr;
606     FormatToken *FirstObjCSelectorName;
607     FormatToken *FirstStartOfName;
608     bool IsExpression;
609     bool CanBeExpression;
610     bool InCtorInitializer;
611     bool CaretFound;
612   };
613 
614   /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
615   /// of each instance.
616   struct ScopedContextCreator {
617     AnnotatingParser &P;
618 
619     ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
620                          unsigned Increase)
621         : P(P) {
622       P.Contexts.push_back(Context(ContextKind,
623                                    P.Contexts.back().BindingStrength + Increase,
624                                    P.Contexts.back().IsExpression));
625     }
626 
627     ~ScopedContextCreator() { P.Contexts.pop_back(); }
628   };
629 
630   void determineTokenType(FormatToken &Current) {
631     if (Current.getPrecedence() == prec::Assignment &&
632         !Line.First->isOneOf(tok::kw_template, tok::kw_using) &&
633         (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
634       Contexts.back().IsExpression = true;
635       for (FormatToken *Previous = Current.Previous;
636            Previous && !Previous->isOneOf(tok::comma, tok::semi);
637            Previous = Previous->Previous) {
638         if (Previous->is(tok::r_square))
639           Previous = Previous->MatchingParen;
640         if (Previous->Type == TT_BinaryOperator &&
641             Previous->isOneOf(tok::star, tok::amp)) {
642           Previous->Type = TT_PointerOrReference;
643         }
644       }
645     } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
646       Contexts.back().IsExpression = true;
647     } else if (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
648                !Line.InPPDirective) {
649       bool ParametersOfFunctionType =
650           Current.Previous && Current.Previous->is(tok::r_paren) &&
651           Current.Previous->MatchingParen &&
652           Current.Previous->MatchingParen->Type == TT_FunctionTypeLParen;
653       bool IsForOrCatch = Current.Previous &&
654                           Current.Previous->isOneOf(tok::kw_for, tok::kw_catch);
655       Contexts.back().IsExpression = !ParametersOfFunctionType && !IsForOrCatch;
656     } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
657       for (FormatToken *Previous = Current.Previous;
658            Previous && Previous->isOneOf(tok::star, tok::amp);
659            Previous = Previous->Previous)
660         Previous->Type = TT_PointerOrReference;
661     } else if (Current.Previous &&
662                Current.Previous->Type == TT_CtorInitializerColon) {
663       Contexts.back().IsExpression = true;
664       Contexts.back().InCtorInitializer = true;
665     } else if (Current.is(tok::kw_new)) {
666       Contexts.back().CanBeExpression = false;
667     } else if (Current.is(tok::semi) || Current.is(tok::exclaim)) {
668       // This should be the condition or increment in a for-loop.
669       Contexts.back().IsExpression = true;
670     }
671 
672     if (Current.Type == TT_Unknown) {
673       // Line.MightBeFunctionDecl can only be true after the parentheses of a
674       // function declaration have been found. In this case, 'Current' is a
675       // trailing token of this declaration and thus cannot be a name.
676       if (isStartOfName(Current) && !Line.MightBeFunctionDecl) {
677         Contexts.back().FirstStartOfName = &Current;
678         Current.Type = TT_StartOfName;
679       } else if (Current.is(tok::kw_auto)) {
680         AutoFound = true;
681       } else if (Current.is(tok::arrow) && AutoFound &&
682                  Line.MustBeDeclaration) {
683         Current.Type = TT_TrailingReturnArrow;
684       } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
685         Current.Type =
686             determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
687                                                Contexts.back().IsExpression);
688       } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
689         Current.Type = determinePlusMinusCaretUsage(Current);
690         if (Current.Type == TT_UnaryOperator) {
691           ++Contexts.back().NumBlockParameters;
692           if (Current.is(tok::caret))
693             Contexts.back().CaretFound = true;
694         }
695       } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
696         Current.Type = determineIncrementUsage(Current);
697       } else if (Current.is(tok::exclaim)) {
698         Current.Type = TT_UnaryOperator;
699       } else if (Current.isBinaryOperator() &&
700                  (!Current.Previous ||
701                   Current.Previous->isNot(tok::l_square))) {
702         Current.Type = TT_BinaryOperator;
703       } else if (Current.is(tok::comment)) {
704         if (Current.TokenText.startswith("//"))
705           Current.Type = TT_LineComment;
706         else
707           Current.Type = TT_BlockComment;
708       } else if (Current.is(tok::r_paren)) {
709         FormatToken *LeftOfParens = NULL;
710         if (Current.MatchingParen)
711           LeftOfParens = Current.MatchingParen->getPreviousNonComment();
712         bool IsCast = false;
713         bool ParensAreEmpty = Current.Previous == Current.MatchingParen;
714         bool ParensAreType = !Current.Previous ||
715                              Current.Previous->Type == TT_PointerOrReference ||
716                              Current.Previous->Type == TT_TemplateCloser ||
717                              Current.Previous->isSimpleTypeSpecifier();
718         bool ParensCouldEndDecl =
719             Current.Next &&
720             Current.Next->isOneOf(tok::equal, tok::semi, tok::l_brace);
721         bool IsSizeOfOrAlignOf =
722             LeftOfParens &&
723             LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof);
724         if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
725             (Contexts.back().IsExpression ||
726              (Current.Next && Current.Next->isBinaryOperator())))
727           IsCast = true;
728         if (Current.Next && Current.Next->isNot(tok::string_literal) &&
729             (Current.Next->Tok.isLiteral() ||
730              Current.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
731           IsCast = true;
732         // If there is an identifier after the (), it is likely a cast, unless
733         // there is also an identifier before the ().
734         if (LeftOfParens && (LeftOfParens->Tok.getIdentifierInfo() == NULL ||
735                              LeftOfParens->is(tok::kw_return)) &&
736             LeftOfParens->Type != TT_OverloadedOperator &&
737             LeftOfParens->isNot(tok::at) &&
738             LeftOfParens->Type != TT_TemplateCloser && Current.Next &&
739             Current.Next->is(tok::identifier))
740           IsCast = true;
741         if (IsCast && !ParensAreEmpty)
742           Current.Type = TT_CastRParen;
743       } else if (Current.is(tok::at) && Current.Next) {
744         switch (Current.Next->Tok.getObjCKeywordID()) {
745         case tok::objc_interface:
746         case tok::objc_implementation:
747         case tok::objc_protocol:
748           Current.Type = TT_ObjCDecl;
749           break;
750         case tok::objc_property:
751           Current.Type = TT_ObjCProperty;
752           break;
753         default:
754           break;
755         }
756       } else if (Current.is(tok::period)) {
757         FormatToken *PreviousNoComment = Current.getPreviousNonComment();
758         if (PreviousNoComment &&
759             PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
760           Current.Type = TT_DesignatedInitializerPeriod;
761       } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
762                  Line.MightBeFunctionDecl && Contexts.size() == 1) {
763         // Line.MightBeFunctionDecl can only be true after the parentheses of a
764         // function declaration have been found.
765         Current.Type = TT_TrailingAnnotation;
766       }
767     }
768   }
769 
770   /// \brief Take a guess at whether \p Tok starts a name of a function or
771   /// variable declaration.
772   ///
773   /// This is a heuristic based on whether \p Tok is an identifier following
774   /// something that is likely a type.
775   bool isStartOfName(const FormatToken &Tok) {
776     if (Tok.isNot(tok::identifier) || Tok.Previous == NULL)
777       return false;
778 
779     // Skip "const" as it does not have an influence on whether this is a name.
780     FormatToken *PreviousNotConst = Tok.Previous;
781     while (PreviousNotConst != NULL && PreviousNotConst->is(tok::kw_const))
782       PreviousNotConst = PreviousNotConst->Previous;
783 
784     if (PreviousNotConst == NULL)
785       return false;
786 
787     bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
788                        PreviousNotConst->Previous &&
789                        PreviousNotConst->Previous->is(tok::hash);
790 
791     if (PreviousNotConst->Type == TT_TemplateCloser)
792       return PreviousNotConst && PreviousNotConst->MatchingParen &&
793              PreviousNotConst->MatchingParen->Previous &&
794              PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
795 
796     return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) ||
797            PreviousNotConst->Type == TT_PointerOrReference ||
798            PreviousNotConst->isSimpleTypeSpecifier();
799   }
800 
801   /// \brief Return the type of the given token assuming it is * or &.
802   TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression) {
803     const FormatToken *PrevToken = Tok.getPreviousNonComment();
804     if (PrevToken == NULL)
805       return TT_UnaryOperator;
806 
807     const FormatToken *NextToken = Tok.getNextNonComment();
808     if (NextToken == NULL)
809       return TT_Unknown;
810 
811     if (PrevToken->is(tok::coloncolon) ||
812         (PrevToken->is(tok::l_paren) && !IsExpression))
813       return TT_PointerOrReference;
814 
815     if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
816                            tok::comma, tok::semi, tok::kw_return, tok::colon,
817                            tok::equal, tok::kw_delete, tok::kw_sizeof) ||
818         PrevToken->Type == TT_BinaryOperator ||
819         PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
820       return TT_UnaryOperator;
821 
822     if (NextToken->is(tok::l_square))
823       return TT_PointerOrReference;
824 
825     if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&
826         PrevToken->MatchingParen->Previous &&
827         PrevToken->MatchingParen->Previous->is(tok::kw_typeof))
828       return TT_PointerOrReference;
829 
830     if (PrevToken->Tok.isLiteral() ||
831         PrevToken->isOneOf(tok::r_paren, tok::r_square) ||
832         NextToken->Tok.isLiteral() || NextToken->isUnaryOperator())
833       return TT_BinaryOperator;
834 
835     // It is very unlikely that we are going to find a pointer or reference type
836     // definition on the RHS of an assignment.
837     if (IsExpression)
838       return TT_BinaryOperator;
839 
840     return TT_PointerOrReference;
841   }
842 
843   TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
844     const FormatToken *PrevToken = Tok.getPreviousNonComment();
845     if (PrevToken == NULL || PrevToken->Type == TT_CastRParen)
846       return TT_UnaryOperator;
847 
848     // Use heuristics to recognize unary operators.
849     if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
850                            tok::question, tok::colon, tok::kw_return,
851                            tok::kw_case, tok::at, tok::l_brace))
852       return TT_UnaryOperator;
853 
854     // There can't be two consecutive binary operators.
855     if (PrevToken->Type == TT_BinaryOperator)
856       return TT_UnaryOperator;
857 
858     // Fall back to marking the token as binary operator.
859     return TT_BinaryOperator;
860   }
861 
862   /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
863   TokenType determineIncrementUsage(const FormatToken &Tok) {
864     const FormatToken *PrevToken = Tok.getPreviousNonComment();
865     if (PrevToken == NULL || PrevToken->Type == TT_CastRParen)
866       return TT_UnaryOperator;
867     if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
868       return TT_TrailingUnaryOperator;
869 
870     return TT_UnaryOperator;
871   }
872 
873 
874   SmallVector<Context, 8> Contexts;
875 
876   const FormatStyle &Style;
877   AnnotatedLine &Line;
878   FormatToken *CurrentToken;
879   bool KeywordVirtualFound;
880   bool AutoFound;
881   IdentifierInfo &Ident_in;
882 };
883 
884 static int PrecedenceUnaryOperator = prec::PointerToMember + 1;
885 static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
886 
887 /// \brief Parses binary expressions by inserting fake parenthesis based on
888 /// operator precedence.
889 class ExpressionParser {
890 public:
891   ExpressionParser(AnnotatedLine &Line) : Current(Line.First) {
892     // Skip leading "}", e.g. in "} else if (...) {".
893     if (Current->is(tok::r_brace))
894       next();
895   }
896 
897   /// \brief Parse expressions with the given operatore precedence.
898   void parse(int Precedence = 0) {
899     // Skip 'return' and ObjC selector colons as they are not part of a binary
900     // expression.
901     while (Current &&
902            (Current->is(tok::kw_return) ||
903             (Current->is(tok::colon) && Current->Type == TT_ObjCMethodExpr)))
904       next();
905 
906     if (Current == NULL || Precedence > PrecedenceArrowAndPeriod)
907       return;
908 
909     // Conditional expressions need to be parsed separately for proper nesting.
910     if (Precedence == prec::Conditional) {
911       parseConditionalExpr();
912       return;
913     }
914 
915     // Parse unary operators, which all have a higher precedence than binary
916     // operators.
917     if (Precedence == PrecedenceUnaryOperator) {
918       parseUnaryOperator();
919       return;
920     }
921 
922     FormatToken *Start = Current;
923     FormatToken *LatestOperator = NULL;
924 
925     while (Current) {
926       // Consume operators with higher precedence.
927       parse(Precedence + 1);
928 
929       int CurrentPrecedence = getCurrentPrecedence();
930 
931       if (Current && Current->Type == TT_ObjCSelectorName &&
932           Precedence == CurrentPrecedence) {
933         if (LatestOperator)
934           addFakeParenthesis(Start, prec::Level(Precedence));
935         Start = Current;
936       }
937 
938       // At the end of the line or when an operator with higher precedence is
939       // found, insert fake parenthesis and return.
940       if (Current == NULL || Current->closesScope() ||
941           (CurrentPrecedence != -1 && CurrentPrecedence < Precedence)) {
942         if (LatestOperator) {
943           if (Precedence == PrecedenceArrowAndPeriod) {
944             LatestOperator->LastInChainOfCalls = true;
945             // Call expressions don't have a binary operator precedence.
946             addFakeParenthesis(Start, prec::Unknown);
947           } else {
948             addFakeParenthesis(Start, prec::Level(Precedence));
949           }
950         }
951         return;
952       }
953 
954       // Consume scopes: (), [], <> and {}
955       if (Current->opensScope()) {
956         while (Current && !Current->closesScope()) {
957           next();
958           parse();
959         }
960         next();
961       } else {
962         // Operator found.
963         if (CurrentPrecedence == Precedence)
964           LatestOperator = Current;
965 
966         next();
967       }
968     }
969   }
970 
971 private:
972   /// \brief Gets the precedence (+1) of the given token for binary operators
973   /// and other tokens that we treat like binary operators.
974   int getCurrentPrecedence() {
975     if (Current) {
976       if (Current->Type == TT_ConditionalExpr)
977         return prec::Conditional;
978       else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon ||
979                Current->Type == TT_ObjCSelectorName)
980         return 0;
981       else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
982         return Current->getPrecedence();
983       else if (Current->isOneOf(tok::period, tok::arrow))
984         return PrecedenceArrowAndPeriod;
985     }
986     return -1;
987   }
988 
989   void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
990     Start->FakeLParens.push_back(Precedence);
991     if (Precedence > prec::Unknown)
992       Start->StartsBinaryExpression = true;
993     if (Current) {
994       ++Current->Previous->FakeRParens;
995       if (Precedence > prec::Unknown)
996         Current->Previous->EndsBinaryExpression = true;
997     }
998   }
999 
1000   /// \brief Parse unary operator expressions and surround them with fake
1001   /// parentheses if appropriate.
1002   void parseUnaryOperator() {
1003     if (Current == NULL || Current->Type != TT_UnaryOperator) {
1004       parse(PrecedenceArrowAndPeriod);
1005       return;
1006     }
1007 
1008     FormatToken *Start = Current;
1009     next();
1010     parseUnaryOperator();
1011 
1012     // The actual precedence doesn't matter.
1013     addFakeParenthesis(Start, prec::Unknown);
1014   }
1015 
1016   void parseConditionalExpr() {
1017     FormatToken *Start = Current;
1018     parse(prec::LogicalOr);
1019     if (!Current || !Current->is(tok::question))
1020       return;
1021     next();
1022     parse(prec::LogicalOr);
1023     if (!Current || Current->Type != TT_ConditionalExpr)
1024       return;
1025     next();
1026     parseConditionalExpr();
1027     addFakeParenthesis(Start, prec::Conditional);
1028   }
1029 
1030   void next() {
1031     if (Current)
1032       Current = Current->Next;
1033     while (Current && Current->isTrailingComment())
1034       Current = Current->Next;
1035   }
1036 
1037   FormatToken *Current;
1038 };
1039 
1040 } // end anonymous namespace
1041 
1042 void
1043 TokenAnnotator::setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) {
1044   const AnnotatedLine *NextNonCommentLine = NULL;
1045   for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
1046                                                           E = Lines.rend();
1047        I != E; ++I) {
1048     if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
1049         (*I)->First->Next == NULL)
1050       (*I)->Level = NextNonCommentLine->Level;
1051     else
1052       NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : NULL;
1053 
1054     setCommentLineLevels((*I)->Children);
1055   }
1056 }
1057 
1058 void TokenAnnotator::annotate(AnnotatedLine &Line) {
1059   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1060                                                   E = Line.Children.end();
1061        I != E; ++I) {
1062     annotate(**I);
1063   }
1064   AnnotatingParser Parser(Style, Line, Ident_in);
1065   Line.Type = Parser.parseLine();
1066   if (Line.Type == LT_Invalid)
1067     return;
1068 
1069   ExpressionParser ExprParser(Line);
1070   ExprParser.parse();
1071 
1072   if (Line.First->Type == TT_ObjCMethodSpecifier)
1073     Line.Type = LT_ObjCMethodDecl;
1074   else if (Line.First->Type == TT_ObjCDecl)
1075     Line.Type = LT_ObjCDecl;
1076   else if (Line.First->Type == TT_ObjCProperty)
1077     Line.Type = LT_ObjCProperty;
1078 
1079   Line.First->SpacesRequiredBefore = 1;
1080   Line.First->CanBreakBefore = Line.First->MustBreakBefore;
1081 }
1082 
1083 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
1084   Line.First->TotalLength =
1085       Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
1086   if (!Line.First->Next)
1087     return;
1088   FormatToken *Current = Line.First->Next;
1089   bool InFunctionDecl = Line.MightBeFunctionDecl;
1090   while (Current != NULL) {
1091     if (Current->Type == TT_LineComment) {
1092       if (Current->Previous->BlockKind == BK_BracedInit)
1093         Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
1094       else
1095         Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
1096     } else if (Current->SpacesRequiredBefore == 0 &&
1097              spaceRequiredBefore(Line, *Current)) {
1098       Current->SpacesRequiredBefore = 1;
1099     }
1100 
1101     Current->MustBreakBefore =
1102         Current->MustBreakBefore || mustBreakBefore(Line, *Current);
1103 
1104     Current->CanBreakBefore =
1105         Current->MustBreakBefore || canBreakBefore(Line, *Current);
1106     if (Current->MustBreakBefore || !Current->Children.empty() ||
1107         Current->IsMultiline)
1108       Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
1109     else
1110       Current->TotalLength = Current->Previous->TotalLength +
1111                              Current->ColumnWidth +
1112                              Current->SpacesRequiredBefore;
1113 
1114     if (Current->Type == TT_CtorInitializerColon)
1115       InFunctionDecl = false;
1116 
1117     // FIXME: Only calculate this if CanBreakBefore is true once static
1118     // initializers etc. are sorted out.
1119     // FIXME: Move magic numbers to a better place.
1120     Current->SplitPenalty = 20 * Current->BindingStrength +
1121                             splitPenalty(Line, *Current, InFunctionDecl);
1122 
1123     Current = Current->Next;
1124   }
1125 
1126   calculateUnbreakableTailLengths(Line);
1127   for (Current = Line.First; Current != NULL; Current = Current->Next) {
1128     if (Current->Role)
1129       Current->Role->precomputeFormattingInfos(Current);
1130   }
1131 
1132   DEBUG({ printDebugInfo(Line); });
1133 
1134   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1135                                                   E = Line.Children.end();
1136        I != E; ++I) {
1137     calculateFormattingInformation(**I);
1138   }
1139 }
1140 
1141 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
1142   unsigned UnbreakableTailLength = 0;
1143   FormatToken *Current = Line.Last;
1144   while (Current != NULL) {
1145     Current->UnbreakableTailLength = UnbreakableTailLength;
1146     if (Current->CanBreakBefore ||
1147         Current->isOneOf(tok::comment, tok::string_literal)) {
1148       UnbreakableTailLength = 0;
1149     } else {
1150       UnbreakableTailLength +=
1151           Current->ColumnWidth + Current->SpacesRequiredBefore;
1152     }
1153     Current = Current->Previous;
1154   }
1155 }
1156 
1157 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
1158                                       const FormatToken &Tok,
1159                                       bool InFunctionDecl) {
1160   const FormatToken &Left = *Tok.Previous;
1161   const FormatToken &Right = Tok;
1162 
1163   if (Left.is(tok::semi))
1164     return 0;
1165   if (Left.is(tok::comma))
1166     return 1;
1167   if (Right.is(tok::l_square)) {
1168     if (Style.Language == FormatStyle::LK_Proto)
1169       return 1;
1170     if (Right.Type != TT_ObjCMethodExpr)
1171       return 250;
1172   }
1173   if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator)) {
1174     if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
1175       return 3;
1176     if (Left.Type == TT_StartOfName)
1177       return 20;
1178     if (InFunctionDecl && Right.NestingLevel == 0)
1179       return Style.PenaltyReturnTypeOnItsOwnLine;
1180     return 200;
1181   }
1182   if (Left.is(tok::equal) && Right.is(tok::l_brace))
1183     return 150;
1184   if (Left.Type == TT_CastRParen)
1185     return 100;
1186   if (Left.is(tok::coloncolon))
1187     return 500;
1188   if (Left.isOneOf(tok::kw_class, tok::kw_struct))
1189     return 5000;
1190 
1191   if (Left.Type == TT_RangeBasedForLoopColon ||
1192       Left.Type == TT_InheritanceColon)
1193     return 2;
1194 
1195   if (Right.isMemberAccess()) {
1196     if (Left.is(tok::r_paren) && Left.MatchingParen &&
1197         Left.MatchingParen->ParameterCount > 0)
1198       return 20; // Should be smaller than breaking at a nested comma.
1199     return 150;
1200   }
1201 
1202   if (Right.Type == TT_TrailingAnnotation && Right.Next &&
1203       Right.Next->isNot(tok::l_paren)) {
1204     // Breaking before a trailing annotation is bad unless it is function-like.
1205     // Use a slightly higher penalty after ")" so that annotations like
1206     // "const override" are kept together.
1207     return Left.is(tok::r_paren) ? 100 : 120;
1208   }
1209 
1210   // In for-loops, prefer breaking at ',' and ';'.
1211   if (Line.First->is(tok::kw_for) && Left.is(tok::equal))
1212     return 4;
1213 
1214   // In Objective-C method expressions, prefer breaking before "param:" over
1215   // breaking after it.
1216   if (Right.Type == TT_ObjCSelectorName)
1217     return 0;
1218   if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
1219     return Line.MightBeFunctionDecl ? 50 : 500;
1220 
1221   if (Left.is(tok::l_paren) && InFunctionDecl)
1222     return 100;
1223   if (Left.is(tok::equal) && InFunctionDecl)
1224     return 110;
1225   if (Left.opensScope())
1226     return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
1227                                    : 19;
1228 
1229   if (Right.is(tok::lessless)) {
1230     if (Left.is(tok::string_literal)) {
1231       StringRef Content = Left.TokenText;
1232       if (Content.startswith("\""))
1233         Content = Content.drop_front(1);
1234       if (Content.endswith("\""))
1235         Content = Content.drop_back(1);
1236       Content = Content.trim();
1237       if (Content.size() > 1 &&
1238           (Content.back() == ':' || Content.back() == '='))
1239         return 25;
1240     }
1241     return 1; // Breaking at a << is really cheap.
1242   }
1243   if (Left.Type == TT_ConditionalExpr)
1244     return prec::Conditional;
1245   prec::Level Level = Left.getPrecedence();
1246 
1247   if (Level != prec::Unknown)
1248     return Level;
1249 
1250   return 3;
1251 }
1252 
1253 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
1254                                           const FormatToken &Left,
1255                                           const FormatToken &Right) {
1256   if (Style.Language == FormatStyle::LK_Proto) {
1257     if (Right.is(tok::l_paren) && Left.TokenText == "returns")
1258       return true;
1259   }
1260   if (Right.is(tok::hashhash))
1261     return Left.is(tok::hash);
1262   if (Left.isOneOf(tok::hashhash, tok::hash))
1263     return Right.is(tok::hash);
1264   if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
1265     return Style.SpaceInEmptyParentheses;
1266   if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
1267     return (Right.Type == TT_CastRParen ||
1268             (Left.MatchingParen && Left.MatchingParen->Type == TT_CastRParen))
1269                ? Style.SpacesInCStyleCastParentheses
1270                : Style.SpacesInParentheses;
1271   if (Style.SpacesInAngles &&
1272       ((Left.Type == TT_TemplateOpener) != (Right.Type == TT_TemplateCloser)))
1273     return true;
1274   if (Right.isOneOf(tok::semi, tok::comma))
1275     return false;
1276   if (Right.is(tok::less) &&
1277       (Left.is(tok::kw_template) ||
1278        (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
1279     return true;
1280   if (Left.is(tok::arrow) || Right.is(tok::arrow))
1281     return false;
1282   if (Left.isOneOf(tok::exclaim, tok::tilde))
1283     return false;
1284   if (Left.is(tok::at) &&
1285       Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
1286                     tok::numeric_constant, tok::l_paren, tok::l_brace,
1287                     tok::kw_true, tok::kw_false))
1288     return false;
1289   if (Left.is(tok::coloncolon))
1290     return false;
1291   if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace))
1292     return (Left.is(tok::less) && Style.Standard == FormatStyle::LS_Cpp03) ||
1293            !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren,
1294                          tok::r_paren, tok::less);
1295   if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
1296     return false;
1297   if (Right.is(tok::ellipsis))
1298     return Left.Tok.isLiteral();
1299   if (Left.is(tok::l_square) && Right.is(tok::amp))
1300     return false;
1301   if (Right.Type == TT_PointerOrReference)
1302     return Left.Tok.isLiteral() ||
1303            ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) &&
1304             !Style.PointerBindsToType);
1305   if (Right.Type == TT_FunctionTypeLParen && Left.isNot(tok::l_paren) &&
1306       (Left.Type != TT_PointerOrReference || Style.PointerBindsToType))
1307     return true;
1308   if (Left.Type == TT_PointerOrReference)
1309     return Right.Tok.isLiteral() || Right.Type == TT_BlockComment ||
1310            ((Right.Type != TT_PointerOrReference) &&
1311             Right.isNot(tok::l_paren) && Style.PointerBindsToType &&
1312             Left.Previous &&
1313             !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
1314   if (Right.is(tok::star) && Left.is(tok::l_paren))
1315     return false;
1316   if (Left.is(tok::l_square))
1317     return Left.Type == TT_ArrayInitializerLSquare &&
1318            Style.SpacesInContainerLiterals && Right.isNot(tok::r_square);
1319   if (Right.is(tok::r_square))
1320     return Right.MatchingParen && Style.SpacesInContainerLiterals &&
1321            Right.MatchingParen->Type == TT_ArrayInitializerLSquare;
1322   if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr &&
1323       Right.Type != TT_LambdaLSquare && Left.isNot(tok::numeric_constant))
1324     return false;
1325   if (Left.is(tok::colon))
1326     return Left.Type != TT_ObjCMethodExpr;
1327   if (Right.is(tok::l_paren)) {
1328     if (Left.is(tok::r_paren) && Left.MatchingParen &&
1329         Left.MatchingParen->Previous &&
1330         Left.MatchingParen->Previous->is(tok::kw___attribute))
1331       return true;
1332     return Line.Type == LT_ObjCDecl ||
1333            Left.isOneOf(tok::kw_return, tok::kw_new, tok::kw_delete,
1334                         tok::semi) ||
1335            (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
1336             Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
1337                          tok::kw_catch)) ||
1338            (Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
1339             Left.isOneOf(tok::identifier, tok::kw___attribute) &&
1340             Line.Type != LT_PreprocessorDirective);
1341   }
1342   if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1343     return false;
1344   if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1345     return !Left.Children.empty(); // No spaces in "{}".
1346   if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
1347       (Right.is(tok::r_brace) && Right.MatchingParen &&
1348        Right.MatchingParen->BlockKind != BK_Block))
1349     return !Style.Cpp11BracedListStyle;
1350   if (Left.Type == TT_BlockComment && Left.TokenText.endswith("=*/"))
1351     return false;
1352   if (Right.Type == TT_UnaryOperator)
1353     return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
1354            (Left.isNot(tok::colon) || Left.Type != TT_ObjCMethodExpr);
1355   if (Left.isOneOf(tok::identifier, tok::greater, tok::r_square) &&
1356       Right.is(tok::l_brace) && Right.getNextNonComment() &&
1357       Right.BlockKind != BK_Block)
1358     return false;
1359   if (Left.is(tok::period) || Right.is(tok::period))
1360     return false;
1361   if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
1362     return false;
1363   return true;
1364 }
1365 
1366 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
1367                                          const FormatToken &Tok) {
1368   if (Tok.Tok.getIdentifierInfo() && Tok.Previous->Tok.getIdentifierInfo())
1369     return true; // Never ever merge two identifiers.
1370   if (Tok.Previous->Type == TT_ImplicitStringLiteral)
1371     return Tok.WhitespaceRange.getBegin() != Tok.WhitespaceRange.getEnd();
1372   if (Line.Type == LT_ObjCMethodDecl) {
1373     if (Tok.Previous->Type == TT_ObjCMethodSpecifier)
1374       return true;
1375     if (Tok.Previous->is(tok::r_paren) && Tok.is(tok::identifier))
1376       // Don't space between ')' and <id>
1377       return false;
1378   }
1379   if (Line.Type == LT_ObjCProperty &&
1380       (Tok.is(tok::equal) || Tok.Previous->is(tok::equal)))
1381     return false;
1382 
1383   if (Tok.Type == TT_TrailingReturnArrow ||
1384       Tok.Previous->Type == TT_TrailingReturnArrow)
1385     return true;
1386   if (Tok.Previous->is(tok::comma))
1387     return true;
1388   if (Tok.is(tok::comma))
1389     return false;
1390   if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
1391     return true;
1392   if (Tok.Previous->Tok.is(tok::kw_operator))
1393     return Tok.is(tok::coloncolon);
1394   if (Tok.Type == TT_OverloadedOperatorLParen)
1395     return false;
1396   if (Tok.is(tok::colon))
1397     return !Line.First->isOneOf(tok::kw_case, tok::kw_default) &&
1398            Tok.getNextNonComment() != NULL && Tok.Type != TT_ObjCMethodExpr &&
1399            !Tok.Previous->is(tok::question) &&
1400            (Tok.Type != TT_DictLiteral || Style.SpacesInContainerLiterals);
1401   if (Tok.Previous->Type == TT_UnaryOperator ||
1402       Tok.Previous->Type == TT_CastRParen)
1403     return false;
1404   if (Tok.Previous->is(tok::greater) && Tok.is(tok::greater)) {
1405     return Tok.Type == TT_TemplateCloser &&
1406            Tok.Previous->Type == TT_TemplateCloser &&
1407            (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
1408   }
1409   if (Tok.isOneOf(tok::arrowstar, tok::periodstar) ||
1410       Tok.Previous->isOneOf(tok::arrowstar, tok::periodstar))
1411     return false;
1412   if (!Style.SpaceBeforeAssignmentOperators &&
1413       Tok.getPrecedence() == prec::Assignment)
1414     return false;
1415   if ((Tok.Type == TT_BinaryOperator && !Tok.Previous->is(tok::l_paren)) ||
1416       Tok.Previous->Type == TT_BinaryOperator)
1417     return true;
1418   if (Tok.Previous->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
1419     return false;
1420   if (Tok.is(tok::less) && Tok.Previous->isNot(tok::l_paren) &&
1421       Line.First->is(tok::hash))
1422     return true;
1423   if (Tok.Type == TT_TrailingUnaryOperator)
1424     return false;
1425   return spaceRequiredBetween(Line, *Tok.Previous, Tok);
1426 }
1427 
1428 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
1429                                      const FormatToken &Right) {
1430   if (Right.is(tok::comment)) {
1431     return Right.Previous->BlockKind != BK_BracedInit &&
1432            Right.Previous->Type != TT_CtorInitializerColon &&
1433            Right.NewlinesBefore > 0;
1434   } else if (Right.Previous->isTrailingComment() ||
1435              (Right.isStringLiteral() && Right.Previous->isStringLiteral())) {
1436     return true;
1437   } else if (Right.Previous->IsUnterminatedLiteral) {
1438     return true;
1439   } else if (Right.is(tok::lessless) && Right.Next &&
1440              Right.Previous->is(tok::string_literal) &&
1441              Right.Next->is(tok::string_literal)) {
1442     return true;
1443   } else if (Right.Previous->ClosesTemplateDeclaration &&
1444              Right.Previous->MatchingParen &&
1445              Right.Previous->MatchingParen->NestingLevel == 0 &&
1446              Style.AlwaysBreakTemplateDeclarations) {
1447     return true;
1448   } else if ((Right.Type == TT_CtorInitializerComma ||
1449               Right.Type == TT_CtorInitializerColon) &&
1450              Style.BreakConstructorInitializersBeforeComma &&
1451              !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) {
1452     return true;
1453   } else if (Right.is(tok::l_brace) && (Right.BlockKind == BK_Block)) {
1454     return Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1455            Style.BreakBeforeBraces == FormatStyle::BS_GNU;
1456   } else if (Right.is(tok::string_literal) &&
1457              Right.TokenText.startswith("R\"")) {
1458     // Raw string literals are special wrt. line breaks. The author has made a
1459     // deliberate choice and might have aligned the contents of the string
1460     // literal accordingly. Thus, we try keep existing line breaks.
1461     return Right.NewlinesBefore > 0;
1462   } else if (Right.Previous->is(tok::l_brace) &&
1463              Style.Language == FormatStyle::LK_Proto) {
1464     // Don't enums onto single lines in protocol buffers.
1465     return true;
1466   }
1467   return false;
1468 }
1469 
1470 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
1471                                     const FormatToken &Right) {
1472   const FormatToken &Left = *Right.Previous;
1473   if (Left.is(tok::at))
1474     return false;
1475   if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator))
1476     return true;
1477   if (Right.isTrailingComment())
1478     // We rely on MustBreakBefore being set correctly here as we should not
1479     // change the "binding" behavior of a comment.
1480     // The first comment in a braced lists is always interpreted as belonging to
1481     // the first list element. Otherwise, it should be placed outside of the
1482     // list.
1483     return Left.BlockKind == BK_BracedInit;
1484   if (Left.is(tok::question) && Right.is(tok::colon))
1485     return false;
1486   if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
1487     return Style.BreakBeforeTernaryOperators;
1488   if (Left.Type == TT_ConditionalExpr || Left.is(tok::question))
1489     return !Style.BreakBeforeTernaryOperators;
1490   if (Right.is(tok::colon) &&
1491       (Right.Type == TT_DictLiteral || Right.Type == TT_ObjCMethodExpr))
1492     return false;
1493   if (Left.is(tok::colon) &&
1494       (Left.Type == TT_DictLiteral || Left.Type == TT_ObjCMethodExpr))
1495     return true;
1496   if (Right.Type == TT_ObjCSelectorName)
1497     return true;
1498   if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
1499     return true;
1500   if (Left.ClosesTemplateDeclaration)
1501     return true;
1502   if (Right.Type == TT_RangeBasedForLoopColon ||
1503       Right.Type == TT_OverloadedOperatorLParen ||
1504       Right.Type == TT_OverloadedOperator)
1505     return false;
1506   if (Left.Type == TT_RangeBasedForLoopColon)
1507     return true;
1508   if (Right.Type == TT_RangeBasedForLoopColon)
1509     return false;
1510   if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
1511       Left.Type == TT_UnaryOperator || Left.is(tok::kw_operator))
1512     return false;
1513   if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
1514     return false;
1515   if (Left.Previous) {
1516     if (Left.is(tok::l_paren) && Right.is(tok::l_paren) &&
1517         Left.Previous->is(tok::kw___attribute))
1518       return false;
1519     if (Left.is(tok::l_paren) && (Left.Previous->Type == TT_BinaryOperator ||
1520                                   Left.Previous->Type == TT_CastRParen))
1521       return false;
1522   }
1523   if (Right.Type == TT_ImplicitStringLiteral)
1524     return false;
1525 
1526   if (Right.is(tok::r_paren) || Right.Type == TT_TemplateCloser)
1527     return false;
1528 
1529   // We only break before r_brace if there was a corresponding break before
1530   // the l_brace, which is tracked by BreakBeforeClosingBrace.
1531   if (Right.is(tok::r_brace))
1532     return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
1533 
1534   // Allow breaking after a trailing 'const', e.g. after a method declaration,
1535   // unless it is follow by ';', '{' or '='.
1536   if (Left.is(tok::kw_const) && Left.Previous != NULL &&
1537       Left.Previous->is(tok::r_paren))
1538     return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal);
1539 
1540   if (Right.is(tok::kw___attribute))
1541     return true;
1542 
1543   if (Left.is(tok::identifier) && Right.is(tok::string_literal))
1544     return true;
1545 
1546   if (Left.Type == TT_CtorInitializerComma &&
1547       Style.BreakConstructorInitializersBeforeComma)
1548     return false;
1549   if (Right.Type == TT_CtorInitializerComma &&
1550       Style.BreakConstructorInitializersBeforeComma)
1551     return true;
1552   if (Right.Type == TT_BinaryOperator && Style.BreakBeforeBinaryOperators)
1553     return true;
1554   if (Left.is(tok::greater) && Right.is(tok::greater) &&
1555       Left.Type != TT_TemplateCloser)
1556     return false;
1557   if (Left.Type == TT_ArrayInitializerLSquare)
1558     return true;
1559   return (Left.isBinaryOperator() && Left.isNot(tok::lessless) &&
1560           !Style.BreakBeforeBinaryOperators) ||
1561          Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
1562                       tok::kw_class, tok::kw_struct) ||
1563          Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon,
1564                        tok::l_square, tok::at) ||
1565          (Left.is(tok::r_paren) &&
1566           Right.isOneOf(tok::identifier, tok::kw_const, tok::kw___attribute)) ||
1567          (Left.is(tok::l_paren) && !Right.is(tok::r_paren));
1568 }
1569 
1570 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
1571   llvm::errs() << "AnnotatedTokens:\n";
1572   const FormatToken *Tok = Line.First;
1573   while (Tok) {
1574     llvm::errs() << " M=" << Tok->MustBreakBefore
1575                  << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type
1576                  << " S=" << Tok->SpacesRequiredBefore
1577                  << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()
1578                  << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind
1579                  << " FakeLParens=";
1580     for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
1581       llvm::errs() << Tok->FakeLParens[i] << "/";
1582     llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n";
1583     if (Tok->Next == NULL)
1584       assert(Tok == Line.Last);
1585     Tok = Tok->Next;
1586   }
1587   llvm::errs() << "----\n";
1588 }
1589 
1590 } // namespace format
1591 } // namespace clang
1592