1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "TokenAnnotator.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Lex/Lexer.h"
19 
20 namespace clang {
21 namespace format {
22 
23 static bool isUnaryOperator(const AnnotatedToken &Tok) {
24   switch (Tok.FormatTok.Tok.getKind()) {
25   case tok::plus:
26   case tok::plusplus:
27   case tok::minus:
28   case tok::minusminus:
29   case tok::exclaim:
30   case tok::tilde:
31   case tok::kw_sizeof:
32   case tok::kw_alignof:
33     return true;
34   default:
35     return false;
36   }
37 }
38 
39 static bool isBinaryOperator(const AnnotatedToken &Tok) {
40   // Comma is a binary operator, but does not behave as such wrt. formatting.
41   return getPrecedence(Tok) > prec::Comma;
42 }
43 
44 // Returns the previous token ignoring comments.
45 static AnnotatedToken *getPreviousToken(AnnotatedToken &Tok) {
46   AnnotatedToken *PrevToken = Tok.Parent;
47   while (PrevToken != NULL && PrevToken->is(tok::comment))
48     PrevToken = PrevToken->Parent;
49   return PrevToken;
50 }
51 static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
52   return getPreviousToken(const_cast<AnnotatedToken &>(Tok));
53 }
54 
55 static bool isTrailingComment(AnnotatedToken *Tok) {
56   return Tok != NULL && Tok->is(tok::comment) &&
57          (Tok->Children.empty() ||
58           Tok->Children[0].FormatTok.NewlinesBefore > 0);
59 }
60 
61 // Returns the next token ignoring comments.
62 static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
63   if (Tok.Children.empty())
64     return NULL;
65   const AnnotatedToken *NextToken = &Tok.Children[0];
66   while (NextToken->is(tok::comment)) {
67     if (NextToken->Children.empty())
68       return NULL;
69     NextToken = &NextToken->Children[0];
70   }
71   return NextToken;
72 }
73 
74 static bool closesScope(const AnnotatedToken &Tok) {
75   return Tok.isOneOf(tok::r_paren, tok::r_brace, tok::r_square) ||
76          Tok.Type == TT_TemplateCloser;
77 }
78 
79 static bool opensScope(const AnnotatedToken &Tok) {
80   return Tok.isOneOf(tok::l_paren, tok::l_brace, tok::l_square) ||
81          Tok.Type == TT_TemplateOpener;
82 }
83 
84 /// \brief A parser that gathers additional information about tokens.
85 ///
/// The \c TokenAnnotator tries to match parentheses and square brackets and
/// store the parenthesis levels. It also tries to resolve matching "<" and ">"
/// into template parameter lists.
89 class AnnotatingParser {
90 public:
  /// \brief Creates a parser positioned at the first token of \p Line.
  ///
  /// The context stack is seeded with one outermost context of kind
  /// \c tok::unknown, binding strength 1, that is not an expression.
  AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line,
                   IdentifierInfo &Ident_in)
      : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
        KeywordVirtualFound(false), Ident_in(Ident_in) {
    Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/ false));
  }
97 
98 private:
99   bool parseAngle() {
100     if (CurrentToken == NULL)
101       return false;
102     ScopedContextCreator ContextCreator(*this, tok::less, 10);
103     AnnotatedToken *Left = CurrentToken->Parent;
104     Contexts.back().IsExpression = false;
105     while (CurrentToken != NULL) {
106       if (CurrentToken->is(tok::greater)) {
107         Left->MatchingParen = CurrentToken;
108         CurrentToken->MatchingParen = Left;
109         CurrentToken->Type = TT_TemplateCloser;
110         next();
111         return true;
112       }
113       if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
114                                 tok::pipepipe, tok::ampamp, tok::question,
115                                 tok::colon))
116         return false;
117       updateParameterCount(Left, CurrentToken);
118       if (!consumeToken())
119         return false;
120     }
121     return false;
122   }
123 
  /// \brief Parses the tokens following an opening parenthesis up to and
  /// including the matching ')'.
  ///
  /// \param LookForDecls If \c true, look for the pattern
  /// 'identifier' ('*' | '&' | '&&') 'identifier' not followed by '=' and
  /// mark its operator as \c TT_BinaryOperator (only the first match).
  ///
  /// \returns \c true if the matching ')' was found; \c false if the line
  /// ended first, a ']' or '}' was encountered, or a nested token could not
  /// be consumed.
  bool parseParens(bool LookForDecls = false) {
    if (CurrentToken == NULL)
      return false;
    ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);

    // FIXME: This is a bit of a hack. Do better.
    // Inherit the range-based-for flag only when this is the first nested
    // context directly inside the outermost one.
    Contexts.back().ColonIsForRangeExpr =
        Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;

    bool StartsObjCMethodExpr = false;
    // The token before the current one; its MatchingParen is set below.
    AnnotatedToken *Left = CurrentToken->Parent;
    if (CurrentToken->is(tok::caret)) {
      // ^( starts a block.
      Left->Type = TT_ObjCBlockLParen;
    } else if (AnnotatedToken *MaybeSel = Left->Parent) {
      // @selector( starts a selector.
      if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
          MaybeSel->Parent->is(tok::at)) {
        StartsObjCMethodExpr = true;
      }
    }

    if (StartsObjCMethodExpr) {
      Contexts.back().ColonIsObjCMethodExpr = true;
      Left->Type = TT_ObjCMethodExpr;
    }

    while (CurrentToken != NULL) {
      // LookForDecls is set when "if (" has been seen. Check for
      // 'identifier' '*' 'identifier' followed by not '=' -- this
      // '*' has to be a binary operator but determineStarAmpUsage() will
      // categorize it as an unary operator, so set the right type here.
      if (LookForDecls && !CurrentToken->Children.empty()) {
        AnnotatedToken &Prev = *CurrentToken->Parent;
        AnnotatedToken &Next = CurrentToken->Children[0];
        if (Prev.Parent->is(tok::identifier) &&
            Prev.isOneOf(tok::star, tok::amp, tok::ampamp) &&
            CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
          Prev.Type = TT_BinaryOperator;
          LookForDecls = false;
        }
      }

      if (CurrentToken->is(tok::r_paren)) {
        // Link the two tokens to each other.
        Left->MatchingParen = CurrentToken;
        CurrentToken->MatchingParen = Left;

        if (StartsObjCMethodExpr) {
          CurrentToken->Type = TT_ObjCMethodExpr;
          // Store the longest selector name seen in this context on the
          // first selector name token.
          if (Contexts.back().FirstObjCSelectorName != NULL) {
            Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
                Contexts.back().LongestObjCSelectorName;
          }
        }

        next();
        return true;
      }
      // A closing bracket of a different kind means the nesting is broken.
      if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
        return false;
      updateParameterCount(Left, CurrentToken);
      if (!consumeToken())
        return false;
    }
    return false;
  }
190 
  /// \brief Parses the tokens following a '[' up to and including the
  /// matching ']'.
  ///
  /// Decides whether the brackets form an Objective-C method expression or
  /// an Objective-C array literal and types both brackets accordingly.
  ///
  /// \returns \c true if the matching ']' was found; \c false if the line
  /// ended first, a ')' or '}' was encountered, or a nested token could not
  /// be consumed.
  bool parseSquare() {
    if (!CurrentToken)
      return false;

    // A '[' could be an index subscript (after an identifier or after
    // ')' or ']'), it could be the start of an Objective-C method
    // expression, or it could be the start of an Objective-C array literal.
    AnnotatedToken *Left = CurrentToken->Parent;
    // The non-comment token preceding Left (may be NULL).
    AnnotatedToken *Parent = getPreviousToken(*Left);
    bool StartsObjCMethodExpr =
        Contexts.back().CanBeExpression &&
        (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
                                    tok::kw_return, tok::kw_throw) ||
         isUnaryOperator(*Parent) || Parent->Type == TT_ObjCForIn ||
         Parent->Type == TT_CastRParen ||
         getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) >
         prec::Unknown);
    // Note: the check above reads the enclosing context; the new context is
    // pushed only afterwards.
    ScopedContextCreator ContextCreator(*this, tok::l_square, 10);
    Contexts.back().IsExpression = true;
    bool StartsObjCArrayLiteral = Parent && Parent->is(tok::at);

    if (StartsObjCMethodExpr) {
      Contexts.back().ColonIsObjCMethodExpr = true;
      Left->Type = TT_ObjCMethodExpr;
    } else if (StartsObjCArrayLiteral) {
      Left->Type = TT_ObjCArrayLiteral;
    }

    while (CurrentToken != NULL) {
      if (CurrentToken->is(tok::r_square)) {
        if (!CurrentToken->Children.empty() &&
            CurrentToken->Children[0].is(tok::l_paren)) {
          // An ObjC method call is rarely followed by an open parenthesis.
          // FIXME: Do we incorrectly label ":" with this?
          StartsObjCMethodExpr = false;
          Left->Type = TT_Unknown;
        }
        if (StartsObjCMethodExpr) {
          CurrentToken->Type = TT_ObjCMethodExpr;
          // determineStarAmpUsage() thinks that '*' '[' is allocating an
          // array of pointers, but if '[' starts a selector then '*' is a
          // binary operator.
          if (Parent != NULL && Parent->Type == TT_PointerOrReference)
            Parent->Type = TT_BinaryOperator;
        } else if (StartsObjCArrayLiteral) {
          CurrentToken->Type = TT_ObjCArrayLiteral;
        }
        // Link the two tokens to each other.
        Left->MatchingParen = CurrentToken;
        CurrentToken->MatchingParen = Left;
        // Store the longest selector name seen in this context on the first
        // selector name token.
        if (Contexts.back().FirstObjCSelectorName != NULL)
          Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
              Contexts.back().LongestObjCSelectorName;
        next();
        return true;
      }
      // A closing bracket of a different kind means the nesting is broken.
      if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
        return false;
      updateParameterCount(Left, CurrentToken);
      if (!consumeToken())
        return false;
    }
    return false;
  }
254 
255   bool parseBrace() {
256     // Lines are fine to end with '{'.
257     if (CurrentToken == NULL)
258       return true;
259     ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
260     AnnotatedToken *Left = CurrentToken->Parent;
261     while (CurrentToken != NULL) {
262       if (CurrentToken->is(tok::r_brace)) {
263         Left->MatchingParen = CurrentToken;
264         CurrentToken->MatchingParen = Left;
265         next();
266         return true;
267       }
268       if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
269         return false;
270       updateParameterCount(Left, CurrentToken);
271       if (!consumeToken())
272         return false;
273     }
274     return true;
275   }
276 
277   void updateParameterCount(AnnotatedToken *Left, AnnotatedToken *Current) {
278     if (Current->is(tok::comma))
279       ++Left->ParameterCount;
280     else if (Left->ParameterCount == 0 && Current->isNot(tok::comment))
281       Left->ParameterCount = 1;
282   }
283 
284   bool parseConditional() {
285     while (CurrentToken != NULL) {
286       if (CurrentToken->is(tok::colon)) {
287         CurrentToken->Type = TT_ConditionalExpr;
288         next();
289         return true;
290       }
291       if (!consumeToken())
292         return false;
293     }
294     return false;
295   }
296 
297   bool parseTemplateDeclaration() {
298     if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
299       CurrentToken->Type = TT_TemplateOpener;
300       next();
301       if (!parseAngle())
302         return false;
303       if (CurrentToken != NULL)
304         CurrentToken->Parent->ClosesTemplateDeclaration = true;
305       return true;
306     }
307     return false;
308   }
309 
  /// \brief Consumes the current token and, depending on its kind, everything
  /// that structurally belongs to it (e.g. a whole bracketed scope).
  ///
  /// Callers must ensure \c CurrentToken is not NULL; it is dereferenced
  /// without a check.
  ///
  /// \returns \c false if the token sequence cannot be annotated (unbalanced
  /// closing bracket, a ':' at the start of the line, or a failed nested
  /// parse); \c true otherwise.
  bool consumeToken() {
    // Tok is the token being consumed; after next(), CurrentToken already
    // points at its successor (or is NULL).
    AnnotatedToken *Tok = CurrentToken;
    next();
    switch (Tok->FormatTok.Tok.getKind()) {
    case tok::plus:
    case tok::minus:
      // At the start of the line, +/- specific ObjectiveC method
      // declarations.
      if (Tok->Parent == NULL)
        Tok->Type = TT_ObjCMethodSpecifier;
      break;
    case tok::colon:
      if (Tok->Parent == NULL)
        return false;
      // Colons from ?: are handled in parseConditional().
      if (Tok->Parent->is(tok::r_paren) && Contexts.size() == 1) {
        Tok->Type = TT_CtorInitializerColon;
      } else if (Contexts.back().ColonIsObjCMethodExpr ||
                 Line.First.Type == TT_ObjCMethodSpecifier) {
        Tok->Type = TT_ObjCMethodExpr;
        Tok->Parent->Type = TT_ObjCSelectorName;
        // Track the longest and the first selector name of this context.
        if (Tok->Parent->FormatTok.TokenLength >
            Contexts.back().LongestObjCSelectorName)
          Contexts.back().LongestObjCSelectorName =
              Tok->Parent->FormatTok.TokenLength;
        if (Contexts.back().FirstObjCSelectorName == NULL)
          Contexts.back().FirstObjCSelectorName = Tok->Parent;
      } else if (Contexts.back().ColonIsForRangeExpr) {
        Tok->Type = TT_RangeBasedForLoopColon;
      } else if (Contexts.size() == 1) {
        Tok->Type = TT_InheritanceColon;
      } else if (Contexts.back().ContextKind == tok::l_paren) {
        Tok->Type = TT_InlineASMColon;
      }
      break;
    case tok::kw_if:
    case tok::kw_while:
      if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
        next();
        if (!parseParens(/*LookForDecls=*/ true))
          return false;
      }
      break;
    case tok::kw_for:
      Contexts.back().ColonIsForRangeExpr = true;
      // Skips the token after 'for' without checking that it is a '('.
      next();
      if (!parseParens())
        return false;
      break;
    case tok::l_paren:
      if (!parseParens())
        return false;
      if (Line.MustBeDeclaration)
        Line.MightBeFunctionDecl = true;
      break;
    case tok::l_square:
      if (!parseSquare())
        return false;
      break;
    case tok::l_brace:
      if (!parseBrace())
        return false;
      break;
    case tok::less:
      if (parseAngle())
        Tok->Type = TT_TemplateOpener;
      else {
        // Not a template list: treat '<' as a binary operator and resume
        // parsing right after it.
        Tok->Type = TT_BinaryOperator;
        CurrentToken = Tok;
        next();
      }
      break;
    case tok::r_paren:
    case tok::r_square:
      // Closing brackets are consumed by the bracket parsers; seeing one
      // here means it is unmatched.
      return false;
    case tok::r_brace:
      // Lines can start with '}'.
      if (Tok->Parent != NULL)
        return false;
      break;
    case tok::greater:
      Tok->Type = TT_BinaryOperator;
      break;
    case tok::kw_operator:
      // Consume everything up to the next '('; the result of the nested
      // consumeToken() calls is ignored.
      while (CurrentToken && CurrentToken->isNot(tok::l_paren)) {
        if (CurrentToken->isOneOf(tok::star, tok::amp))
          CurrentToken->Type = TT_PointerOrReference;
        consumeToken();
      }
      if (CurrentToken)
        CurrentToken->Type = TT_OverloadedOperatorLParen;
      break;
    case tok::question:
      // The result is ignored; a missing ':' does not invalidate the line.
      parseConditional();
      break;
    case tok::kw_template:
      // The result is ignored here as well.
      parseTemplateDeclaration();
      break;
    case tok::identifier:
      // The identifier "in" on a line starting with 'for'.
      if (Line.First.is(tok::kw_for) &&
          Tok->FormatTok.Tok.getIdentifierInfo() == &Ident_in)
        Tok->Type = TT_ObjCForIn;
      break;
    default:
      break;
    }
    return true;
  }
418 
419   void parseIncludeDirective() {
420     next();
421     if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
422       next();
423       while (CurrentToken != NULL) {
424         if (CurrentToken->isNot(tok::comment) ||
425             !CurrentToken->Children.empty())
426           CurrentToken->Type = TT_ImplicitStringLiteral;
427         next();
428       }
429     } else {
430       while (CurrentToken != NULL) {
431         if (CurrentToken->is(tok::string_literal))
432           // Mark these string literals as "implicit" literals, too, so that
433           // they are not split or line-wrapped.
434           CurrentToken->Type = TT_ImplicitStringLiteral;
435         next();
436       }
437     }
438   }
439 
440   void parseWarningOrError() {
441     next();
442     // We still want to format the whitespace left of the first token of the
443     // warning or error.
444     next();
445     while (CurrentToken != NULL) {
446       CurrentToken->Type = TT_ImplicitStringLiteral;
447       next();
448     }
449   }
450 
451   void parsePreprocessorDirective() {
452     next();
453     if (CurrentToken == NULL)
454       return;
455     // Hashes in the middle of a line can lead to any strange token
456     // sequence.
457     if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
458       return;
459     switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
460     case tok::pp_include:
461     case tok::pp_import:
462       parseIncludeDirective();
463       break;
464     case tok::pp_error:
465     case tok::pp_warning:
466       parseWarningOrError();
467       break;
468     default:
469       break;
470     }
471     while (CurrentToken != NULL)
472       next();
473   }
474 
475 public:
476   LineType parseLine() {
477     int PeriodsAndArrows = 0;
478     AnnotatedToken *LastPeriodOrArrow = NULL;
479     bool CanBeBuilderTypeStmt = true;
480     if (CurrentToken->is(tok::hash)) {
481       parsePreprocessorDirective();
482       return LT_PreprocessorDirective;
483     }
484     while (CurrentToken != NULL) {
485       if (CurrentToken->is(tok::kw_virtual))
486         KeywordVirtualFound = true;
487       if (CurrentToken->isOneOf(tok::period, tok::arrow)) {
488         ++PeriodsAndArrows;
489         LastPeriodOrArrow = CurrentToken;
490       }
491       AnnotatedToken *TheToken = CurrentToken;
492       if (!consumeToken())
493         return LT_Invalid;
494       if (getPrecedence(*TheToken) > prec::Assignment &&
495           TheToken->Type == TT_BinaryOperator)
496         CanBeBuilderTypeStmt = false;
497     }
498     if (KeywordVirtualFound)
499       return LT_VirtualFunctionDecl;
500 
501     // Assume a builder-type call if there are 2 or more "." and "->".
502     if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) {
503       LastPeriodOrArrow->LastInChainOfCalls = true;
504       return LT_BuilderTypeCall;
505     }
506 
507     if (Line.First.Type == TT_ObjCMethodSpecifier) {
508       if (Contexts.back().FirstObjCSelectorName != NULL)
509         Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
510             Contexts.back().LongestObjCSelectorName;
511       return LT_ObjCMethodDecl;
512     }
513 
514     return LT_Other;
515   }
516 
517 private:
518   void next() {
519     if (CurrentToken != NULL) {
520       determineTokenType(*CurrentToken);
521       CurrentToken->BindingStrength = Contexts.back().BindingStrength;
522     }
523 
524     if (CurrentToken != NULL && !CurrentToken->Children.empty())
525       CurrentToken = &CurrentToken->Children[0];
526     else
527       CurrentToken = NULL;
528 
529     // Reset token type in case we have already looked at it and then recovered
530     // from an error (e.g. failure to find the matching >).
531     if (CurrentToken != NULL)
532       CurrentToken->Type = TT_Unknown;
533   }
534 
  /// \brief A struct to hold information valid in a specific context, e.g.
  /// a pair of parentheses.
  struct Context {
    /// \brief Creates a context with the given kind, binding strength and
    /// expression flag; all other fields start out zero/false/NULL, except
    /// \c CanBeExpression, which starts out \c true.
    Context(tok::TokenKind ContextKind, unsigned BindingStrength,
            bool IsExpression)
        : ContextKind(ContextKind), BindingStrength(BindingStrength),
          LongestObjCSelectorName(0), ColonIsForRangeExpr(false),
          ColonIsObjCMethodExpr(false), FirstObjCSelectorName(NULL),
          IsExpression(IsExpression), CanBeExpression(true) {}

    // Kind of the token that opened this context (tok::unknown for the
    // outermost one).
    tok::TokenKind ContextKind;
    // Copied onto every token that is finalized while this context is
    // innermost.
    unsigned BindingStrength;
    // Largest TokenLength among the selector names seen in this context.
    unsigned LongestObjCSelectorName;
    // If set, a ':' is typed as TT_RangeBasedForLoopColon.
    bool ColonIsForRangeExpr;
    // If set, a ':' is typed as TT_ObjCMethodExpr.
    bool ColonIsObjCMethodExpr;
    // First selector name token seen in this context, or NULL.
    AnnotatedToken *FirstObjCSelectorName;
    // Whether tokens in this context are treated as part of an expression.
    bool IsExpression;
    // Cleared when a 'new' keyword is seen; consulted when deciding whether
    // a '[' starts an Objective-C method expression.
    bool CanBeExpression;
  };
554 
555   /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
556   /// of each instance.
557   struct ScopedContextCreator {
558     AnnotatingParser &P;
559 
560     ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
561                          unsigned Increase)
562         : P(P) {
563       P.Contexts.push_back(
564           Context(ContextKind, P.Contexts.back().BindingStrength + Increase,
565                   P.Contexts.back().IsExpression));
566     }
567 
568     ~ScopedContextCreator() { P.Contexts.pop_back(); }
569   };
570 
571   void determineTokenType(AnnotatedToken &Current) {
572     if (getPrecedence(Current) == prec::Assignment) {
573       Contexts.back().IsExpression = true;
574       for (AnnotatedToken *Previous = Current.Parent;
575            Previous && Previous->isNot(tok::comma);
576            Previous = Previous->Parent) {
577         if (Previous->is(tok::r_square))
578           Previous = Previous->MatchingParen;
579         if (Previous->Type == TT_BinaryOperator &&
580             Previous->isOneOf(tok::star, tok::amp)) {
581           Previous->Type = TT_PointerOrReference;
582         }
583       }
584     } else if (Current.isOneOf(tok::kw_return, tok::kw_throw) ||
585                (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
586                 (!Current.Parent || Current.Parent->isNot(tok::kw_for)))) {
587       Contexts.back().IsExpression = true;
588     } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
589       for (AnnotatedToken *Previous = Current.Parent;
590            Previous && Previous->isOneOf(tok::star, tok::amp);
591            Previous = Previous->Parent)
592         Previous->Type = TT_PointerOrReference;
593     } else if (Current.Parent &&
594                Current.Parent->Type == TT_CtorInitializerColon) {
595       Contexts.back().IsExpression = true;
596     } else if (Current.is(tok::kw_new)) {
597       Contexts.back().CanBeExpression = false;
598     }
599 
600     if (Current.Type == TT_Unknown) {
601       if (Current.Parent && Current.is(tok::identifier) &&
602           ((Current.Parent->is(tok::identifier) &&
603             Current.Parent->FormatTok.Tok.getIdentifierInfo()
604                 ->getPPKeywordID() == tok::pp_not_keyword) ||
605            Current.Parent->Type == TT_PointerOrReference ||
606            Current.Parent->Type == TT_TemplateCloser)) {
607         Current.Type = TT_StartOfName;
608       } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
609         Current.Type =
610             determineStarAmpUsage(Current, Contexts.back().IsExpression);
611       } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
612         Current.Type = determinePlusMinusCaretUsage(Current);
613       } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
614         Current.Type = determineIncrementUsage(Current);
615       } else if (Current.is(tok::exclaim)) {
616         Current.Type = TT_UnaryOperator;
617       } else if (isBinaryOperator(Current)) {
618         Current.Type = TT_BinaryOperator;
619       } else if (Current.is(tok::comment)) {
620         std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
621                                             Lex.getLangOpts()));
622         if (StringRef(Data).startswith("//"))
623           Current.Type = TT_LineComment;
624         else
625           Current.Type = TT_BlockComment;
626       } else if (Current.is(tok::r_paren)) {
627         bool ParensNotExpr = !Current.Parent ||
628                              Current.Parent->Type == TT_PointerOrReference ||
629                              Current.Parent->Type == TT_TemplateCloser;
630         bool ParensCouldEndDecl =
631             !Current.Children.empty() &&
632             Current.Children[0].isOneOf(tok::equal, tok::semi, tok::l_brace);
633         bool IsSizeOfOrAlignOf =
634             Current.MatchingParen && Current.MatchingParen->Parent &&
635             Current.MatchingParen->Parent->isOneOf(tok::kw_sizeof,
636                                                    tok::kw_alignof);
637         if (ParensNotExpr && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
638             Contexts.back().IsExpression)
639           // FIXME: We need to get smarter and understand more cases of casts.
640           Current.Type = TT_CastRParen;
641       } else if (Current.is(tok::at) && Current.Children.size()) {
642         switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
643         case tok::objc_interface:
644         case tok::objc_implementation:
645         case tok::objc_protocol:
646           Current.Type = TT_ObjCDecl;
647           break;
648         case tok::objc_property:
649           Current.Type = TT_ObjCProperty;
650           break;
651         default:
652           break;
653         }
654       }
655     }
656   }
657 
658   /// \brief Return the type of the given token assuming it is * or &.
659   TokenType
660   determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
661     const AnnotatedToken *PrevToken = getPreviousToken(Tok);
662     if (PrevToken == NULL)
663       return TT_UnaryOperator;
664 
665     const AnnotatedToken *NextToken = getNextToken(Tok);
666     if (NextToken == NULL)
667       return TT_Unknown;
668 
669     if (PrevToken->is(tok::l_paren) && !IsExpression)
670       return TT_PointerOrReference;
671 
672     if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
673                            tok::comma, tok::semi, tok::kw_return, tok::colon,
674                            tok::equal) ||
675         PrevToken->Type == TT_BinaryOperator ||
676         PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
677       return TT_UnaryOperator;
678 
679     if (NextToken->is(tok::l_square))
680       return TT_PointerOrReference;
681 
682     if (PrevToken->FormatTok.Tok.isLiteral() ||
683         PrevToken->isOneOf(tok::r_paren, tok::r_square) ||
684         NextToken->FormatTok.Tok.isLiteral() || isUnaryOperator(*NextToken) ||
685         NextToken->isOneOf(tok::l_paren, tok::l_square))
686       return TT_BinaryOperator;
687 
688     // It is very unlikely that we are going to find a pointer or reference type
689     // definition on the RHS of an assignment.
690     if (IsExpression)
691       return TT_BinaryOperator;
692 
693     return TT_PointerOrReference;
694   }
695 
696   TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
697     const AnnotatedToken *PrevToken = getPreviousToken(Tok);
698     if (PrevToken == NULL)
699       return TT_UnaryOperator;
700 
701     // Use heuristics to recognize unary operators.
702     if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
703                            tok::question, tok::colon, tok::kw_return,
704                            tok::kw_case, tok::at, tok::l_brace))
705       return TT_UnaryOperator;
706 
707     // There can't be two consecutive binary operators.
708     if (PrevToken->Type == TT_BinaryOperator)
709       return TT_UnaryOperator;
710 
711     // Fall back to marking the token as binary operator.
712     return TT_BinaryOperator;
713   }
714 
715   /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
716   TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
717     const AnnotatedToken *PrevToken = getPreviousToken(Tok);
718     if (PrevToken == NULL)
719       return TT_UnaryOperator;
720     if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
721       return TT_TrailingUnaryOperator;
722 
723     return TT_UnaryOperator;
724   }
725 
726   SmallVector<Context, 8> Contexts;
727 
728   SourceManager &SourceMgr;
729   Lexer &Lex;
730   AnnotatedLine &Line;
731   AnnotatedToken *CurrentToken;
732   bool KeywordVirtualFound;
733   IdentifierInfo &Ident_in;
734 };
735 
/// \brief Parses binary expressions by inserting fake parenthesis based on
/// operator precedence.
///
/// The result is recorded in the \c FakeLParens / \c FakeRParens counters of
/// the tokens; the token sequence itself is not changed.
class ExpressionParser {
public:
  /// \brief Creates a parser positioned at the first token of \p Line.
  ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {}

  /// \brief Parse expressions with the given operator precedence.
  ///
  /// Recurses with \p Precedence + 1 to consume tighter-binding operators
  /// first. If at least one operator of exactly this precedence level was
  /// found, a fake left parenthesis is added to the first token of the
  /// sub-expression and a fake right parenthesis to its last token (the
  /// latter only if the sub-expression does not end at the end of the line).
  void parse(int Precedence = 0) {
    // Recursion ends above the highest precedence level or at end of line.
    if (Precedence > prec::PointerToMember || Current == NULL)
      return;

    // Skip over "return" until we can properly parse it.
    if (Current->is(tok::kw_return))
      next();

    // Eagerly consume trailing comments.
    while (isTrailingComment(Current)) {
      next();
    }

    AnnotatedToken *Start = Current;
    bool OperatorFound = false;

    while (Current) {
      // Consume operators with higher precedence.
      parse(prec::Level(Precedence + 1));

      // 0 means "not an operator"; real levels are shifted up by one.
      int CurrentPrecedence = 0;
      if (Current) {
        if (Current->Type == TT_ConditionalExpr)
          CurrentPrecedence = 1 + (int) prec::Conditional;
        else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon ||
                 Current->Type == TT_CtorInitializerColon)
          CurrentPrecedence = 1;
        else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
          CurrentPrecedence = 1 + (int) getPrecedence(*Current);
      }

      // At the end of the line, at a scope-closing token, or when an operator
      // with a lower level than Precedence is found, insert fake parenthesis
      // and return.
      if (Current == NULL || closesScope(*Current) ||
          (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) {
        if (OperatorFound) {
          ++Start->FakeLParens;
          if (Current)
            ++Current->Parent->FakeRParens;
        }
        return;
      }

      // Consume scopes: (), [], <> and {}
      if (opensScope(*Current)) {
        AnnotatedToken *Left = Current;
        while (Current && !closesScope(*Current)) {
          next();
          parse();
        }
        // Remove fake parens that just duplicate the real parens.
        if (Current && Left->Children[0].FakeLParens > 0 &&
            Current->Parent->FakeRParens > 0) {
          --Left->Children[0].FakeLParens;
          --Current->Parent->FakeRParens;
        }
        next();
      } else {
        // Operator found.
        if (CurrentPrecedence == Precedence)
          OperatorFound = true;

        next();
      }
    }
  }

private:
  /// \brief Advances to the first child of the current token, or to NULL at
  /// the end of the line. Does nothing if already at NULL.
  void next() {
    if (Current != NULL)
      Current = Current->Children.empty() ? NULL : &Current->Children[0];
  }

  // The token the parser is currently looking at; NULL at end of line.
  AnnotatedToken *Current;
};
818 
819 void TokenAnnotator::annotate(AnnotatedLine &Line) {
820   AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in);
821   Line.Type = Parser.parseLine();
822   if (Line.Type == LT_Invalid)
823     return;
824 
825   ExpressionParser ExprParser(Line);
826   ExprParser.parse();
827 
828   if (Line.First.Type == TT_ObjCMethodSpecifier)
829     Line.Type = LT_ObjCMethodDecl;
830   else if (Line.First.Type == TT_ObjCDecl)
831     Line.Type = LT_ObjCDecl;
832   else if (Line.First.Type == TT_ObjCProperty)
833     Line.Type = LT_ObjCProperty;
834 
835   Line.First.SpacesRequiredBefore = 1;
836   Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
837   Line.First.CanBreakBefore = Line.First.MustBreakBefore;
838 
839   Line.First.TotalLength = Line.First.FormatTok.TokenLength;
840 }
841 
842 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
843   if (Line.First.Children.empty())
844     return;
845   AnnotatedToken *Current = &Line.First.Children[0];
846   while (Current != NULL) {
847     if (Current->Type == TT_LineComment)
848       Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
849     else
850       Current->SpacesRequiredBefore =
851           spaceRequiredBefore(Line, *Current) ? 1 : 0;
852 
853     if (Current->FormatTok.MustBreakBefore) {
854       Current->MustBreakBefore = true;
855     } else if (Current->Type == TT_LineComment) {
856       Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0;
857     } else if (isTrailingComment(Current->Parent) ||
858                (Current->is(tok::string_literal) &&
859                 Current->Parent->is(tok::string_literal))) {
860       Current->MustBreakBefore = true;
861     } else if (Current->is(tok::lessless) && !Current->Children.empty() &&
862                Current->Parent->is(tok::string_literal) &&
863                Current->Children[0].is(tok::string_literal)) {
864       Current->MustBreakBefore = true;
865     } else {
866       Current->MustBreakBefore = false;
867     }
868     Current->CanBreakBefore =
869         Current->MustBreakBefore || canBreakBefore(Line, *Current);
870     if (Current->MustBreakBefore)
871       Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit;
872     else
873       Current->TotalLength =
874           Current->Parent->TotalLength + Current->FormatTok.TokenLength +
875           Current->SpacesRequiredBefore;
876     // FIXME: Only calculate this if CanBreakBefore is true once static
877     // initializers etc. are sorted out.
878     // FIXME: Move magic numbers to a better place.
879     Current->SplitPenalty =
880         20 * Current->BindingStrength + splitPenalty(Line, *Current);
881 
882     Current = Current->Children.empty() ? NULL : &Current->Children[0];
883   }
884 }
885 
886 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
887                                       const AnnotatedToken &Tok) {
888   const AnnotatedToken &Left = *Tok.Parent;
889   const AnnotatedToken &Right = Tok;
890 
891   if (Right.Type == TT_StartOfName) {
892     if (Line.First.is(tok::kw_for))
893       return 3;
894     else if (Line.MightBeFunctionDecl && Right.BindingStrength == 1)
895       // FIXME: Clean up hack of using BindingStrength to find top-level names.
896       return Style.PenaltyReturnTypeOnItsOwnLine;
897     else
898       return 100;
899   }
900   if (Left.is(tok::equal) && Right.is(tok::l_brace))
901     return 150;
902   if (Left.is(tok::coloncolon))
903     return 500;
904 
905   if (Left.Type == TT_RangeBasedForLoopColon ||
906       Left.Type == TT_InheritanceColon)
907     return 2;
908 
909   if (Right.isOneOf(tok::arrow, tok::period)) {
910     if (Line.Type == LT_BuilderTypeCall)
911       return prec::PointerToMember;
912     if (Left.isOneOf(tok::r_paren, tok::r_square) && Left.MatchingParen &&
913         Left.MatchingParen->ParameterCount > 0)
914       return 20; // Should be smaller than breaking at a nested comma.
915     return 150;
916   }
917 
918   // In for-loops, prefer breaking at ',' and ';'.
919   if (Line.First.is(tok::kw_for) && Left.is(tok::equal))
920     return 4;
921 
922   if (Left.is(tok::semi))
923     return 0;
924   if (Left.is(tok::comma))
925     return 1;
926 
927   // In Objective-C method expressions, prefer breaking before "param:" over
928   // breaking after it.
929   if (Right.Type == TT_ObjCSelectorName)
930     return 0;
931   if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
932     return 20;
933 
934   if (opensScope(Left))
935     return 20;
936 
937   if (Right.is(tok::lessless)) {
938     if (Left.is(tok::string_literal)) {
939       StringRef Content = StringRef(Left.FormatTok.Tok.getLiteralData(),
940                                     Left.FormatTok.TokenLength);
941       Content = Content.drop_back(1).drop_front(1).trim();
942       if (Content.size() > 1 &&
943           (Content.back() == ':' || Content.back() == '='))
944         return 100;
945     }
946     return prec::Shift;
947   }
948   if (Left.Type == TT_ConditionalExpr)
949     return prec::Conditional;
950   prec::Level Level = getPrecedence(Left);
951 
952   if (Level != prec::Unknown)
953     return Level;
954 
955   return 3;
956 }
957 
958 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
959                                           const AnnotatedToken &Left,
960                                           const AnnotatedToken &Right) {
961   if (Right.is(tok::hashhash))
962     return Left.is(tok::hash);
963   if (Left.isOneOf(tok::hashhash, tok::hash))
964     return Right.is(tok::hash);
965   if (Right.isOneOf(tok::r_paren, tok::semi, tok::comma))
966     return false;
967   if (Right.is(tok::less) &&
968       (Left.is(tok::kw_template) ||
969        (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
970     return true;
971   if (Left.is(tok::arrow) || Right.is(tok::arrow))
972     return false;
973   if (Left.isOneOf(tok::exclaim, tok::tilde))
974     return false;
975   if (Left.is(tok::at) &&
976       Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
977                     tok::numeric_constant, tok::l_paren, tok::l_brace,
978                     tok::kw_true, tok::kw_false))
979     return false;
980   if (Left.is(tok::coloncolon))
981     return false;
982   if (Right.is(tok::coloncolon))
983     return !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren);
984   if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
985     return false;
986   if (Right.Type == TT_PointerOrReference)
987     return Left.FormatTok.Tok.isLiteral() ||
988            ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) &&
989             !Style.PointerBindsToType);
990   if (Left.Type == TT_PointerOrReference)
991     return Right.FormatTok.Tok.isLiteral() ||
992            ((Right.Type != TT_PointerOrReference) && Style.PointerBindsToType);
993   if (Right.is(tok::star) && Left.is(tok::l_paren))
994     return false;
995   if (Left.is(tok::l_square))
996     return Left.Type == TT_ObjCArrayLiteral && Right.isNot(tok::r_square);
997   if (Right.is(tok::r_square))
998     return Right.Type == TT_ObjCArrayLiteral;
999   if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
1000     return false;
1001   if (Left.is(tok::period) || Right.is(tok::period))
1002     return false;
1003   if (Left.is(tok::colon))
1004     return Left.Type != TT_ObjCMethodExpr;
1005   if (Right.is(tok::colon))
1006     return Right.Type != TT_ObjCMethodExpr;
1007   if (Left.is(tok::l_paren))
1008     return false;
1009   if (Right.is(tok::l_paren)) {
1010     return Line.Type == LT_ObjCDecl ||
1011            Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
1012                         tok::kw_return, tok::kw_catch, tok::kw_new,
1013                         tok::kw_delete);
1014   }
1015   if (Left.is(tok::at) &&
1016       Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1017     return false;
1018   if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1019     return false;
1020   return true;
1021 }
1022 
1023 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
1024                                          const AnnotatedToken &Tok) {
1025   if (Tok.FormatTok.Tok.getIdentifierInfo() &&
1026       Tok.Parent->FormatTok.Tok.getIdentifierInfo())
1027     return true; // Never ever merge two identifiers.
1028   if (Line.Type == LT_ObjCMethodDecl) {
1029     if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
1030       return true;
1031     if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
1032       // Don't space between ')' and <id>
1033       return false;
1034   }
1035   if (Line.Type == LT_ObjCProperty &&
1036       (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
1037     return false;
1038 
1039   if (Tok.Parent->is(tok::comma))
1040     return true;
1041   if (Tok.is(tok::comma))
1042     return false;
1043   if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
1044     return true;
1045   if (Tok.Parent->FormatTok.Tok.is(tok::kw_operator))
1046     return false;
1047   if (Tok.Type == TT_OverloadedOperatorLParen)
1048     return false;
1049   if (Tok.is(tok::colon))
1050     return !Line.First.isOneOf(tok::kw_case, tok::kw_default) &&
1051            !Tok.Children.empty() && Tok.Type != TT_ObjCMethodExpr;
1052   if (Tok.is(tok::l_paren) && !Tok.Children.empty() &&
1053       Tok.Children[0].Type == TT_PointerOrReference &&
1054       !Tok.Children[0].Children.empty() &&
1055       Tok.Children[0].Children[0].isNot(tok::r_paren))
1056     return true;
1057   if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
1058     return false;
1059   if (Tok.Type == TT_UnaryOperator)
1060     return !Tok.Parent->isOneOf(tok::l_paren, tok::l_square, tok::at) &&
1061            (Tok.Parent->isNot(tok::colon) ||
1062             Tok.Parent->Type != TT_ObjCMethodExpr);
1063   if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
1064     return Tok.Type == TT_TemplateCloser &&
1065            Tok.Parent->Type == TT_TemplateCloser &&
1066            Style.Standard != FormatStyle::LS_Cpp11;
1067   }
1068   if (Tok.is(tok::arrowstar) || Tok.Parent->is(tok::arrowstar))
1069     return false;
1070   if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
1071     return true;
1072   if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
1073     return false;
1074   if (Tok.is(tok::less) && Line.First.is(tok::hash))
1075     return true;
1076   if (Tok.Type == TT_TrailingUnaryOperator)
1077     return false;
1078   return spaceRequiredBetween(Line, *Tok.Parent, Tok);
1079 }
1080 
1081 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
1082                                     const AnnotatedToken &Right) {
1083   const AnnotatedToken &Left = *Right.Parent;
1084   if (Right.Type == TT_StartOfName)
1085     return true;
1086   if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
1087     return false;
1088   if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
1089     return true;
1090   if (Right.Type == TT_ObjCSelectorName)
1091     return true;
1092   if (Left.ClosesTemplateDeclaration)
1093     return true;
1094   if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
1095     return true;
1096   if (Right.Type == TT_RangeBasedForLoopColon ||
1097       Right.Type == TT_InheritanceColon)
1098     return false;
1099   if (Left.Type == TT_RangeBasedForLoopColon ||
1100       Left.Type == TT_InheritanceColon)
1101     return true;
1102   if (Right.Type == TT_RangeBasedForLoopColon)
1103     return false;
1104   if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
1105       Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
1106       Left.isOneOf(tok::question, tok::kw_operator))
1107     return false;
1108   if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
1109     return false;
1110   if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && Left.Parent &&
1111       Left.Parent->is(tok::kw___attribute))
1112     return false;
1113 
1114   if (Right.Type == TT_LineComment)
1115     // We rely on MustBreakBefore being set correctly here as we should not
1116     // change the "binding" behavior of a comment.
1117     return false;
1118 
1119   // Allow breaking after a trailing 'const', e.g. after a method declaration,
1120   // unless it is follow by ';', '{' or '='.
1121   if (Left.is(tok::kw_const) && Left.Parent != NULL &&
1122       Left.Parent->is(tok::r_paren))
1123     return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal);
1124 
1125   if (Right.is(tok::kw___attribute))
1126     return true;
1127 
1128   // We only break before r_brace if there was a corresponding break before
1129   // the l_brace, which is tracked by BreakBeforeClosingBrace.
1130   if (Right.isOneOf(tok::r_brace, tok::r_paren, tok::greater))
1131     return false;
1132   if (Left.is(tok::identifier) && Right.is(tok::string_literal))
1133     return true;
1134   return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
1135          Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace) ||
1136          Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon) ||
1137          (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
1138           Right.isOneOf(tok::identifier, tok::kw___attribute)) ||
1139          (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
1140          (Left.is(tok::l_square) && !Right.is(tok::r_square));
1141 }
1142 
1143 } // namespace format
1144 } // namespace clang
1145