1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 /// This is EXPERIMENTAL code under heavy development. It is not in a state yet,
15 /// where it can be used to format real code.
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #define DEBUG_TYPE "format-formatter"
20 
21 #include "UnwrappedLineParser.h"
22 #include "clang/Basic/Diagnostic.h"
23 #include "clang/Basic/OperatorPrecedence.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Format/Format.h"
26 #include "clang/Frontend/TextDiagnosticPrinter.h"
27 #include "clang/Lex/Lexer.h"
28 #include "llvm/Support/Debug.h"
29 #include <string>
30 
31 // Uncomment to get debug output from tests:
32 // #define DEBUG_WITH_TYPE(T, X) do { X; } while(0)
33 
34 namespace clang {
35 namespace format {
36 
37 enum TokenType {
38   TT_BinaryOperator,
39   TT_BlockComment,
40   TT_CastRParen,
41   TT_ConditionalExpr,
42   TT_CtorInitializerColon,
43   TT_ImplicitStringLiteral,
44   TT_LineComment,
45   TT_ObjCBlockLParen,
46   TT_ObjCDecl,
47   TT_ObjCMethodSpecifier,
48   TT_ObjCMethodExpr,
49   TT_ObjCProperty,
50   TT_OverloadedOperator,
51   TT_PointerOrReference,
52   TT_PureVirtualSpecifier,
53   TT_RangeBasedForLoopColon,
54   TT_StartOfName,
55   TT_TemplateCloser,
56   TT_TemplateOpener,
57   TT_TrailingUnaryOperator,
58   TT_UnaryOperator,
59   TT_Unknown
60 };
61 
62 enum LineType {
63   LT_Invalid,
64   LT_Other,
65   LT_BuilderTypeCall,
66   LT_PreprocessorDirective,
67   LT_VirtualFunctionDecl,
68   LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
69   LT_ObjCMethodDecl,
70   LT_ObjCProperty // An @property line.
71 };
72 
73 class AnnotatedToken {
74 public:
75   explicit AnnotatedToken(const FormatToken &FormatTok)
76       : FormatTok(FormatTok), Type(TT_Unknown), SpaceRequiredBefore(false),
77         CanBreakBefore(false), MustBreakBefore(false),
78         ClosesTemplateDeclaration(false), MatchingParen(NULL),
79         ParameterCount(1), Parent(NULL) {
80   }
81 
82   bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
83   bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); }
84 
85   bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
86     return FormatTok.Tok.isObjCAtKeyword(Kind);
87   }
88 
89   FormatToken FormatTok;
90 
91   TokenType Type;
92 
93   bool SpaceRequiredBefore;
94   bool CanBreakBefore;
95   bool MustBreakBefore;
96 
97   bool ClosesTemplateDeclaration;
98 
99   AnnotatedToken *MatchingParen;
100 
101   /// \brief Number of parameters, if this is "(", "[" or "<".
102   ///
103   /// This is initialized to 1 as we don't need to distinguish functions with
104   /// 0 parameters from functions with 1 parameter. Thus, we can simply count
105   /// the number of commas.
106   unsigned ParameterCount;
107 
108   /// \brief The total length of the line up to and including this token.
109   unsigned TotalLength;
110 
111   std::vector<AnnotatedToken> Children;
112   AnnotatedToken *Parent;
113 
114   const AnnotatedToken *getPreviousNoneComment() const {
115     AnnotatedToken *Tok = Parent;
116     while (Tok != NULL && Tok->is(tok::comment))
117       Tok = Tok->Parent;
118     return Tok;
119   }
120 };
121 
122 class AnnotatedLine {
123 public:
124   AnnotatedLine(const UnwrappedLine &Line)
125       : First(Line.Tokens.front()), Level(Line.Level),
126         InPPDirective(Line.InPPDirective),
127         MustBeDeclaration(Line.MustBeDeclaration) {
128     assert(!Line.Tokens.empty());
129     AnnotatedToken *Current = &First;
130     for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(),
131                                                 E = Line.Tokens.end();
132          I != E; ++I) {
133       Current->Children.push_back(AnnotatedToken(*I));
134       Current->Children[0].Parent = Current;
135       Current = &Current->Children[0];
136     }
137     Last = Current;
138   }
139   AnnotatedLine(const AnnotatedLine &Other)
140       : First(Other.First), Type(Other.Type), Level(Other.Level),
141         InPPDirective(Other.InPPDirective),
142         MustBeDeclaration(Other.MustBeDeclaration) {
143     Last = &First;
144     while (!Last->Children.empty()) {
145       Last->Children[0].Parent = Last;
146       Last = &Last->Children[0];
147     }
148   }
149 
150   AnnotatedToken First;
151   AnnotatedToken *Last;
152 
153   LineType Type;
154   unsigned Level;
155   bool InPPDirective;
156   bool MustBeDeclaration;
157 };
158 
159 static prec::Level getPrecedence(const AnnotatedToken &Tok) {
160   return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
161 }
162 
163 bool isBinaryOperator(const AnnotatedToken &Tok) {
164   // Comma is a binary operator, but does not behave as such wrt. formatting.
165   return getPrecedence(Tok) > prec::Comma;
166 }
167 
168 FormatStyle getLLVMStyle() {
169   FormatStyle LLVMStyle;
170   LLVMStyle.ColumnLimit = 80;
171   LLVMStyle.MaxEmptyLinesToKeep = 1;
172   LLVMStyle.PointerAndReferenceBindToType = false;
173   LLVMStyle.AccessModifierOffset = -2;
174   LLVMStyle.SplitTemplateClosingGreater = true;
175   LLVMStyle.IndentCaseLabels = false;
176   LLVMStyle.SpacesBeforeTrailingComments = 1;
177   LLVMStyle.BinPackParameters = true;
178   LLVMStyle.AllowAllParametersOnNextLine = true;
179   LLVMStyle.AllowReturnTypeOnItsOwnLine = true;
180   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
181   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
182   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
183   return LLVMStyle;
184 }
185 
186 FormatStyle getGoogleStyle() {
187   FormatStyle GoogleStyle;
188   GoogleStyle.ColumnLimit = 80;
189   GoogleStyle.MaxEmptyLinesToKeep = 1;
190   GoogleStyle.PointerAndReferenceBindToType = true;
191   GoogleStyle.AccessModifierOffset = -1;
192   GoogleStyle.SplitTemplateClosingGreater = false;
193   GoogleStyle.IndentCaseLabels = true;
194   GoogleStyle.SpacesBeforeTrailingComments = 2;
195   GoogleStyle.BinPackParameters = false;
196   GoogleStyle.AllowAllParametersOnNextLine = true;
197   GoogleStyle.AllowReturnTypeOnItsOwnLine = false;
198   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
199   GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
200   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
201   return GoogleStyle;
202 }
203 
204 FormatStyle getChromiumStyle() {
205   FormatStyle ChromiumStyle = getGoogleStyle();
206   ChromiumStyle.AllowAllParametersOnNextLine = false;
207   return ChromiumStyle;
208 }
209 
210 struct OptimizationParameters {
211   unsigned PenaltyIndentLevel;
212   unsigned PenaltyExcessCharacter;
213 };
214 
215 /// \brief Manages the whitespaces around tokens and their replacements.
216 ///
217 /// This includes special handling for certain constructs, e.g. the alignment of
218 /// trailing line comments.
219 class WhitespaceManager {
220 public:
221   WhitespaceManager(SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
222 
223   /// \brief Replaces the whitespace in front of \p Tok. Only call once for
224   /// each \c AnnotatedToken.
225   void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
226                          unsigned Spaces, unsigned WhitespaceStartColumn,
227                          const FormatStyle &Style) {
228     // 2+ newlines mean an empty line separating logic scopes.
229     if (NewLines >= 2)
230       alignComments();
231 
232     // Align line comments if they are trailing or if they continue other
233     // trailing comments.
234     if (Tok.Type == TT_LineComment &&
235         (Tok.Parent != NULL || !Comments.empty())) {
236       if (Style.ColumnLimit >=
237           Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength) {
238         Comments.push_back(StoredComment());
239         Comments.back().Tok = Tok.FormatTok;
240         Comments.back().Spaces = Spaces;
241         Comments.back().NewLines = NewLines;
242         Comments.back().MinColumn = WhitespaceStartColumn + Spaces;
243         Comments.back().MaxColumn = Style.ColumnLimit -
244                                     Spaces - Tok.FormatTok.TokenLength;
245         return;
246       }
247     }
248 
249     // If this line does not have a trailing comment, align the stored comments.
250     if (Tok.Children.empty() && Tok.Type != TT_LineComment)
251       alignComments();
252     storeReplacement(Tok.FormatTok,
253                      std::string(NewLines, '\n') + std::string(Spaces, ' '));
254   }
255 
256   /// \brief Like \c replaceWhitespace, but additionally adds right-aligned
257   /// backslashes to escape newlines inside a preprocessor directive.
258   ///
259   /// This function and \c replaceWhitespace have the same behavior if
260   /// \c Newlines == 0.
261   void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
262                            unsigned Spaces, unsigned WhitespaceStartColumn,
263                            const FormatStyle &Style) {
264     std::string NewLineText;
265     if (NewLines > 0) {
266       unsigned Offset = std::min<int>(Style.ColumnLimit - 1,
267                                       WhitespaceStartColumn);
268       for (unsigned i = 0; i < NewLines; ++i) {
269         NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' ');
270         NewLineText += "\\\n";
271         Offset = 0;
272       }
273     }
274     storeReplacement(Tok.FormatTok, NewLineText + std::string(Spaces, ' '));
275   }
276 
277   /// \brief Returns all the \c Replacements created during formatting.
278   const tooling::Replacements &generateReplacements() {
279     alignComments();
280     return Replaces;
281   }
282 
283 private:
284   /// \brief Structure to store a comment for later layout and alignment.
285   struct StoredComment {
286     FormatToken Tok;
287     unsigned MinColumn;
288     unsigned MaxColumn;
289     unsigned NewLines;
290     unsigned Spaces;
291   };
292   SmallVector<StoredComment, 16> Comments;
293   typedef SmallVector<StoredComment, 16>::iterator comment_iterator;
294 
295   /// \brief Try to align all stashed comments.
296   void alignComments() {
297     unsigned MinColumn = 0;
298     unsigned MaxColumn = UINT_MAX;
299     comment_iterator Start = Comments.begin();
300     for (comment_iterator I = Comments.begin(), E = Comments.end(); I != E;
301          ++I) {
302       if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) {
303         alignComments(Start, I, MinColumn);
304         MinColumn = I->MinColumn;
305         MaxColumn = I->MaxColumn;
306         Start = I;
307       } else {
308         MinColumn = std::max(MinColumn, I->MinColumn);
309         MaxColumn = std::min(MaxColumn, I->MaxColumn);
310       }
311     }
312     alignComments(Start, Comments.end(), MinColumn);
313     Comments.clear();
314   }
315 
316   /// \brief Put all the comments between \p I and \p E into \p Column.
317   void alignComments(comment_iterator I, comment_iterator E, unsigned Column) {
318     while (I != E) {
319       unsigned Spaces = I->Spaces + Column - I->MinColumn;
320       storeReplacement(I->Tok, std::string(I->NewLines, '\n') +
321                        std::string(Spaces, ' '));
322       ++I;
323     }
324   }
325 
326   /// \brief Stores \p Text as the replacement for the whitespace in front of
327   /// \p Tok.
328   void storeReplacement(const FormatToken &Tok, const std::string Text) {
329     Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart,
330                                          Tok.WhiteSpaceLength, Text));
331   }
332 
333   SourceManager &SourceMgr;
334   tooling::Replacements Replaces;
335 };
336 
337 /// \brief Returns if a token is an Objective-C selector name.
338 ///
339 /// For example, "bar" is a selector name in [foo bar:(4 + 5)].
340 static bool isObjCSelectorName(const AnnotatedToken &Tok) {
341   return Tok.is(tok::identifier) && !Tok.Children.empty() &&
342          Tok.Children[0].is(tok::colon) &&
343          Tok.Children[0].Type == TT_ObjCMethodExpr;
344 }
345 
346 class UnwrappedLineFormatter {
347 public:
348   UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
349                          const AnnotatedLine &Line, unsigned FirstIndent,
350                          const AnnotatedToken &RootToken,
351                          WhitespaceManager &Whitespaces, bool StructuralError)
352       : Style(Style), SourceMgr(SourceMgr), Line(Line),
353         FirstIndent(FirstIndent), RootToken(RootToken),
354         Whitespaces(Whitespaces) {
355     Parameters.PenaltyIndentLevel = 20;
356     Parameters.PenaltyExcessCharacter = 1000000;
357   }
358 
359   /// \brief Formats an \c UnwrappedLine.
360   ///
361   /// \returns The column after the last token in the last line of the
362   /// \c UnwrappedLine.
363   unsigned format() {
364     // Initialize state dependent on indent.
365     LineState State;
366     State.Column = FirstIndent;
367     State.NextToken = &RootToken;
368     State.Stack.push_back(ParenState(FirstIndent + 4, FirstIndent));
369     State.ForLoopVariablePos = 0;
370     State.LineContainsContinuedForLoopSection = false;
371 
372     DEBUG({
373       DebugTokenState(*State.NextToken);
374     });
375 
376     // The first token has already been indented and thus consumed.
377     moveStateToNextToken(State);
378 
379     // Start iterating at 1 as we have correctly formatted of Token #0 above.
380     while (State.NextToken != NULL) {
381       if (State.NextToken->Type == TT_ImplicitStringLiteral) {
382         // Calculating the column is important for aligning trailing comments.
383         // FIXME: This does not seem to happen in conjunction with escaped
384         // newlines. If it does, fix!
385         State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
386                         State.NextToken->FormatTok.TokenLength;
387         State.NextToken = State.NextToken->Children.empty() ? NULL :
388                           &State.NextToken->Children[0];
389       } else if (Line.Last->TotalLength <= getColumnLimit() - FirstIndent) {
390         addTokenToState(false, false, State);
391       } else {
392         unsigned NoBreak = calcPenalty(State, false, UINT_MAX);
393         unsigned Break = calcPenalty(State, true, NoBreak);
394         DEBUG({
395           if (Break < NoBreak)
396             llvm::errs() << "\n";
397           else
398             llvm::errs() << " ";
399           llvm::errs() << "<";
400           DebugPenalty(Break, Break < NoBreak);
401           llvm::errs() << "/";
402           DebugPenalty(NoBreak, !(Break < NoBreak));
403           llvm::errs() << "> ";
404           DebugTokenState(*State.NextToken);
405         });
406         addTokenToState(Break < NoBreak, false, State);
407         if (State.NextToken != NULL &&
408             State.NextToken->Parent->Type == TT_CtorInitializerColon) {
409           if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine &&
410               Line.Last->TotalLength > getColumnLimit() - State.Column - 1)
411             State.Stack.back().BreakAfterComma = true;
412         }
413       }
414     }
415     DEBUG(llvm::errs() << "\n");
416     return State.Column;
417   }
418 
419 private:
420   void DebugTokenState(const AnnotatedToken &AnnotatedTok) {
421     const Token &Tok = AnnotatedTok.FormatTok.Tok;
422     llvm::errs()
423         << StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
424                      Tok.getLength());
425     llvm::errs();
426   }
427 
428   void DebugPenalty(unsigned Penalty, bool Winner) {
429     llvm::errs().changeColor(Winner ? raw_ostream::GREEN : raw_ostream::RED);
430     if (Penalty == UINT_MAX)
431       llvm::errs() << "MAX";
432     else
433       llvm::errs() << Penalty;
434     llvm::errs().resetColor();
435   }
436 
437   struct ParenState {
438     ParenState(unsigned Indent, unsigned LastSpace)
439         : Indent(Indent), LastSpace(LastSpace), AssignmentColumn(0),
440           FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0),
441           BreakAfterComma(false), HasMultiParameterLine(false) {}
442 
443     /// \brief The position to which a specific parenthesis level needs to be
444     /// indented.
445     unsigned Indent;
446 
447     /// \brief The position of the last space on each level.
448     ///
449     /// Used e.g. to break like:
450     /// functionCall(Parameter, otherCall(
451     ///                             OtherParameter));
452     unsigned LastSpace;
453 
454     /// \brief This is the column of the first token after an assignment.
455     unsigned AssignmentColumn;
456 
457     /// \brief The position the first "<<" operator encountered on each level.
458     ///
459     /// Used to align "<<" operators. 0 if no such operator has been encountered
460     /// on a level.
461     unsigned FirstLessLess;
462 
463     /// \brief Whether a newline needs to be inserted before the block's closing
464     /// brace.
465     ///
466     /// We only want to insert a newline before the closing brace if there also
467     /// was a newline after the beginning left brace.
468     bool BreakBeforeClosingBrace;
469 
470     /// \brief The column of a \c ? in a conditional expression;
471     unsigned QuestionColumn;
472 
473     bool BreakAfterComma;
474     bool HasMultiParameterLine;
475 
476     bool operator<(const ParenState &Other) const {
477       if (Indent != Other.Indent)
478         return Indent < Other.Indent;
479       if (LastSpace != Other.LastSpace)
480         return LastSpace < Other.LastSpace;
481       if (AssignmentColumn != Other.AssignmentColumn)
482         return AssignmentColumn < Other.AssignmentColumn;
483       if (FirstLessLess != Other.FirstLessLess)
484         return FirstLessLess < Other.FirstLessLess;
485       if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
486         return BreakBeforeClosingBrace;
487       if (QuestionColumn != Other.QuestionColumn)
488         return QuestionColumn < Other.QuestionColumn;
489       if (BreakAfterComma != Other.BreakAfterComma)
490         return BreakAfterComma;
491       if (HasMultiParameterLine != Other.HasMultiParameterLine)
492         return HasMultiParameterLine;
493       return false;
494     }
495   };
496 
497   /// \brief The current state when indenting a unwrapped line.
498   ///
499   /// As the indenting tries different combinations this is copied by value.
500   struct LineState {
501     /// \brief The number of used columns in the current line.
502     unsigned Column;
503 
504     /// \brief The token that needs to be next formatted.
505     const AnnotatedToken *NextToken;
506 
507     /// \brief The column of the first variable in a for-loop declaration.
508     ///
509     /// Used to align the second variable if necessary.
510     unsigned ForLoopVariablePos;
511 
512     /// \brief \c true if this line contains a continued for-loop section.
513     bool LineContainsContinuedForLoopSection;
514 
515     /// \brief A stack keeping track of properties applying to parenthesis
516     /// levels.
517     std::vector<ParenState> Stack;
518 
519     /// \brief Comparison operator to be able to used \c LineState in \c map.
520     bool operator<(const LineState &Other) const {
521       if (Other.NextToken != NextToken)
522         return Other.NextToken > NextToken;
523       if (Other.Column != Column)
524         return Other.Column > Column;
525       if (Other.ForLoopVariablePos != ForLoopVariablePos)
526         return Other.ForLoopVariablePos < ForLoopVariablePos;
527       if (Other.LineContainsContinuedForLoopSection !=
528           LineContainsContinuedForLoopSection)
529         return LineContainsContinuedForLoopSection;
530       return Other.Stack < Stack;
531     }
532   };
533 
534   /// \brief Appends the next token to \p State and updates information
535   /// necessary for indentation.
536   ///
537   /// Puts the token on the current line if \p Newline is \c true and adds a
538   /// line break and necessary indentation otherwise.
539   ///
540   /// If \p DryRun is \c false, also creates and stores the required
541   /// \c Replacement.
542   void addTokenToState(bool Newline, bool DryRun, LineState &State) {
543     const AnnotatedToken &Current = *State.NextToken;
544     const AnnotatedToken &Previous = *State.NextToken->Parent;
545     assert(State.Stack.size());
546     unsigned ParenLevel = State.Stack.size() - 1;
547 
548     if (Newline) {
549       unsigned WhitespaceStartColumn = State.Column;
550       if (Current.is(tok::r_brace)) {
551         State.Column = Line.Level * 2;
552       } else if (Current.is(tok::string_literal) &&
553                  Previous.is(tok::string_literal)) {
554         State.Column = State.Column - Previous.FormatTok.TokenLength;
555       } else if (Current.is(tok::lessless) &&
556                  State.Stack[ParenLevel].FirstLessLess != 0) {
557         State.Column = State.Stack[ParenLevel].FirstLessLess;
558       } else if (ParenLevel != 0 &&
559                  (Previous.is(tok::equal) || Previous.is(tok::coloncolon) ||
560                   Current.is(tok::period) || Current.is(tok::arrow) ||
561                   Current.is(tok::question))) {
562         // Indent and extra 4 spaces after if we know the current expression is
563         // continued.  Don't do that on the top level, as we already indent 4
564         // there.
565         State.Column = std::max(State.Stack.back().LastSpace,
566                                 State.Stack.back().Indent) + 4;
567       } else if (Current.Type == TT_ConditionalExpr) {
568         State.Column = State.Stack.back().QuestionColumn;
569       } else if (RootToken.is(tok::kw_for) && ParenLevel == 1 &&
570                  Previous.is(tok::comma)) {
571         State.Column = State.ForLoopVariablePos;
572       } else if (State.NextToken->Parent->ClosesTemplateDeclaration ||
573                  Current.Type == TT_StartOfName) {
574         State.Column = State.Stack[ParenLevel].Indent - 4;
575       } else if (Previous.Type == TT_BinaryOperator &&
576                  State.Stack.back().AssignmentColumn != 0) {
577         State.Column = State.Stack.back().AssignmentColumn;
578       } else {
579         State.Column = State.Stack[ParenLevel].Indent;
580       }
581 
582       if (RootToken.is(tok::kw_for))
583         State.LineContainsContinuedForLoopSection = Previous.isNot(tok::semi);
584 
585       if (!DryRun) {
586         if (!Line.InPPDirective)
587           Whitespaces.replaceWhitespace(Current, 1, State.Column,
588                                         WhitespaceStartColumn, Style);
589         else
590           Whitespaces.replacePPWhitespace(Current, 1, State.Column,
591                                           WhitespaceStartColumn, Style);
592       }
593 
594       State.Stack[ParenLevel].LastSpace = State.Column;
595       if (Current.is(tok::colon) && Current.Type != TT_ConditionalExpr)
596         State.Stack[ParenLevel].Indent += 2;
597     } else {
598       if (Current.is(tok::equal) && RootToken.is(tok::kw_for))
599         State.ForLoopVariablePos = State.Column -
600                                    Previous.FormatTok.TokenLength;
601 
602       unsigned Spaces = State.NextToken->SpaceRequiredBefore ? 1 : 0;
603       if (State.NextToken->Type == TT_LineComment)
604         Spaces = Style.SpacesBeforeTrailingComments;
605 
606       if (!DryRun)
607         Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column, Style);
608 
609       // FIXME: Do we need to do this for assignments nested in other
610       // expressions?
611       if (RootToken.isNot(tok::kw_for) && ParenLevel == 0 &&
612           (getPrecedence(Previous) == prec::Assignment ||
613            Previous.is(tok::kw_return)))
614         State.Stack.back().AssignmentColumn = State.Column + Spaces;
615       if (Previous.is(tok::l_paren) || Previous.is(tok::l_brace) ||
616           State.NextToken->Parent->Type == TT_TemplateOpener)
617         State.Stack[ParenLevel].Indent = State.Column + Spaces;
618       if (Current.getPreviousNoneComment() != NULL &&
619           Current.getPreviousNoneComment()->is(tok::comma) &&
620           Current.isNot(tok::comment))
621         State.Stack[ParenLevel].HasMultiParameterLine = true;
622 
623       State.Column += Spaces;
624       if (Current.is(tok::l_paren) && Previous.is(tok::kw_if))
625         // Treat the condition inside an if as if it was a second function
626         // parameter, i.e. let nested calls have an indent of 4.
627         State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(".
628       else if (Previous.is(tok::comma) && ParenLevel != 0)
629         // Top-level spaces are exempt as that mostly leads to better results.
630         State.Stack.back().LastSpace = State.Column;
631       else if ((Previous.Type == TT_BinaryOperator ||
632                 Previous.Type == TT_ConditionalExpr ||
633                 Previous.Type == TT_CtorInitializerColon) &&
634                getPrecedence(Previous) != prec::Assignment)
635         State.Stack.back().LastSpace = State.Column;
636       else if (Previous.ParameterCount > 1 &&
637                (Previous.is(tok::l_paren) || Previous.is(tok::l_square) ||
638                 Previous.Type == TT_TemplateOpener))
639         // If this function has multiple parameters, indent nested calls from
640         // the start of the first parameter.
641         State.Stack.back().LastSpace = State.Column;
642     }
643 
644     // If we break after an {, we should also break before the corresponding }.
645     if (Newline && Previous.is(tok::l_brace))
646       State.Stack.back().BreakBeforeClosingBrace = true;
647 
648     if (!Style.BinPackParameters && Newline) {
649       // If we are breaking after '(', '{', '<', this is not bin packing unless
650       // AllowAllParametersOnNextLine is false.
651       if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace) &&
652            Previous.Type != TT_TemplateOpener) ||
653           !Style.AllowAllParametersOnNextLine)
654         State.Stack.back().BreakAfterComma = true;
655 
656       // Any break on this level means that the parent level has been broken
657       // and we need to avoid bin packing there.
658       for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) {
659         State.Stack[i].BreakAfterComma = true;
660       }
661     }
662 
663     moveStateToNextToken(State);
664   }
665 
666   /// \brief Mark the next token as consumed in \p State and modify its stacks
667   /// accordingly.
668   void moveStateToNextToken(LineState &State) {
669     const AnnotatedToken &Current = *State.NextToken;
670     assert(State.Stack.size());
671 
672     if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0)
673       State.Stack.back().FirstLessLess = State.Column;
674     if (Current.is(tok::question))
675       State.Stack.back().QuestionColumn = State.Column;
676 
677     // If we encounter an opening (, [, { or <, we add a level to our stacks to
678     // prepare for the following tokens.
679     if (Current.is(tok::l_paren) || Current.is(tok::l_square) ||
680         Current.is(tok::l_brace) ||
681         State.NextToken->Type == TT_TemplateOpener) {
682       unsigned NewIndent;
683       if (Current.is(tok::l_brace)) {
684         // FIXME: This does not work with nested static initializers.
685         // Implement a better handling for static initializers and similar
686         // constructs.
687         NewIndent = Line.Level * 2 + 2;
688       } else {
689         NewIndent = 4 + State.Stack.back().LastSpace;
690       }
691       State.Stack.push_back(
692           ParenState(NewIndent, State.Stack.back().LastSpace));
693     }
694 
695     // If we encounter a closing ), ], } or >, we can remove a level from our
696     // stacks.
697     if (Current.is(tok::r_paren) || Current.is(tok::r_square) ||
698         (Current.is(tok::r_brace) && State.NextToken != &RootToken) ||
699         State.NextToken->Type == TT_TemplateCloser) {
700       State.Stack.pop_back();
701     }
702 
703     if (State.NextToken->Children.empty())
704       State.NextToken = NULL;
705     else
706       State.NextToken = &State.NextToken->Children[0];
707 
708     State.Column += Current.FormatTok.TokenLength;
709   }
710 
711   /// \brief Calculate the penalty for splitting after the token at \p Index.
712   unsigned splitPenalty(const AnnotatedToken &Tok) {
713     const AnnotatedToken &Left = Tok;
714     const AnnotatedToken &Right = Tok.Children[0];
715 
716     if (Left.is(tok::l_brace) && Right.isNot(tok::l_brace))
717       return 50;
718     if (Left.is(tok::equal) && Right.is(tok::l_brace))
719       return 150;
720     if (Left.is(tok::coloncolon))
721       return 500;
722 
723     if (Left.Type == TT_RangeBasedForLoopColon)
724       return 5;
725 
726     if (Right.is(tok::arrow) || Right.is(tok::period)) {
727       if (Left.is(tok::r_paren) && Line.Type == LT_BuilderTypeCall)
728         return 5; // Should be smaller than breaking at a nested comma.
729       return 150;
730     }
731 
732     // In for-loops, prefer breaking at ',' and ';'.
733     if (RootToken.is(tok::kw_for) &&
734         (Left.isNot(tok::comma) && Left.isNot(tok::semi)))
735       return 20;
736 
737     if (Left.is(tok::semi) || Left.is(tok::comma))
738       return 0;
739 
740     // In Objective-C method expressions, prefer breaking before "param:" over
741     // breaking after it.
742     if (isObjCSelectorName(Right))
743       return 0;
744     if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
745       return 20;
746 
747     if (Left.is(tok::l_paren))
748       return 20;
749     // FIXME: The penalty for a trailing "<" or "[" being higher than the
750     // penalty for a trainling "(" is a temporary workaround until we can
751     // properly avoid breaking in array subscripts or template parameters.
752     if (Left.is(tok::l_square) || Left.Type == TT_TemplateOpener)
753       return 50;
754 
755     if (Left.Type == TT_ConditionalExpr)
756       return prec::Assignment;
757     prec::Level Level = getPrecedence(Left);
758 
759     if (Level != prec::Unknown)
760       return Level;
761 
762     return 3;
763   }
764 
765   unsigned getColumnLimit() {
766     return Style.ColumnLimit - (Line.InPPDirective ? 1 : 0);
767   }
768 
769   /// \brief Calculate the number of lines needed to format the remaining part
770   /// of the unwrapped line.
771   ///
772   /// Assumes the formatting so far has led to
773   /// the \c LineSta \p State. If \p NewLine is set, a new line will be
774   /// added after the previous token.
775   ///
776   /// \param StopAt is used for optimization. If we can determine that we'll
777   /// definitely need at least \p StopAt additional lines, we already know of a
778   /// better solution.
779   unsigned calcPenalty(LineState State, bool NewLine, unsigned StopAt) {
780     // We are at the end of the unwrapped line, so we don't need any more lines.
781     if (State.NextToken == NULL)
782       return 0;
783 
784     if (!NewLine && State.NextToken->MustBreakBefore)
785       return UINT_MAX;
786     if (NewLine && !State.NextToken->CanBreakBefore &&
787         !(State.NextToken->is(tok::r_brace) &&
788           State.Stack.back().BreakBeforeClosingBrace))
789       return UINT_MAX;
790     if (!NewLine && State.NextToken->is(tok::r_brace) &&
791         State.Stack.back().BreakBeforeClosingBrace)
792       return UINT_MAX;
793     if (!NewLine && State.NextToken->Parent->is(tok::semi) &&
794         State.LineContainsContinuedForLoopSection)
795       return UINT_MAX;
796     if (!NewLine && State.NextToken->Parent->is(tok::comma) &&
797         State.NextToken->isNot(tok::comment) &&
798         State.Stack.back().BreakAfterComma)
799       return UINT_MAX;
800     // Trying to insert a parameter on a new line if there are already more than
801     // one parameter on the current line is bin packing.
802     if (NewLine && State.NextToken->Parent->is(tok::comma) &&
803         State.Stack.back().HasMultiParameterLine && !Style.BinPackParameters)
804       return UINT_MAX;
805     if (!NewLine && (State.NextToken->Type == TT_CtorInitializerColon ||
806                      (State.NextToken->Parent->ClosesTemplateDeclaration &&
807                       State.Stack.size() == 1)))
808       return UINT_MAX;
809 
810     unsigned CurrentPenalty = 0;
811     if (NewLine)
812       CurrentPenalty += Parameters.PenaltyIndentLevel * State.Stack.size() +
813                         splitPenalty(*State.NextToken->Parent);
814 
815     addTokenToState(NewLine, true, State);
816 
817     // Exceeding column limit is bad, assign penalty.
818     if (State.Column > getColumnLimit()) {
819       unsigned ExcessCharacters = State.Column - getColumnLimit();
820       CurrentPenalty += Parameters.PenaltyExcessCharacter * ExcessCharacters;
821     }
822 
823     if (StopAt <= CurrentPenalty)
824       return UINT_MAX;
825     StopAt -= CurrentPenalty;
826     StateMap::iterator I = Memory.find(State);
827     if (I != Memory.end()) {
828       // If this state has already been examined, we can safely return the
829       // previous result if we
830       // - have not hit the optimatization (and thus returned UINT_MAX) OR
831       // - are now computing for a smaller or equal StopAt.
832       unsigned SavedResult = I->second.first;
833       unsigned SavedStopAt = I->second.second;
834       if (SavedResult != UINT_MAX)
835         return SavedResult + CurrentPenalty;
836       else if (StopAt <= SavedStopAt)
837         return UINT_MAX;
838     }
839 
840     unsigned NoBreak = calcPenalty(State, false, StopAt);
841     unsigned WithBreak = calcPenalty(State, true, std::min(StopAt, NoBreak));
842     unsigned Result = std::min(NoBreak, WithBreak);
843 
844     // We have to store 'Result' without adding 'CurrentPenalty' as the latter
845     // can depend on 'NewLine'.
846     Memory[State] = std::pair<unsigned, unsigned>(Result, StopAt);
847 
848     return Result == UINT_MAX ? UINT_MAX : Result + CurrentPenalty;
849   }
850 
851   FormatStyle Style;
852   SourceManager &SourceMgr;
853   const AnnotatedLine &Line;
854   const unsigned FirstIndent;
855   const AnnotatedToken &RootToken;
856   WhitespaceManager &Whitespaces;
857 
858   // A map from an indent state to a pair (Result, Used-StopAt).
859   typedef std::map<LineState, std::pair<unsigned, unsigned> > StateMap;
860   StateMap Memory;
861 
862   OptimizationParameters Parameters;
863 };
864 
865 /// \brief Determines extra information about the tokens comprising an
866 /// \c UnwrappedLine.
867 class TokenAnnotator {
868 public:
869   TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
870                  AnnotatedLine &Line)
871       : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Line(Line) {}
872 
873   /// \brief A parser that gathers additional information about tokens.
874   ///
875   /// The \c TokenAnnotator tries to matches parenthesis and square brakets and
876   /// store a parenthesis levels. It also tries to resolve matching "<" and ">"
877   /// into template parameter lists.
878   class AnnotatingParser {
879   public:
880     AnnotatingParser(AnnotatedToken &RootToken)
881         : CurrentToken(&RootToken), KeywordVirtualFound(false),
882           ColonIsObjCMethodExpr(false), ColonIsForRangeExpr(false) {}
883 
884     /// \brief A helper class to manage AnnotatingParser::ColonIsObjCMethodExpr.
885     struct ObjCSelectorRAII {
886       AnnotatingParser &P;
887       bool ColonWasObjCMethodExpr;
888 
889       ObjCSelectorRAII(AnnotatingParser &P)
890           : P(P), ColonWasObjCMethodExpr(P.ColonIsObjCMethodExpr) {}
891 
892       ~ObjCSelectorRAII() { P.ColonIsObjCMethodExpr = ColonWasObjCMethodExpr; }
893 
894       void markStart(AnnotatedToken &Left) {
895         P.ColonIsObjCMethodExpr = true;
896         Left.Type = TT_ObjCMethodExpr;
897       }
898 
899       void markEnd(AnnotatedToken &Right) { Right.Type = TT_ObjCMethodExpr; }
900     };
901 
902     bool parseAngle() {
903       if (CurrentToken == NULL)
904         return false;
905       AnnotatedToken *Left = CurrentToken->Parent;
906       while (CurrentToken != NULL) {
907         if (CurrentToken->is(tok::greater)) {
908           Left->MatchingParen = CurrentToken;
909           CurrentToken->MatchingParen = Left;
910           CurrentToken->Type = TT_TemplateCloser;
911           next();
912           return true;
913         }
914         if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) ||
915             CurrentToken->is(tok::r_brace))
916           return false;
917         if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) ||
918             CurrentToken->is(tok::question) || CurrentToken->is(tok::colon))
919           return false;
920         if (CurrentToken->is(tok::comma))
921           ++Left->ParameterCount;
922         if (!consumeToken())
923           return false;
924       }
925       return false;
926     }
927 
928     bool parseParens(bool LookForDecls = false) {
929       if (CurrentToken == NULL)
930         return false;
931       bool StartsObjCMethodExpr = false;
932       AnnotatedToken *Left = CurrentToken->Parent;
933       if (CurrentToken->is(tok::caret)) {
934         // ^( starts a block.
935         Left->Type = TT_ObjCBlockLParen;
936       } else if (AnnotatedToken *MaybeSel = Left->Parent) {
937         // @selector( starts a selector.
938         if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
939             MaybeSel->Parent->is(tok::at)) {
940           StartsObjCMethodExpr = true;
941         }
942       }
943 
944       ObjCSelectorRAII objCSelector(*this);
945       if (StartsObjCMethodExpr)
946         objCSelector.markStart(*Left);
947 
948       while (CurrentToken != NULL) {
949         // LookForDecls is set when "if (" has been seen. Check for
950         // 'identifier' '*' 'identifier' followed by not '=' -- this
951         // '*' has to be a binary operator but determineStarAmpUsage() will
952         // categorize it as an unary operator, so set the right type here.
953         if (LookForDecls && !CurrentToken->Children.empty()) {
954           AnnotatedToken &Prev = *CurrentToken->Parent;
955           AnnotatedToken &Next = CurrentToken->Children[0];
956           if (Prev.Parent->is(tok::identifier) &&
957               (Prev.is(tok::star) || Prev.is(tok::amp)) &&
958               CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
959             Prev.Type = TT_BinaryOperator;
960             LookForDecls = false;
961           }
962         }
963 
964         if (CurrentToken->is(tok::r_paren)) {
965           Left->MatchingParen = CurrentToken;
966           CurrentToken->MatchingParen = Left;
967 
968           if (StartsObjCMethodExpr)
969             objCSelector.markEnd(*CurrentToken);
970 
971           next();
972           return true;
973         }
974         if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace))
975           return false;
976         if (CurrentToken->is(tok::comma))
977           ++Left->ParameterCount;
978         if (!consumeToken())
979           return false;
980       }
981       return false;
982     }
983 
984     bool parseSquare() {
985       if (!CurrentToken)
986         return false;
987 
988       // A '[' could be an index subscript (after an indentifier or after
989       // ')' or ']'), or it could be the start of an Objective-C method
990       // expression.
991       AnnotatedToken *Left = CurrentToken->Parent;
992       bool StartsObjCMethodExpr =
993           !Left->Parent || Left->Parent->is(tok::colon) ||
994           Left->Parent->is(tok::l_square) || Left->Parent->is(tok::l_paren) ||
995           Left->Parent->is(tok::kw_return) || Left->Parent->is(tok::kw_throw) ||
996           getBinOpPrecedence(Left->Parent->FormatTok.Tok.getKind(),
997                              true, true) > prec::Unknown;
998 
999       ObjCSelectorRAII objCSelector(*this);
1000       if (StartsObjCMethodExpr)
1001         objCSelector.markStart(*Left);
1002 
1003       while (CurrentToken != NULL) {
1004         if (CurrentToken->is(tok::r_square)) {
1005           if (!CurrentToken->Children.empty() &&
1006               CurrentToken->Children[0].is(tok::l_paren)) {
1007             // An ObjC method call can't be followed by an open parenthesis.
1008             // FIXME: Do we incorrectly label ":" with this?
1009             StartsObjCMethodExpr = false;
1010             Left->Type = TT_Unknown;
1011 	  }
1012           if (StartsObjCMethodExpr)
1013             objCSelector.markEnd(*CurrentToken);
1014           Left->MatchingParen = CurrentToken;
1015           CurrentToken->MatchingParen = Left;
1016           next();
1017           return true;
1018         }
1019         if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace))
1020           return false;
1021         if (CurrentToken->is(tok::comma))
1022           ++Left->ParameterCount;
1023         if (!consumeToken())
1024           return false;
1025       }
1026       return false;
1027     }
1028 
1029     bool parseBrace() {
1030       // Lines are fine to end with '{'.
1031       if (CurrentToken == NULL)
1032         return true;
1033       AnnotatedToken *Left = CurrentToken->Parent;
1034       while (CurrentToken != NULL) {
1035         if (CurrentToken->is(tok::r_brace)) {
1036           Left->MatchingParen = CurrentToken;
1037           CurrentToken->MatchingParen = Left;
1038           next();
1039           return true;
1040         }
1041         if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square))
1042           return false;
1043         if (!consumeToken())
1044           return false;
1045       }
1046       return true;
1047     }
1048 
1049     bool parseConditional() {
1050       while (CurrentToken != NULL) {
1051         if (CurrentToken->is(tok::colon)) {
1052           CurrentToken->Type = TT_ConditionalExpr;
1053           next();
1054           return true;
1055         }
1056         if (!consumeToken())
1057           return false;
1058       }
1059       return false;
1060     }
1061 
1062     bool parseTemplateDeclaration() {
1063       if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
1064         CurrentToken->Type = TT_TemplateOpener;
1065         next();
1066         if (!parseAngle())
1067           return false;
1068         CurrentToken->Parent->ClosesTemplateDeclaration = true;
1069         return true;
1070       }
1071       return false;
1072     }
1073 
1074     bool consumeToken() {
1075       AnnotatedToken *Tok = CurrentToken;
1076       next();
1077       switch (Tok->FormatTok.Tok.getKind()) {
1078       case tok::plus:
1079       case tok::minus:
1080         // At the start of the line, +/- specific ObjectiveC method
1081         // declarations.
1082         if (Tok->Parent == NULL)
1083           Tok->Type = TT_ObjCMethodSpecifier;
1084         break;
1085       case tok::colon:
1086         // Colons from ?: are handled in parseConditional().
1087         if (Tok->Parent->is(tok::r_paren))
1088           Tok->Type = TT_CtorInitializerColon;
1089         else if (ColonIsObjCMethodExpr)
1090           Tok->Type = TT_ObjCMethodExpr;
1091         else if (ColonIsForRangeExpr)
1092           Tok->Type = TT_RangeBasedForLoopColon;
1093         break;
1094       case tok::kw_if:
1095       case tok::kw_while:
1096         if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
1097           next();
1098           if (!parseParens(/*LookForDecls=*/true))
1099             return false;
1100         }
1101         break;
1102       case tok::kw_for:
1103         ColonIsForRangeExpr = true;
1104         next();
1105         if (!parseParens())
1106           return false;
1107         break;
1108       case tok::l_paren:
1109         if (!parseParens())
1110           return false;
1111         break;
1112       case tok::l_square:
1113         if (!parseSquare())
1114           return false;
1115         break;
1116       case tok::l_brace:
1117         if (!parseBrace())
1118           return false;
1119         break;
1120       case tok::less:
1121         if (parseAngle())
1122           Tok->Type = TT_TemplateOpener;
1123         else {
1124           Tok->Type = TT_BinaryOperator;
1125           CurrentToken = Tok;
1126           next();
1127         }
1128         break;
1129       case tok::r_paren:
1130       case tok::r_square:
1131         return false;
1132       case tok::r_brace:
1133         // Lines can start with '}'.
1134         if (Tok->Parent != NULL)
1135           return false;
1136         break;
1137       case tok::greater:
1138         Tok->Type = TT_BinaryOperator;
1139         break;
1140       case tok::kw_operator:
1141         if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
1142           CurrentToken->Type = TT_OverloadedOperator;
1143           next();
1144           if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) {
1145             CurrentToken->Type = TT_OverloadedOperator;
1146             next();
1147           }
1148         } else {
1149           while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) {
1150             CurrentToken->Type = TT_OverloadedOperator;
1151             next();
1152           }
1153         }
1154         break;
1155       case tok::question:
1156         parseConditional();
1157         break;
1158       case tok::kw_template:
1159         parseTemplateDeclaration();
1160         break;
1161       default:
1162         break;
1163       }
1164       return true;
1165     }
1166 
1167     void parseIncludeDirective() {
1168       next();
1169       if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
1170         next();
1171         while (CurrentToken != NULL) {
1172           if (CurrentToken->isNot(tok::comment) ||
1173               !CurrentToken->Children.empty())
1174             CurrentToken->Type = TT_ImplicitStringLiteral;
1175           next();
1176         }
1177       } else {
1178         while (CurrentToken != NULL) {
1179           next();
1180         }
1181       }
1182     }
1183 
1184     void parseWarningOrError() {
1185       next();
1186       // We still want to format the whitespace left of the first token of the
1187       // warning or error.
1188       next();
1189       while (CurrentToken != NULL) {
1190         CurrentToken->Type = TT_ImplicitStringLiteral;
1191         next();
1192       }
1193     }
1194 
1195     void parsePreprocessorDirective() {
1196       next();
1197       if (CurrentToken == NULL)
1198         return;
1199       // Hashes in the middle of a line can lead to any strange token
1200       // sequence.
1201       if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
1202         return;
1203       switch (
1204           CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
1205       case tok::pp_include:
1206       case tok::pp_import:
1207         parseIncludeDirective();
1208         break;
1209       case tok::pp_error:
1210       case tok::pp_warning:
1211         parseWarningOrError();
1212         break;
1213       default:
1214         break;
1215       }
1216     }
1217 
1218     LineType parseLine() {
1219       int PeriodsAndArrows = 0;
1220       bool CanBeBuilderTypeStmt = true;
1221       if (CurrentToken->is(tok::hash)) {
1222         parsePreprocessorDirective();
1223         return LT_PreprocessorDirective;
1224       }
1225       while (CurrentToken != NULL) {
1226 
1227         if (CurrentToken->is(tok::kw_virtual))
1228           KeywordVirtualFound = true;
1229         if (CurrentToken->is(tok::period) || CurrentToken->is(tok::arrow))
1230           ++PeriodsAndArrows;
1231         if (getPrecedence(*CurrentToken) > prec::Assignment &&
1232             CurrentToken->isNot(tok::less) && CurrentToken->isNot(tok::greater))
1233           CanBeBuilderTypeStmt = false;
1234         if (!consumeToken())
1235           return LT_Invalid;
1236       }
1237       if (KeywordVirtualFound)
1238         return LT_VirtualFunctionDecl;
1239 
1240       // Assume a builder-type call if there are 2 or more "." and "->".
1241       if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt)
1242         return LT_BuilderTypeCall;
1243 
1244       return LT_Other;
1245     }
1246 
1247     void next() {
1248       if (CurrentToken != NULL && !CurrentToken->Children.empty())
1249         CurrentToken = &CurrentToken->Children[0];
1250       else
1251         CurrentToken = NULL;
1252     }
1253 
1254   private:
1255     AnnotatedToken *CurrentToken;
1256     bool KeywordVirtualFound;
1257     bool ColonIsObjCMethodExpr;
1258     bool ColonIsForRangeExpr;
1259   };
1260 
1261   void calculateExtraInformation(AnnotatedToken &Current) {
1262     Current.SpaceRequiredBefore = spaceRequiredBefore(Current);
1263 
1264     if (Current.FormatTok.MustBreakBefore) {
1265       Current.MustBreakBefore = true;
1266     } else {
1267       if (Current.Type == TT_LineComment) {
1268         Current.MustBreakBefore = Current.FormatTok.NewlinesBefore > 0;
1269       } else if ((Current.Parent->is(tok::comment) &&
1270                   Current.FormatTok.NewlinesBefore > 0) ||
1271                  (Current.is(tok::string_literal) &&
1272                   Current.Parent->is(tok::string_literal))) {
1273         Current.MustBreakBefore = true;
1274       } else {
1275         Current.MustBreakBefore = false;
1276       }
1277     }
1278     Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current);
1279     if (Current.MustBreakBefore)
1280       Current.TotalLength = Current.Parent->TotalLength + Style.ColumnLimit;
1281     else
1282       Current.TotalLength = Current.Parent->TotalLength +
1283                             Current.FormatTok.TokenLength +
1284                             (Current.SpaceRequiredBefore ? 1 : 0);
1285     if (!Current.Children.empty())
1286       calculateExtraInformation(Current.Children[0]);
1287   }
1288 
1289   void annotate() {
1290     AnnotatingParser Parser(Line.First);
1291     Line.Type = Parser.parseLine();
1292     if (Line.Type == LT_Invalid)
1293       return;
1294 
1295     bool LookForFunctionName = Line.MustBeDeclaration;
1296     determineTokenTypes(Line.First, /*IsExpression=*/ false,
1297                         LookForFunctionName);
1298 
1299     if (Line.First.Type == TT_ObjCMethodSpecifier)
1300       Line.Type = LT_ObjCMethodDecl;
1301     else if (Line.First.Type == TT_ObjCDecl)
1302       Line.Type = LT_ObjCDecl;
1303     else if (Line.First.Type == TT_ObjCProperty)
1304       Line.Type = LT_ObjCProperty;
1305 
1306     Line.First.SpaceRequiredBefore = true;
1307     Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
1308     Line.First.CanBreakBefore = Line.First.MustBreakBefore;
1309 
1310     Line.First.TotalLength = Line.First.FormatTok.TokenLength;
1311     if (!Line.First.Children.empty())
1312       calculateExtraInformation(Line.First.Children[0]);
1313   }
1314 
1315 private:
1316   void determineTokenTypes(AnnotatedToken &Current, bool IsExpression,
1317                            bool LookForFunctionName) {
1318     if (getPrecedence(Current) == prec::Assignment) {
1319       IsExpression = true;
1320       AnnotatedToken *Previous = Current.Parent;
1321       while (Previous != NULL) {
1322         if (Previous->Type == TT_BinaryOperator &&
1323             (Previous->is(tok::star) || Previous->is(tok::amp))) {
1324           Previous->Type = TT_PointerOrReference;
1325         }
1326         Previous = Previous->Parent;
1327       }
1328     }
1329     if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) ||
1330         (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
1331          (Current.Parent == NULL || Current.Parent->isNot(tok::kw_for))))
1332       IsExpression = true;
1333 
1334     if (Current.Type == TT_Unknown) {
1335       if (LookForFunctionName && Current.is(tok::l_paren)) {
1336         findFunctionName(&Current);
1337         LookForFunctionName = false;
1338       } else if (Current.is(tok::star) || Current.is(tok::amp)) {
1339         Current.Type = determineStarAmpUsage(Current, IsExpression);
1340       } else if (Current.is(tok::minus) || Current.is(tok::plus) ||
1341                  Current.is(tok::caret)) {
1342         Current.Type = determinePlusMinusCaretUsage(Current);
1343       } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) {
1344         Current.Type = determineIncrementUsage(Current);
1345       } else if (Current.is(tok::exclaim)) {
1346         Current.Type = TT_UnaryOperator;
1347       } else if (isBinaryOperator(Current)) {
1348         Current.Type = TT_BinaryOperator;
1349       } else if (Current.is(tok::comment)) {
1350         std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
1351                                             Lex.getLangOpts()));
1352         if (StringRef(Data).startswith("//"))
1353           Current.Type = TT_LineComment;
1354         else
1355           Current.Type = TT_BlockComment;
1356       } else if (Current.is(tok::r_paren) &&
1357                  (Current.Parent->Type == TT_PointerOrReference ||
1358                   Current.Parent->Type == TT_TemplateCloser) &&
1359                  (Current.Children.empty() ||
1360                   (Current.Children[0].isNot(tok::equal) &&
1361                    Current.Children[0].isNot(tok::semi) &&
1362                    Current.Children[0].isNot(tok::l_brace)))) {
1363         // FIXME: We need to get smarter and understand more cases of casts.
1364         Current.Type = TT_CastRParen;
1365       } else if (Current.is(tok::at) && Current.Children.size()) {
1366         switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
1367         case tok::objc_interface:
1368         case tok::objc_implementation:
1369         case tok::objc_protocol:
1370           Current.Type = TT_ObjCDecl;
1371           break;
1372         case tok::objc_property:
1373           Current.Type = TT_ObjCProperty;
1374           break;
1375         default:
1376           break;
1377         }
1378       }
1379     }
1380 
1381     if (!Current.Children.empty())
1382       determineTokenTypes(Current.Children[0], IsExpression,
1383                           LookForFunctionName);
1384   }
1385 
1386   /// \brief Starting from \p Current, this searches backwards for an
1387   /// identifier which could be the start of a function name and marks it.
1388   void findFunctionName(AnnotatedToken *Current) {
1389     AnnotatedToken *Parent = Current->Parent;
1390     while (Parent != NULL && Parent->Parent != NULL) {
1391       if (Parent->is(tok::identifier) &&
1392           (Parent->Parent->is(tok::identifier) ||
1393            Parent->Parent->Type == TT_PointerOrReference ||
1394            Parent->Parent->Type == TT_TemplateCloser)) {
1395         Parent->Type = TT_StartOfName;
1396         break;
1397       }
1398       Parent = Parent->Parent;
1399     }
1400   }
1401 
1402   /// \brief Returns the previous token ignoring comments.
1403   const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
1404     const AnnotatedToken *PrevToken = Tok.Parent;
1405     while (PrevToken != NULL && PrevToken->is(tok::comment))
1406       PrevToken = PrevToken->Parent;
1407     return PrevToken;
1408   }
1409 
1410   /// \brief Returns the next token ignoring comments.
1411   const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
1412     if (Tok.Children.empty())
1413       return NULL;
1414     const AnnotatedToken *NextToken = &Tok.Children[0];
1415     while (NextToken->is(tok::comment)) {
1416       if (NextToken->Children.empty())
1417         return NULL;
1418       NextToken = &NextToken->Children[0];
1419     }
1420     return NextToken;
1421   }
1422 
1423   /// \brief Return the type of the given token assuming it is * or &.
1424   TokenType determineStarAmpUsage(const AnnotatedToken &Tok,
1425                                   bool IsExpression) {
1426     const AnnotatedToken *PrevToken = getPreviousToken(Tok);
1427     if (PrevToken == NULL)
1428       return TT_UnaryOperator;
1429 
1430     const AnnotatedToken *NextToken = getNextToken(Tok);
1431     if (NextToken == NULL)
1432       return TT_Unknown;
1433 
1434     if (NextToken->is(tok::l_square) && NextToken->Type != TT_ObjCMethodExpr)
1435       return TT_PointerOrReference;
1436 
1437     if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) ||
1438         PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) ||
1439         PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) ||
1440         PrevToken->Type == TT_BinaryOperator ||
1441         PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
1442       return TT_UnaryOperator;
1443 
1444     if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) ||
1445         PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() ||
1446         NextToken->is(tok::plus) || NextToken->is(tok::minus) ||
1447         NextToken->is(tok::plusplus) || NextToken->is(tok::minusminus) ||
1448         NextToken->is(tok::tilde) || NextToken->is(tok::exclaim) ||
1449         NextToken->is(tok::l_paren) || NextToken->is(tok::l_square) ||
1450         NextToken->is(tok::kw_alignof) || NextToken->is(tok::kw_sizeof))
1451       return TT_BinaryOperator;
1452 
1453     if (NextToken->is(tok::comma) || NextToken->is(tok::r_paren) ||
1454         NextToken->is(tok::greater))
1455       return TT_PointerOrReference;
1456 
1457     // It is very unlikely that we are going to find a pointer or reference type
1458     // definition on the RHS of an assignment.
1459     if (IsExpression)
1460       return TT_BinaryOperator;
1461 
1462     return TT_PointerOrReference;
1463   }
1464 
1465   TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
1466     const AnnotatedToken *PrevToken = getPreviousToken(Tok);
1467     if (PrevToken == NULL)
1468       return TT_UnaryOperator;
1469 
1470     // Use heuristics to recognize unary operators.
1471     if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) ||
1472         PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) ||
1473         PrevToken->is(tok::question) || PrevToken->is(tok::colon) ||
1474         PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) ||
1475         PrevToken->is(tok::at) || PrevToken->is(tok::l_brace))
1476       return TT_UnaryOperator;
1477 
1478     // There can't be to consecutive binary operators.
1479     if (PrevToken->Type == TT_BinaryOperator)
1480       return TT_UnaryOperator;
1481 
1482     // Fall back to marking the token as binary operator.
1483     return TT_BinaryOperator;
1484   }
1485 
1486   /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
1487   TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
1488     const AnnotatedToken *PrevToken = getPreviousToken(Tok);
1489     if (PrevToken == NULL)
1490       return TT_UnaryOperator;
1491     if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) ||
1492         PrevToken->is(tok::identifier))
1493       return TT_TrailingUnaryOperator;
1494 
1495     return TT_UnaryOperator;
1496   }
1497 
1498   bool spaceRequiredBetween(const AnnotatedToken &Left,
1499                             const AnnotatedToken &Right) {
1500     if (Right.is(tok::hashhash))
1501       return Left.is(tok::hash);
1502     if (Left.is(tok::hashhash) || Left.is(tok::hash))
1503       return Right.is(tok::hash);
1504     if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
1505       return false;
1506     if (Right.is(tok::less) &&
1507         (Left.is(tok::kw_template) ||
1508          (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
1509       return true;
1510     if (Left.is(tok::arrow) || Right.is(tok::arrow))
1511       return false;
1512     if (Left.is(tok::exclaim) || Left.is(tok::tilde))
1513       return false;
1514     if (Left.is(tok::at) &&
1515         (Right.is(tok::identifier) || Right.is(tok::string_literal) ||
1516          Right.is(tok::char_constant) || Right.is(tok::numeric_constant) ||
1517          Right.is(tok::l_paren) || Right.is(tok::l_brace) ||
1518          Right.is(tok::kw_true) || Right.is(tok::kw_false)))
1519       return false;
1520     if (Left.is(tok::coloncolon))
1521       return false;
1522     if (Right.is(tok::coloncolon))
1523       return Left.isNot(tok::identifier) && Left.isNot(tok::greater);
1524     if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less))
1525       return false;
1526     if (Right.is(tok::amp) || Right.is(tok::star))
1527       return Left.FormatTok.Tok.isLiteral() ||
1528              (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
1529               !Style.PointerAndReferenceBindToType);
1530     if (Left.is(tok::amp) || Left.is(tok::star))
1531       return Right.FormatTok.Tok.isLiteral() ||
1532              Style.PointerAndReferenceBindToType;
1533     if (Right.is(tok::star) && Left.is(tok::l_paren))
1534       return false;
1535     if (Left.is(tok::l_square) || Right.is(tok::r_square))
1536       return false;
1537     if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
1538       return false;
1539     if (Left.is(tok::period) || Right.is(tok::period))
1540       return false;
1541     if (Left.is(tok::colon))
1542       return Left.Type != TT_ObjCMethodExpr;
1543     if (Right.is(tok::colon))
1544       return Right.Type != TT_ObjCMethodExpr;
1545     if (Left.is(tok::l_paren))
1546       return false;
1547     if (Right.is(tok::l_paren)) {
1548       return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) ||
1549              Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
1550              Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
1551              Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
1552              Left.is(tok::kw_delete);
1553     }
1554     if (Left.is(tok::at) &&
1555         Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1556       return false;
1557     if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1558       return false;
1559     return true;
1560   }
1561 
1562   bool spaceRequiredBefore(const AnnotatedToken &Tok) {
1563     if (Line.Type == LT_ObjCMethodDecl) {
1564       if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
1565           Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
1566         return true;
1567       if (Tok.is(tok::colon))
1568         return false;
1569       if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
1570         return true;
1571       if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
1572         // Don't space between ')' and <id>
1573         return false;
1574       if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren))
1575         // Don't space between ':' and '('
1576         return false;
1577     }
1578     if (Line.Type == LT_ObjCProperty &&
1579         (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
1580       return false;
1581 
1582     if (Tok.Parent->is(tok::comma))
1583       return true;
1584     if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
1585       return true;
1586     if (Tok.Type == TT_OverloadedOperator)
1587       return Tok.is(tok::identifier) || Tok.is(tok::kw_new) ||
1588              Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool);
1589     if (Tok.Parent->Type == TT_OverloadedOperator)
1590       return false;
1591     if (Tok.is(tok::colon))
1592       return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() &&
1593              Tok.Type != TT_ObjCMethodExpr;
1594     if (Tok.Parent->Type == TT_UnaryOperator ||
1595         Tok.Parent->Type == TT_CastRParen)
1596       return false;
1597     if (Tok.Type == TT_UnaryOperator)
1598       return Tok.Parent->isNot(tok::l_paren) &&
1599              Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) &&
1600              (Tok.Parent->isNot(tok::colon) ||
1601               Tok.Parent->Type != TT_ObjCMethodExpr);
1602     if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
1603       return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
1604              TT_TemplateCloser && Style.SplitTemplateClosingGreater;
1605     }
1606     if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
1607       return true;
1608     if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
1609       return false;
1610     if (Tok.is(tok::less) && Line.First.is(tok::hash))
1611       return true;
1612     if (Tok.Type == TT_TrailingUnaryOperator)
1613       return false;
1614     return spaceRequiredBetween(*Tok.Parent, Tok);
1615   }
1616 
1617   bool canBreakBefore(const AnnotatedToken &Right) {
1618     const AnnotatedToken &Left = *Right.Parent;
1619     if (Line.Type == LT_ObjCMethodDecl) {
1620       if (Right.is(tok::identifier) && !Right.Children.empty() &&
1621           Right.Children[0].is(tok::colon) && Left.is(tok::identifier))
1622         return true;
1623       if (Right.is(tok::identifier) && Left.is(tok::l_paren) &&
1624           Left.Parent->is(tok::colon))
1625         // Don't break this identifier as ':' or identifier
1626         // before it will break.
1627         return false;
1628       if (Right.is(tok::colon) && Left.is(tok::identifier) &&
1629           Left.CanBreakBefore)
1630         // Don't break at ':' if identifier before it can beak.
1631         return false;
1632     }
1633     if (Right.Type == TT_StartOfName && Style.AllowReturnTypeOnItsOwnLine)
1634       return true;
1635     if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
1636       return false;
1637     if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
1638       return true;
1639     if (isObjCSelectorName(Right))
1640       return true;
1641     if (Left.ClosesTemplateDeclaration)
1642       return true;
1643     if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
1644       return true;
1645     if (Left.Type == TT_RangeBasedForLoopColon)
1646       return true;
1647     if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
1648         Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
1649         Left.is(tok::question))
1650       return false;
1651     if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
1652       return false;
1653 
1654     if (Right.Type == TT_LineComment)
1655       // We rely on MustBreakBefore being set correctly here as we should not
1656       // change the "binding" behavior of a comment.
1657       return false;
1658 
1659     // Allow breaking after a trailing 'const', e.g. after a method declaration,
1660     // unless it is follow by ';', '{' or '='.
1661     if (Left.is(tok::kw_const) && Left.Parent != NULL &&
1662         Left.Parent->is(tok::r_paren))
1663       return Right.isNot(tok::l_brace) && Right.isNot(tok::semi) &&
1664              Right.isNot(tok::equal);
1665 
1666     // We only break before r_brace if there was a corresponding break before
1667     // the l_brace, which is tracked by BreakBeforeClosingBrace.
1668     if (Right.is(tok::r_brace))
1669       return false;
1670 
1671     if (Right.is(tok::r_paren) || Right.is(tok::greater))
1672       return false;
1673     return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
1674            Left.is(tok::comma) || Right.is(tok::lessless) ||
1675            Right.is(tok::arrow) || Right.is(tok::period) ||
1676            Right.is(tok::colon) || Left.is(tok::coloncolon) ||
1677            Left.is(tok::semi) || Left.is(tok::l_brace) ||
1678            (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
1679             Right.is(tok::identifier)) ||
1680            (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
1681            (Left.is(tok::l_square) && !Right.is(tok::r_square));
1682   }
1683 
1684   FormatStyle Style;
1685   SourceManager &SourceMgr;
1686   Lexer &Lex;
1687   AnnotatedLine &Line;
1688 };
1689 
1690 class LexerBasedFormatTokenSource : public FormatTokenSource {
1691 public:
1692   LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)
1693       : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
1694         IdentTable(Lex.getLangOpts()) {
1695     Lex.SetKeepWhitespaceMode(true);
1696   }
1697 
1698   virtual FormatToken getNextToken() {
1699     if (GreaterStashed) {
1700       FormatTok.NewlinesBefore = 0;
1701       FormatTok.WhiteSpaceStart =
1702           FormatTok.Tok.getLocation().getLocWithOffset(1);
1703       FormatTok.WhiteSpaceLength = 0;
1704       GreaterStashed = false;
1705       return FormatTok;
1706     }
1707 
1708     FormatTok = FormatToken();
1709     Lex.LexFromRawLexer(FormatTok.Tok);
1710     StringRef Text = rawTokenText(FormatTok.Tok);
1711     FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
1712     if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
1713       FormatTok.IsFirst = true;
1714 
1715     // Consume and record whitespace until we find a significant token.
1716     while (FormatTok.Tok.is(tok::unknown)) {
1717       FormatTok.NewlinesBefore += Text.count('\n');
1718       FormatTok.HasUnescapedNewline = Text.count("\\\n") !=
1719                                       FormatTok.NewlinesBefore;
1720       FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
1721 
1722       if (FormatTok.Tok.is(tok::eof))
1723         return FormatTok;
1724       Lex.LexFromRawLexer(FormatTok.Tok);
1725       Text = rawTokenText(FormatTok.Tok);
1726     }
1727 
1728     // Now FormatTok is the next non-whitespace token.
1729     FormatTok.TokenLength = Text.size();
1730 
1731     // In case the token starts with escaped newlines, we want to
1732     // take them into account as whitespace - this pattern is quite frequent
1733     // in macro definitions.
1734     // FIXME: What do we want to do with other escaped spaces, and escaped
1735     // spaces or newlines in the middle of tokens?
1736     // FIXME: Add a more explicit test.
1737     unsigned i = 0;
1738     while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') {
1739       // FIXME: ++FormatTok.NewlinesBefore is missing...
1740       FormatTok.WhiteSpaceLength += 2;
1741       FormatTok.TokenLength -= 2;
1742       i += 2;
1743     }
1744 
1745     if (FormatTok.Tok.is(tok::raw_identifier)) {
1746       IdentifierInfo &Info = IdentTable.get(Text);
1747       FormatTok.Tok.setIdentifierInfo(&Info);
1748       FormatTok.Tok.setKind(Info.getTokenID());
1749     }
1750 
1751     if (FormatTok.Tok.is(tok::greatergreater)) {
1752       FormatTok.Tok.setKind(tok::greater);
1753       GreaterStashed = true;
1754     }
1755 
1756     return FormatTok;
1757   }
1758 
1759 private:
1760   FormatToken FormatTok;
1761   bool GreaterStashed;
1762   Lexer &Lex;
1763   SourceManager &SourceMgr;
1764   IdentifierTable IdentTable;
1765 
1766   /// Returns the text of \c FormatTok.
1767   StringRef rawTokenText(Token &Tok) {
1768     return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
1769                      Tok.getLength());
1770   }
1771 };
1772 
1773 class Formatter : public UnwrappedLineConsumer {
1774 public:
1775   Formatter(DiagnosticsEngine &Diag, const FormatStyle &Style, Lexer &Lex,
1776             SourceManager &SourceMgr,
1777             const std::vector<CharSourceRange> &Ranges)
1778       : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1779         Whitespaces(SourceMgr), Ranges(Ranges) {}
1780 
1781   virtual ~Formatter() {}
1782 
1783   tooling::Replacements format() {
1784     LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
1785     UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
1786     StructuralError = Parser.parse();
1787     unsigned PreviousEndOfLineColumn = 0;
1788     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1789       TokenAnnotator Annotator(Style, SourceMgr, Lex, AnnotatedLines[i]);
1790       Annotator.annotate();
1791     }
1792     for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
1793                                               E = AnnotatedLines.end();
1794          I != E; ++I) {
1795       const AnnotatedLine &TheLine = *I;
1796       if (touchesRanges(TheLine) && TheLine.Type != LT_Invalid) {
1797         unsigned Indent = formatFirstToken(TheLine.First, TheLine.Level,
1798                                            TheLine.InPPDirective,
1799                                            PreviousEndOfLineColumn);
1800         tryFitMultipleLinesInOne(Indent, I, E);
1801         UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
1802                                          TheLine.First, Whitespaces,
1803                                          StructuralError);
1804         PreviousEndOfLineColumn = Formatter.format();
1805       } else {
1806         // If we did not reformat this unwrapped line, the column at the end of
1807         // the last token is unchanged - thus, we can calculate the end of the
1808         // last token, and return the result.
1809         PreviousEndOfLineColumn =
1810             SourceMgr.getSpellingColumnNumber(
1811                 TheLine.Last->FormatTok.Tok.getLocation()) +
1812             Lex.MeasureTokenLength(TheLine.Last->FormatTok.Tok.getLocation(),
1813                                    SourceMgr, Lex.getLangOpts()) -
1814             1;
1815       }
1816     }
1817     return Whitespaces.generateReplacements();
1818   }
1819 
1820 private:
1821   /// \brief Tries to merge lines into one.
1822   ///
1823   /// This will change \c Line and \c AnnotatedLine to contain the merged line,
1824   /// if possible; note that \c I will be incremented when lines are merged.
1825   ///
1826   /// Returns whether the resulting \c Line can fit in a single line.
1827   void tryFitMultipleLinesInOne(unsigned Indent,
1828                                 std::vector<AnnotatedLine>::iterator &I,
1829                                 std::vector<AnnotatedLine>::iterator E) {
1830     unsigned Limit = Style.ColumnLimit - (I->InPPDirective ? 1 : 0) - Indent;
1831 
1832     // We can never merge stuff if there are trailing line comments.
1833     if (I->Last->Type == TT_LineComment)
1834       return;
1835 
1836     // Check whether the UnwrappedLine can be put onto a single line. If
1837     // so, this is bound to be the optimal solution (by definition) and we
1838     // don't need to analyze the entire solution space.
1839     if (I->Last->TotalLength > Limit)
1840       return;
1841     Limit -= I->Last->TotalLength;
1842 
1843     if (I + 1 == E || (I + 1)->Type == LT_Invalid)
1844       return;
1845 
1846     if (I->Last->is(tok::l_brace)) {
1847       tryMergeSimpleBlock(I, E, Limit);
1848     } else if (I->First.is(tok::kw_if)) {
1849       tryMergeSimpleIf(I, E, Limit);
1850     } else if (I->InPPDirective && (I->First.FormatTok.HasUnescapedNewline ||
1851                                     I->First.FormatTok.IsFirst)) {
1852       tryMergeSimplePPDirective(I, E, Limit);
1853     }
1854     return;
1855   }
1856 
1857   void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I,
1858                                  std::vector<AnnotatedLine>::iterator E,
1859                                  unsigned Limit) {
1860     AnnotatedLine &Line = *I;
1861     if (!(I + 1)->InPPDirective || (I + 1)->First.FormatTok.HasUnescapedNewline)
1862       return;
1863     if (I + 2 != E && (I + 2)->InPPDirective &&
1864         !(I + 2)->First.FormatTok.HasUnescapedNewline)
1865       return;
1866     if (1 + (I + 1)->Last->TotalLength > Limit)
1867       return;
1868     join(Line, *(++I));
1869   }
1870 
1871   void tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator &I,
1872                         std::vector<AnnotatedLine>::iterator E,
1873                         unsigned Limit) {
1874     if (!Style.AllowShortIfStatementsOnASingleLine)
1875       return;
1876     if ((I + 1)->InPPDirective != I->InPPDirective ||
1877         ((I + 1)->InPPDirective &&
1878          (I + 1)->First.FormatTok.HasUnescapedNewline))
1879       return;
1880     AnnotatedLine &Line = *I;
1881     if (Line.Last->isNot(tok::r_paren))
1882       return;
1883     if (1 + (I + 1)->Last->TotalLength > Limit)
1884       return;
1885     if ((I + 1)->First.is(tok::kw_if) || (I + 1)->First.Type == TT_LineComment)
1886       return;
1887     // Only inline simple if's (no nested if or else).
1888     if (I + 2 != E && (I + 2)->First.is(tok::kw_else))
1889       return;
1890     join(Line, *(++I));
1891   }
1892 
1893   void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I,
1894                         std::vector<AnnotatedLine>::iterator E,
1895                         unsigned Limit){
1896     // First, check that the current line allows merging. This is the case if
1897     // we're not in a control flow statement and the last token is an opening
1898     // brace.
1899     AnnotatedLine &Line = *I;
1900     bool AllowedTokens =
1901         Line.First.isNot(tok::kw_if) && Line.First.isNot(tok::kw_while) &&
1902         Line.First.isNot(tok::kw_do) && Line.First.isNot(tok::r_brace) &&
1903         Line.First.isNot(tok::kw_else) && Line.First.isNot(tok::kw_try) &&
1904         Line.First.isNot(tok::kw_catch) && Line.First.isNot(tok::kw_for) &&
1905         // This gets rid of all ObjC @ keywords and methods.
1906         Line.First.isNot(tok::at) && Line.First.isNot(tok::minus) &&
1907         Line.First.isNot(tok::plus);
1908     if (!AllowedTokens)
1909       return;
1910 
1911     AnnotatedToken *Tok = &(I + 1)->First;
1912     if (Tok->Children.empty() && Tok->is(tok::r_brace) &&
1913         !Tok->MustBreakBefore && Tok->TotalLength <= Limit) {
1914       Tok->SpaceRequiredBefore = false;
1915       join(Line, *(I + 1));
1916       I += 1;
1917     } else {
1918       // Check that we still have three lines and they fit into the limit.
1919       if (I + 2 == E || (I + 2)->Type == LT_Invalid ||
1920           !nextTwoLinesFitInto(I, Limit))
1921         return;
1922 
1923       // Second, check that the next line does not contain any braces - if it
1924       // does, readability declines when putting it into a single line.
1925       if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore)
1926         return;
1927       do {
1928         if (Tok->is(tok::l_brace) || Tok->is(tok::r_brace))
1929           return;
1930         Tok = Tok->Children.empty() ? NULL : &Tok->Children.back();
1931       } while (Tok != NULL);
1932 
1933       // Last, check that the third line contains a single closing brace.
1934       Tok = &(I + 2)->First;
1935       if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) ||
1936           Tok->MustBreakBefore)
1937         return;
1938 
1939       join(Line, *(I + 1));
1940       join(Line, *(I + 2));
1941       I += 2;
1942     }
1943   }
1944 
1945   bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I,
1946                            unsigned Limit) {
1947     return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <=
1948            Limit;
1949   }
1950 
1951   void join(AnnotatedLine &A, const AnnotatedLine &B) {
1952     A.Last->Children.push_back(B.First);
1953     while (!A.Last->Children.empty()) {
1954       A.Last->Children[0].Parent = A.Last;
1955       A.Last = &A.Last->Children[0];
1956     }
1957   }
1958 
1959   bool touchesRanges(const AnnotatedLine &TheLine) {
1960     const FormatToken *First = &TheLine.First.FormatTok;
1961     const FormatToken *Last = &TheLine.Last->FormatTok;
1962     CharSourceRange LineRange = CharSourceRange::getTokenRange(
1963                                     First->Tok.getLocation(),
1964                                     Last->Tok.getLocation());
1965     for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1966       if (!SourceMgr.isBeforeInTranslationUnit(LineRange.getEnd(),
1967                                                Ranges[i].getBegin()) &&
1968           !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
1969                                                LineRange.getBegin()))
1970         return true;
1971     }
1972     return false;
1973   }
1974 
1975   virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1976     AnnotatedLines.push_back(AnnotatedLine(TheLine));
1977   }
1978 
1979   /// \brief Add a new line and the required indent before the first Token
1980   /// of the \c UnwrappedLine if there was no structural parsing error.
1981   /// Returns the indent level of the \c UnwrappedLine.
1982   unsigned formatFirstToken(const AnnotatedToken &RootToken, unsigned Level,
1983                             bool InPPDirective,
1984                             unsigned PreviousEndOfLineColumn) {
1985     const FormatToken &Tok = RootToken.FormatTok;
1986     if (!Tok.WhiteSpaceStart.isValid() || StructuralError)
1987       return SourceMgr.getSpellingColumnNumber(Tok.Tok.getLocation()) - 1;
1988 
1989     unsigned Newlines = std::min(Tok.NewlinesBefore,
1990                                  Style.MaxEmptyLinesToKeep + 1);
1991     if (Newlines == 0 && !Tok.IsFirst)
1992       Newlines = 1;
1993     unsigned Indent = Level * 2;
1994 
1995     bool IsAccessModifier = false;
1996     if (RootToken.is(tok::kw_public) || RootToken.is(tok::kw_protected) ||
1997         RootToken.is(tok::kw_private))
1998       IsAccessModifier = true;
1999     else if (RootToken.is(tok::at) && !RootToken.Children.empty() &&
2000              (RootToken.Children[0].isObjCAtKeyword(tok::objc_public) ||
2001               RootToken.Children[0].isObjCAtKeyword(tok::objc_protected) ||
2002               RootToken.Children[0].isObjCAtKeyword(tok::objc_package) ||
2003               RootToken.Children[0].isObjCAtKeyword(tok::objc_private)))
2004       IsAccessModifier = true;
2005 
2006     if (IsAccessModifier &&
2007         static_cast<int>(Indent) + Style.AccessModifierOffset >= 0)
2008       Indent += Style.AccessModifierOffset;
2009     if (!InPPDirective || Tok.HasUnescapedNewline) {
2010       Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0, Style);
2011     } else {
2012       Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent,
2013                                       PreviousEndOfLineColumn, Style);
2014     }
2015     return Indent;
2016   }
2017 
2018   DiagnosticsEngine &Diag;
2019   FormatStyle Style;
2020   Lexer &Lex;
2021   SourceManager &SourceMgr;
2022   WhitespaceManager Whitespaces;
2023   std::vector<CharSourceRange> Ranges;
2024   std::vector<AnnotatedLine> AnnotatedLines;
2025   bool StructuralError;
2026 };
2027 
2028 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
2029                                SourceManager &SourceMgr,
2030                                std::vector<CharSourceRange> Ranges,
2031                                DiagnosticConsumer *DiagClient) {
2032   IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
2033   OwningPtr<DiagnosticConsumer> DiagPrinter;
2034   if (DiagClient == 0) {
2035     DiagPrinter.reset(new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts));
2036     DiagPrinter->BeginSourceFile(Lex.getLangOpts(), Lex.getPP());
2037     DiagClient = DiagPrinter.get();
2038   }
2039   DiagnosticsEngine Diagnostics(
2040       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts,
2041       DiagClient, false);
2042   Diagnostics.setSourceManager(&SourceMgr);
2043   Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges);
2044   return formatter.format();
2045 }
2046 
2047 LangOptions getFormattingLangOpts() {
2048   LangOptions LangOpts;
2049   LangOpts.CPlusPlus = 1;
2050   LangOpts.CPlusPlus11 = 1;
2051   LangOpts.Bool = 1;
2052   LangOpts.ObjC1 = 1;
2053   LangOpts.ObjC2 = 1;
2054   return LangOpts;
2055 }
2056 
2057 } // namespace format
2058 } // namespace clang
2059