1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 /// This is EXPERIMENTAL code under heavy development. It is not in a state yet,
15 /// where it can be used to format real code.
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #include "clang/Format/Format.h"
20 #include "UnwrappedLineParser.h"
21 #include "clang/Basic/Diagnostic.h"
22 #include "clang/Basic/OperatorPrecedence.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Frontend/TextDiagnosticPrinter.h"
25 #include "clang/Lex/Lexer.h"
26 #include <string>
27 
28 namespace clang {
29 namespace format {
30 
31 enum TokenType {
32   TT_BinaryOperator,
33   TT_BlockComment,
34   TT_CastRParen,
35   TT_ConditionalExpr,
36   TT_CtorInitializerColon,
37   TT_IncludePath,
38   TT_LineComment,
39   TT_ObjCBlockLParen,
40   TT_ObjCDecl,
41   TT_ObjCMethodSpecifier,
42   TT_ObjCMethodExpr,
43   TT_ObjCSelectorStart,
44   TT_ObjCProperty,
45   TT_OverloadedOperator,
46   TT_PointerOrReference,
47   TT_PureVirtualSpecifier,
48   TT_TemplateCloser,
49   TT_TemplateOpener,
50   TT_TrailingUnaryOperator,
51   TT_UnaryOperator,
52   TT_Unknown
53 };
54 
55 enum LineType {
56   LT_Invalid,
57   LT_Other,
58   LT_PreprocessorDirective,
59   LT_VirtualFunctionDecl,
60   LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
61   LT_ObjCMethodDecl,
62   LT_ObjCProperty // An @property line.
63 };
64 
65 class AnnotatedToken {
66 public:
67   AnnotatedToken(const FormatToken &FormatTok)
68       : FormatTok(FormatTok), Type(TT_Unknown), SpaceRequiredBefore(false),
69         CanBreakBefore(false), MustBreakBefore(false),
70         ClosesTemplateDeclaration(false), Parent(NULL) {}
71 
72   bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
73   bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); }
74 
75   bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
76     return FormatTok.Tok.isObjCAtKeyword(Kind);
77   }
78 
79   FormatToken FormatTok;
80 
81   TokenType Type;
82 
83   bool SpaceRequiredBefore;
84   bool CanBreakBefore;
85   bool MustBreakBefore;
86 
87   bool ClosesTemplateDeclaration;
88 
89   std::vector<AnnotatedToken> Children;
90   AnnotatedToken *Parent;
91 };
92 
93 class AnnotatedLine {
94 public:
95   AnnotatedLine(const FormatToken &FormatTok, unsigned Level,
96                 bool InPPDirective)
97       : First(FormatTok), Level(Level), InPPDirective(InPPDirective) {}
98   AnnotatedToken First;
99   AnnotatedToken *Last;
100 
101   LineType Type;
102   unsigned Level;
103   bool InPPDirective;
104 };
105 
106 static prec::Level getPrecedence(const AnnotatedToken &Tok) {
107   return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
108 }
109 
110 FormatStyle getLLVMStyle() {
111   FormatStyle LLVMStyle;
112   LLVMStyle.ColumnLimit = 80;
113   LLVMStyle.MaxEmptyLinesToKeep = 1;
114   LLVMStyle.PointerAndReferenceBindToType = false;
115   LLVMStyle.AccessModifierOffset = -2;
116   LLVMStyle.SplitTemplateClosingGreater = true;
117   LLVMStyle.IndentCaseLabels = false;
118   LLVMStyle.SpacesBeforeTrailingComments = 1;
119   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
120   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
121   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
122   LLVMStyle.ObjCSpaceBeforeReturnType = true;
123   return LLVMStyle;
124 }
125 
126 FormatStyle getGoogleStyle() {
127   FormatStyle GoogleStyle;
128   GoogleStyle.ColumnLimit = 80;
129   GoogleStyle.MaxEmptyLinesToKeep = 1;
130   GoogleStyle.PointerAndReferenceBindToType = true;
131   GoogleStyle.AccessModifierOffset = -1;
132   GoogleStyle.SplitTemplateClosingGreater = false;
133   GoogleStyle.IndentCaseLabels = true;
134   GoogleStyle.SpacesBeforeTrailingComments = 2;
135   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
136   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
137   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
138   GoogleStyle.ObjCSpaceBeforeReturnType = false;
139   return GoogleStyle;
140 }
141 
142 FormatStyle getChromiumStyle() {
143   FormatStyle ChromiumStyle = getGoogleStyle();
144   ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
145   return ChromiumStyle;
146 }
147 
148 struct OptimizationParameters {
149   unsigned PenaltyIndentLevel;
150   unsigned PenaltyLevelDecrease;
151   unsigned PenaltyExcessCharacter;
152 };
153 
154 /// \brief Replaces the whitespace in front of \p Tok. Only call once for
155 /// each \c FormatToken.
156 static void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
157                               unsigned Spaces, const FormatStyle &Style,
158                               SourceManager &SourceMgr,
159                               tooling::Replacements &Replaces) {
160   Replaces.insert(tooling::Replacement(
161       SourceMgr, Tok.FormatTok.WhiteSpaceStart, Tok.FormatTok.WhiteSpaceLength,
162       std::string(NewLines, '\n') + std::string(Spaces, ' ')));
163 }
164 
165 /// \brief Like \c replaceWhitespace, but additionally adds right-aligned
166 /// backslashes to escape newlines inside a preprocessor directive.
167 ///
168 /// This function and \c replaceWhitespace have the same behavior if
169 /// \c Newlines == 0.
170 static void replacePPWhitespace(
171     const AnnotatedToken &Tok, unsigned NewLines, unsigned Spaces,
172     unsigned WhitespaceStartColumn, const FormatStyle &Style,
173     SourceManager &SourceMgr, tooling::Replacements &Replaces) {
174   std::string NewLineText;
175   if (NewLines > 0) {
176     unsigned Offset = std::min<int>(Style.ColumnLimit - 1,
177                                     WhitespaceStartColumn);
178     for (unsigned i = 0; i < NewLines; ++i) {
179       NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' ');
180       NewLineText += "\\\n";
181       Offset = 0;
182     }
183   }
184   Replaces.insert(tooling::Replacement(SourceMgr, Tok.FormatTok.WhiteSpaceStart,
185                                        Tok.FormatTok.WhiteSpaceLength,
186                                        NewLineText + std::string(Spaces, ' ')));
187 }
188 
189 /// \brief Calculates whether the (remaining) \c AnnotatedLine starting with
190 /// \p RootToken fits into \p Limit columns on a single line.
191 ///
192 /// If true, sets \p Length to the required length.
193 static bool fitsIntoLimit(const AnnotatedToken &RootToken, unsigned Limit,
194                           unsigned *Length = 0) {
195   unsigned Columns = RootToken.FormatTok.TokenLength;
196   if (Columns > Limit) return false;
197   const AnnotatedToken *Tok = &RootToken;
198   while (!Tok->Children.empty()) {
199     Tok = &Tok->Children[0];
200     Columns += (Tok->SpaceRequiredBefore ? 1 : 0) + Tok->FormatTok.TokenLength;
201     // A special case for the colon of a constructor initializer as this only
202     // needs to be put on a new line if the line needs to be split.
203     if (Columns > Limit ||
204         (Tok->MustBreakBefore && Tok->Type != TT_CtorInitializerColon)) {
205       // FIXME: Remove this hack.
206       return false;
207     }
208   }
209   if (Length != 0)
210     *Length = Columns;
211   return true;
212 }
213 
214 /// \brief Returns if a token is an Objective-C selector name.
215 ///
216 /// For example, "bar" is a selector name in [foo bar:(4 + 5)].
217 static bool isObjCSelectorName(const AnnotatedToken &Tok) {
218   return Tok.is(tok::identifier) && !Tok.Children.empty() &&
219          Tok.Children[0].is(tok::colon) &&
220          Tok.Children[0].Type == TT_ObjCMethodExpr;
221 }
222 
223 class UnwrappedLineFormatter {
224 public:
225   UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
226                          const AnnotatedLine &Line, unsigned FirstIndent,
227                          bool FitsOnALine, const AnnotatedToken &RootToken,
228                          tooling::Replacements &Replaces, bool StructuralError)
229       : Style(Style), SourceMgr(SourceMgr), Line(Line),
230         FirstIndent(FirstIndent), FitsOnALine(FitsOnALine),
231         RootToken(RootToken), Replaces(Replaces) {
232     Parameters.PenaltyIndentLevel = 15;
233     Parameters.PenaltyLevelDecrease = 30;
234     Parameters.PenaltyExcessCharacter = 1000000;
235   }
236 
237   /// \brief Formats an \c UnwrappedLine.
238   ///
239   /// \returns The column after the last token in the last line of the
240   /// \c UnwrappedLine.
241   unsigned format() {
242     // Initialize state dependent on indent.
243     LineState State;
244     State.Column = FirstIndent;
245     State.NextToken = &RootToken;
246     State.Stack.push_back(ParenState(FirstIndent + 4, FirstIndent));
247     State.ForLoopVariablePos = 0;
248     State.LineContainsContinuedForLoopSection = false;
249     State.StartOfLineLevel = 1;
250 
251     // The first token has already been indented and thus consumed.
252     moveStateToNextToken(State);
253 
254     // Start iterating at 1 as we have correctly formatted of Token #0 above.
255     while (State.NextToken != NULL) {
256       if (FitsOnALine) {
257         addTokenToState(false, false, State);
258       } else {
259         unsigned NoBreak = calcPenalty(State, false, UINT_MAX);
260         unsigned Break = calcPenalty(State, true, NoBreak);
261         addTokenToState(Break < NoBreak, false, State);
262         if (State.NextToken != NULL &&
263             State.NextToken->Parent->Type == TT_CtorInitializerColon) {
264           if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine &&
265               !fitsIntoLimit(*State.NextToken,
266                              getColumnLimit() - State.Column - 1))
267             State.Stack.back().BreakAfterComma = true;
268         }
269       }
270     }
271     return State.Column;
272   }
273 
274 private:
275   struct ParenState {
276     ParenState(unsigned Indent, unsigned LastSpace)
277         : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0),
278           BreakBeforeClosingBrace(false), BreakAfterComma(false) {}
279 
280     /// \brief The position to which a specific parenthesis level needs to be
281     /// indented.
282     unsigned Indent;
283 
284     /// \brief The position of the last space on each level.
285     ///
286     /// Used e.g. to break like:
287     /// functionCall(Parameter, otherCall(
288     ///                             OtherParameter));
289     unsigned LastSpace;
290 
291     /// \brief The position the first "<<" operator encountered on each level.
292     ///
293     /// Used to align "<<" operators. 0 if no such operator has been encountered
294     /// on a level.
295     unsigned FirstLessLess;
296 
297     /// \brief Whether a newline needs to be inserted before the block's closing
298     /// brace.
299     ///
300     /// We only want to insert a newline before the closing brace if there also
301     /// was a newline after the beginning left brace.
302     bool BreakBeforeClosingBrace;
303 
304     bool BreakAfterComma;
305 
306     bool operator<(const ParenState &Other) const {
307       if (Indent != Other.Indent)
308         return Indent < Other.Indent;
309       if (LastSpace != Other.LastSpace)
310         return LastSpace < Other.LastSpace;
311       if (FirstLessLess != Other.FirstLessLess)
312         return FirstLessLess < Other.FirstLessLess;
313       if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
314         return BreakBeforeClosingBrace;
315       if (BreakAfterComma != Other.BreakAfterComma)
316         return BreakAfterComma;
317       return false;
318     }
319   };
320 
321   /// \brief The current state when indenting a unwrapped line.
322   ///
323   /// As the indenting tries different combinations this is copied by value.
324   struct LineState {
325     /// \brief The number of used columns in the current line.
326     unsigned Column;
327 
328     /// \brief The token that needs to be next formatted.
329     const AnnotatedToken *NextToken;
330 
331     /// \brief The parenthesis level of the first token on the current line.
332     unsigned StartOfLineLevel;
333 
334     /// \brief The column of the first variable in a for-loop declaration.
335     ///
336     /// Used to align the second variable if necessary.
337     unsigned ForLoopVariablePos;
338 
339     /// \brief \c true if this line contains a continued for-loop section.
340     bool LineContainsContinuedForLoopSection;
341 
342     /// \brief A stack keeping track of properties applying to parenthesis
343     /// levels.
344     std::vector<ParenState> Stack;
345 
346     /// \brief Comparison operator to be able to used \c LineState in \c map.
347     bool operator<(const LineState &Other) const {
348       if (Other.NextToken != NextToken)
349         return Other.NextToken > NextToken;
350       if (Other.Column != Column)
351         return Other.Column > Column;
352       if (Other.StartOfLineLevel != StartOfLineLevel)
353         return Other.StartOfLineLevel > StartOfLineLevel;
354       if (Other.ForLoopVariablePos != ForLoopVariablePos)
355         return Other.ForLoopVariablePos < ForLoopVariablePos;
356       if (Other.LineContainsContinuedForLoopSection !=
357           LineContainsContinuedForLoopSection)
358         return LineContainsContinuedForLoopSection;
359       return Other.Stack < Stack;
360     }
361   };
362 
363   /// \brief Appends the next token to \p State and updates information
364   /// necessary for indentation.
365   ///
366   /// Puts the token on the current line if \p Newline is \c true and adds a
367   /// line break and necessary indentation otherwise.
368   ///
369   /// If \p DryRun is \c false, also creates and stores the required
370   /// \c Replacement.
371   void addTokenToState(bool Newline, bool DryRun, LineState &State) {
372     const AnnotatedToken &Current = *State.NextToken;
373     const AnnotatedToken &Previous = *State.NextToken->Parent;
374     assert(State.Stack.size());
375     unsigned ParenLevel = State.Stack.size() - 1;
376 
377     if (Newline) {
378       unsigned WhitespaceStartColumn = State.Column;
379       if (Current.is(tok::r_brace)) {
380         State.Column = Line.Level * 2;
381       } else if (Current.is(tok::string_literal) &&
382                  Previous.is(tok::string_literal)) {
383         State.Column = State.Column - Previous.FormatTok.TokenLength;
384       } else if (Current.is(tok::lessless) &&
385                  State.Stack[ParenLevel].FirstLessLess != 0) {
386         State.Column = State.Stack[ParenLevel].FirstLessLess;
387       } else if (ParenLevel != 0 &&
388                  (Previous.is(tok::equal) || Current.is(tok::arrow) ||
389                   Current.is(tok::period) || Previous.is(tok::question) ||
390                   Previous.Type == TT_ConditionalExpr)) {
391         // Indent and extra 4 spaces after if we know the current expression is
392         // continued.  Don't do that on the top level, as we already indent 4
393         // there.
394         State.Column = State.Stack[ParenLevel].Indent + 4;
395       } else if (RootToken.is(tok::kw_for) && Previous.is(tok::comma)) {
396         State.Column = State.ForLoopVariablePos;
397       } else if (State.NextToken->Parent->ClosesTemplateDeclaration) {
398         State.Column = State.Stack[ParenLevel].Indent - 4;
399       } else {
400         State.Column = State.Stack[ParenLevel].Indent;
401       }
402 
403       // A line starting with a closing brace is assumed to be correct for the
404       // same level as before the opening brace.
405       State.StartOfLineLevel = ParenLevel + (Current.is(tok::r_brace) ? 0 : 1);
406 
407       if (RootToken.is(tok::kw_for))
408         State.LineContainsContinuedForLoopSection = Previous.isNot(tok::semi);
409 
410       if (!DryRun) {
411         if (!Line.InPPDirective)
412           replaceWhitespace(Current.FormatTok, 1, State.Column, Style,
413                             SourceMgr, Replaces);
414         else
415           replacePPWhitespace(Current.FormatTok, 1, State.Column,
416                               WhitespaceStartColumn, Style, SourceMgr,
417                               Replaces);
418       }
419 
420       State.Stack[ParenLevel].LastSpace = State.Column;
421       if (Current.is(tok::colon) && State.NextToken->Type != TT_ConditionalExpr)
422         State.Stack[ParenLevel].Indent += 2;
423     } else {
424       if (Current.is(tok::equal) && RootToken.is(tok::kw_for))
425         State.ForLoopVariablePos = State.Column -
426                                    Previous.FormatTok.TokenLength;
427 
428       unsigned Spaces = State.NextToken->SpaceRequiredBefore ? 1 : 0;
429       if (State.NextToken->Type == TT_LineComment)
430         Spaces = Style.SpacesBeforeTrailingComments;
431 
432       if (!DryRun)
433         replaceWhitespace(Current, 0, Spaces, Style, SourceMgr, Replaces);
434 
435       // FIXME: Do we need to do this for assignments nested in other
436       // expressions?
437       if (RootToken.isNot(tok::kw_for) && ParenLevel == 0 &&
438           (getPrecedence(Previous) == prec::Assignment ||
439            Previous.is(tok::kw_return)))
440         State.Stack[ParenLevel].Indent = State.Column + Spaces;
441       if (Previous.is(tok::l_paren) || Previous.is(tok::l_brace) ||
442           State.NextToken->Parent->Type == TT_TemplateOpener)
443         State.Stack[ParenLevel].Indent = State.Column + Spaces;
444 
445       // Top-level spaces that are not part of assignments are exempt as that
446       // mostly leads to better results.
447       State.Column += Spaces;
448       if (Spaces > 0 &&
449           (ParenLevel != 0 || getPrecedence(Previous) == prec::Assignment))
450         State.Stack[ParenLevel].LastSpace = State.Column;
451     }
452     if (Newline && Previous.is(tok::l_brace)) {
453       State.Stack.back().BreakBeforeClosingBrace = true;
454     }
455     moveStateToNextToken(State);
456   }
457 
458   /// \brief Mark the next token as consumed in \p State and modify its stacks
459   /// accordingly.
460   void moveStateToNextToken(LineState &State) {
461     const AnnotatedToken &Current = *State.NextToken;
462     assert(State.Stack.size());
463 
464     if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0)
465       State.Stack.back().FirstLessLess = State.Column;
466 
467     // If we encounter an opening (, [, { or <, we add a level to our stacks to
468     // prepare for the following tokens.
469     if (Current.is(tok::l_paren) || Current.is(tok::l_square) ||
470         Current.is(tok::l_brace) ||
471         State.NextToken->Type == TT_TemplateOpener) {
472       unsigned NewIndent;
473       if (Current.is(tok::l_brace)) {
474         // FIXME: This does not work with nested static initializers.
475         // Implement a better handling for static initializers and similar
476         // constructs.
477         NewIndent = Line.Level * 2 + 2;
478       } else {
479         NewIndent = 4 + State.Stack.back().LastSpace;
480       }
481       State.Stack.push_back(
482           ParenState(NewIndent, State.Stack.back().LastSpace));
483     }
484 
485     // If we encounter a closing ), ], } or >, we can remove a level from our
486     // stacks.
487     if (Current.is(tok::r_paren) || Current.is(tok::r_square) ||
488         (Current.is(tok::r_brace) && State.NextToken != &RootToken) ||
489         State.NextToken->Type == TT_TemplateCloser) {
490       State.Stack.pop_back();
491     }
492 
493     if (State.NextToken->Children.empty())
494       State.NextToken = NULL;
495     else
496       State.NextToken = &State.NextToken->Children[0];
497 
498     State.Column += Current.FormatTok.TokenLength;
499   }
500 
501   /// \brief Calculate the penalty for splitting after the token at \p Index.
502   unsigned splitPenalty(const AnnotatedToken &Tok) {
503     const AnnotatedToken &Left = Tok;
504     const AnnotatedToken &Right = Tok.Children[0];
505 
506     if (Left.is(tok::l_brace) && Right.isNot(tok::l_brace))
507       return 50;
508     if (Left.is(tok::equal) && Right.is(tok::l_brace))
509       return 150;
510 
511     // In for-loops, prefer breaking at ',' and ';'.
512     if (RootToken.is(tok::kw_for) &&
513         (Left.isNot(tok::comma) && Left.isNot(tok::semi)))
514       return 20;
515 
516     if (Left.is(tok::semi) || Left.is(tok::comma) ||
517         Left.ClosesTemplateDeclaration)
518       return 0;
519 
520     // In Objective-C method expressions, prefer breaking before "param:" over
521     // breaking after it.
522     if (isObjCSelectorName(Right))
523       return 0;
524     if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
525       return 20;
526 
527     if (Left.is(tok::l_paren))
528       return 20;
529 
530     if (Left.is(tok::question) || Left.Type == TT_ConditionalExpr)
531       return prec::Assignment;
532     prec::Level Level = getPrecedence(Left);
533 
534     // Breaking after an assignment leads to a bad result as the two sides of
535     // the assignment are visually very close together.
536     if (Level == prec::Assignment)
537       return 50;
538 
539     if (Level != prec::Unknown)
540       return Level;
541 
542     if (Right.is(tok::arrow) || Right.is(tok::period))
543       return 150;
544 
545     return 3;
546   }
547 
548   unsigned getColumnLimit() {
549     return Style.ColumnLimit - (Line.InPPDirective ? 1 : 0);
550   }
551 
552   /// \brief Calculate the number of lines needed to format the remaining part
553   /// of the unwrapped line.
554   ///
555   /// Assumes the formatting so far has led to
556   /// the \c LineSta \p State. If \p NewLine is set, a new line will be
557   /// added after the previous token.
558   ///
559   /// \param StopAt is used for optimization. If we can determine that we'll
560   /// definitely need at least \p StopAt additional lines, we already know of a
561   /// better solution.
562   unsigned calcPenalty(LineState State, bool NewLine, unsigned StopAt) {
563     // We are at the end of the unwrapped line, so we don't need any more lines.
564     if (State.NextToken == NULL)
565       return 0;
566 
567     if (!NewLine && State.NextToken->MustBreakBefore)
568       return UINT_MAX;
569     if (NewLine && !State.NextToken->CanBreakBefore &&
570         !(State.NextToken->is(tok::r_brace) &&
571           State.Stack.back().BreakBeforeClosingBrace))
572       return UINT_MAX;
573     if (!NewLine && State.NextToken->is(tok::r_brace) &&
574         State.Stack.back().BreakBeforeClosingBrace)
575       return UINT_MAX;
576     if (!NewLine && State.NextToken->Parent->is(tok::semi) &&
577         State.LineContainsContinuedForLoopSection)
578       return UINT_MAX;
579     if (!NewLine && State.NextToken->Parent->is(tok::comma) &&
580         State.NextToken->Type != TT_LineComment &&
581         State.Stack.back().BreakAfterComma)
582       return UINT_MAX;
583 
584     unsigned CurrentPenalty = 0;
585     if (NewLine) {
586       CurrentPenalty += Parameters.PenaltyIndentLevel * State.Stack.size() +
587                         splitPenalty(*State.NextToken->Parent);
588     } else {
589       if (State.Stack.size() < State.StartOfLineLevel)
590         CurrentPenalty += Parameters.PenaltyLevelDecrease *
591                           (State.StartOfLineLevel - State.Stack.size());
592     }
593 
594     addTokenToState(NewLine, true, State);
595 
596     // Exceeding column limit is bad, assign penalty.
597     if (State.Column > getColumnLimit()) {
598       unsigned ExcessCharacters = State.Column - getColumnLimit();
599       CurrentPenalty += Parameters.PenaltyExcessCharacter * ExcessCharacters;
600     }
601 
602     if (StopAt <= CurrentPenalty)
603       return UINT_MAX;
604     StopAt -= CurrentPenalty;
605     StateMap::iterator I = Memory.find(State);
606     if (I != Memory.end()) {
607       // If this state has already been examined, we can safely return the
608       // previous result if we
609       // - have not hit the optimatization (and thus returned UINT_MAX) OR
610       // - are now computing for a smaller or equal StopAt.
611       unsigned SavedResult = I->second.first;
612       unsigned SavedStopAt = I->second.second;
613       if (SavedResult != UINT_MAX)
614         return SavedResult + CurrentPenalty;
615       else if (StopAt <= SavedStopAt)
616         return UINT_MAX;
617     }
618 
619     unsigned NoBreak = calcPenalty(State, false, StopAt);
620     unsigned WithBreak = calcPenalty(State, true, std::min(StopAt, NoBreak));
621     unsigned Result = std::min(NoBreak, WithBreak);
622 
623     // We have to store 'Result' without adding 'CurrentPenalty' as the latter
624     // can depend on 'NewLine'.
625     Memory[State] = std::pair<unsigned, unsigned>(Result, StopAt);
626 
627     return Result == UINT_MAX ? UINT_MAX : Result + CurrentPenalty;
628   }
629 
630   FormatStyle Style;
631   SourceManager &SourceMgr;
632   const AnnotatedLine &Line;
633   const unsigned FirstIndent;
634   const bool FitsOnALine;
635   const AnnotatedToken &RootToken;
636   tooling::Replacements &Replaces;
637 
638   // A map from an indent state to a pair (Result, Used-StopAt).
639   typedef std::map<LineState, std::pair<unsigned, unsigned> > StateMap;
640   StateMap Memory;
641 
642   OptimizationParameters Parameters;
643 };
644 
645 /// \brief Determines extra information about the tokens comprising an
646 /// \c UnwrappedLine.
647 class TokenAnnotator {
648 public:
649   TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
650                  AnnotatedLine &Line)
651       : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Line(Line) {}
652 
653   /// \brief A parser that gathers additional information about tokens.
654   ///
655   /// The \c TokenAnnotator tries to matches parenthesis and square brakets and
656   /// store a parenthesis levels. It also tries to resolve matching "<" and ">"
657   /// into template parameter lists.
658   class AnnotatingParser {
659   public:
660     AnnotatingParser(AnnotatedToken &RootToken)
661         : CurrentToken(&RootToken), KeywordVirtualFound(false),
662           ColonIsObjCMethodExpr(false) {}
663 
664     bool parseAngle() {
665       while (CurrentToken != NULL) {
666         if (CurrentToken->is(tok::greater)) {
667           CurrentToken->Type = TT_TemplateCloser;
668           next();
669           return true;
670         }
671         if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) ||
672             CurrentToken->is(tok::r_brace))
673           return false;
674         if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) ||
675             CurrentToken->is(tok::question) || CurrentToken->is(tok::colon))
676           return false;
677         if (!consumeToken())
678           return false;
679       }
680       return false;
681     }
682 
683     bool parseParens() {
684       if (CurrentToken != NULL && CurrentToken->is(tok::caret))
685         CurrentToken->Parent->Type = TT_ObjCBlockLParen;
686       while (CurrentToken != NULL) {
687         if (CurrentToken->is(tok::r_paren)) {
688           next();
689           return true;
690         }
691         if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace))
692           return false;
693         if (!consumeToken())
694           return false;
695       }
696       return false;
697     }
698 
699     bool parseSquare() {
700       if (!CurrentToken)
701         return false;
702 
703       // A '[' could be an index subscript (after an indentifier or after
704       // ')' or ']'), or it could be the start of an Objective-C method
705       // expression.
706       AnnotatedToken *LSquare = CurrentToken->Parent;
707       bool StartsObjCMethodExpr =
708           !LSquare->Parent || LSquare->Parent->is(tok::colon) ||
709           LSquare->Parent->is(tok::l_square) ||
710           LSquare->Parent->is(tok::l_paren) ||
711           LSquare->Parent->is(tok::kw_return) ||
712           LSquare->Parent->is(tok::kw_throw) ||
713           getBinOpPrecedence(LSquare->Parent->FormatTok.Tok.getKind(),
714                              true, true) > prec::Unknown;
715 
716       bool ColonWasObjCMethodExpr = ColonIsObjCMethodExpr;
717       if (StartsObjCMethodExpr) {
718         ColonIsObjCMethodExpr = true;
719         LSquare->Type = TT_ObjCMethodExpr;
720       }
721 
722       while (CurrentToken != NULL) {
723         if (CurrentToken->is(tok::r_square)) {
724           if (StartsObjCMethodExpr) {
725             ColonIsObjCMethodExpr = ColonWasObjCMethodExpr;
726             CurrentToken->Type = TT_ObjCMethodExpr;
727           }
728           next();
729           return true;
730         }
731         if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace))
732           return false;
733         if (!consumeToken())
734           return false;
735       }
736       return false;
737     }
738 
739     bool parseBrace() {
740       while (CurrentToken != NULL) {
741         if (CurrentToken->is(tok::r_brace)) {
742           next();
743           return true;
744         }
745         if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square))
746           return false;
747         if (!consumeToken())
748           return false;
749       }
750       // Lines can currently end with '{'.
751       return true;
752     }
753 
754     bool parseConditional() {
755       while (CurrentToken != NULL) {
756         if (CurrentToken->is(tok::colon)) {
757           CurrentToken->Type = TT_ConditionalExpr;
758           next();
759           return true;
760         }
761         if (!consumeToken())
762           return false;
763       }
764       return false;
765     }
766 
767     bool parseTemplateDeclaration() {
768       if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
769         CurrentToken->Type = TT_TemplateOpener;
770         next();
771         if (!parseAngle())
772           return false;
773         CurrentToken->Parent->ClosesTemplateDeclaration = true;
774         parseLine();
775         return true;
776       }
777       return false;
778     }
779 
780     bool consumeToken() {
781       AnnotatedToken *Tok = CurrentToken;
782       next();
783       switch (Tok->FormatTok.Tok.getKind()) {
784       case tok::plus:
785       case tok::minus:
786         // At the start of the line, +/- specific ObjectiveC method
787         // declarations.
788         if (Tok->Parent == NULL)
789           Tok->Type = TT_ObjCMethodSpecifier;
790         break;
791       case tok::colon:
792         // Colons from ?: are handled in parseConditional().
793         if (ColonIsObjCMethodExpr)
794           Tok->Type = TT_ObjCMethodExpr;
795         break;
796       case tok::l_paren: {
797         bool ParensWereObjCReturnType = Tok->Parent && Tok->Parent->Type ==
798                                         TT_ObjCMethodSpecifier;
799         if (!parseParens())
800           return false;
801         if (CurrentToken != NULL && CurrentToken->is(tok::colon)) {
802           CurrentToken->Type = TT_CtorInitializerColon;
803           next();
804         } else if (CurrentToken != NULL && ParensWereObjCReturnType) {
805           CurrentToken->Type = TT_ObjCSelectorStart;
806           next();
807         }
808       } break;
809       case tok::l_square:
810         if (!parseSquare())
811           return false;
812         break;
813       case tok::l_brace:
814         if (!parseBrace())
815           return false;
816         break;
817       case tok::less:
818         if (parseAngle())
819           Tok->Type = TT_TemplateOpener;
820         else {
821           Tok->Type = TT_BinaryOperator;
822           CurrentToken = Tok;
823           next();
824         }
825         break;
826       case tok::r_paren:
827       case tok::r_square:
828         return false;
829       case tok::r_brace:
830         // Lines can start with '}'.
831         if (Tok->Parent != NULL)
832           return false;
833         break;
834       case tok::greater:
835         Tok->Type = TT_BinaryOperator;
836         break;
837       case tok::kw_operator:
838         if (CurrentToken->is(tok::l_paren)) {
839           CurrentToken->Type = TT_OverloadedOperator;
840           next();
841           if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) {
842             CurrentToken->Type = TT_OverloadedOperator;
843             next();
844           }
845         } else {
846           while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) {
847             CurrentToken->Type = TT_OverloadedOperator;
848             next();
849           }
850         }
851         break;
852       case tok::question:
853         parseConditional();
854         break;
855       case tok::kw_template:
856         parseTemplateDeclaration();
857         break;
858       default:
859         break;
860       }
861       return true;
862     }
863 
864     void parseIncludeDirective() {
865       while (CurrentToken != NULL) {
866         CurrentToken->Type = TT_IncludePath;
867         next();
868       }
869     }
870 
871     void parsePreprocessorDirective() {
872       next();
873       if (CurrentToken == NULL)
874         return;
875       // Hashes in the middle of a line can lead to any strange token
876       // sequence.
877       if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
878         return;
879       switch (
880           CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
881       case tok::pp_include:
882       case tok::pp_import:
883         parseIncludeDirective();
884         break;
885       default:
886         break;
887       }
888     }
889 
890     LineType parseLine() {
891       if (CurrentToken->is(tok::hash)) {
892         parsePreprocessorDirective();
893         return LT_PreprocessorDirective;
894       }
895       while (CurrentToken != NULL) {
896         if (CurrentToken->is(tok::kw_virtual))
897           KeywordVirtualFound = true;
898         if (!consumeToken())
899           return LT_Invalid;
900       }
901       if (KeywordVirtualFound)
902         return LT_VirtualFunctionDecl;
903       return LT_Other;
904     }
905 
906     void next() {
907       if (CurrentToken != NULL && !CurrentToken->Children.empty())
908         CurrentToken = &CurrentToken->Children[0];
909       else
910         CurrentToken = NULL;
911     }
912 
913   private:
914     AnnotatedToken *CurrentToken;
915     bool KeywordVirtualFound;
916     bool ColonIsObjCMethodExpr;
917   };
918 
919   void createAnnotatedTokens(AnnotatedToken &Current) {
920     if (Current.FormatTok.Children.empty()) {
921       Line.Last = &Current;
922     } else {
923       Current.Children.push_back(AnnotatedToken(Current.FormatTok.Children[0]));
924       Current.Children.back().Parent = &Current;
925       createAnnotatedTokens(Current.Children.back());
926     }
927   }
928 
929   void calculateExtraInformation(AnnotatedToken &Current) {
930     Current.SpaceRequiredBefore = spaceRequiredBefore(Current);
931 
932     if (Current.FormatTok.MustBreakBefore) {
933       Current.MustBreakBefore = true;
934     } else {
935       if (Current.Type == TT_LineComment) {
936         Current.MustBreakBefore = Current.FormatTok.NewlinesBefore > 0;
937       } else if (Current.Type == TT_CtorInitializerColon ||
938                  Current.Parent->Type == TT_LineComment ||
939                  (Current.is(tok::string_literal) &&
940                   Current.Parent->is(tok::string_literal))) {
941         Current.MustBreakBefore = true;
942       } else {
943         Current.MustBreakBefore = false;
944       }
945     }
946     Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current);
947     if (!Current.Children.empty())
948       calculateExtraInformation(Current.Children[0]);
949   }
950 
951   void annotate() {
952     Line.Last = &Line.First;
953     createAnnotatedTokens(Line.First);
954 
955     AnnotatingParser Parser(Line.First);
956     Line.Type = Parser.parseLine();
957     if (Line.Type == LT_Invalid)
958       return;
959 
960     determineTokenTypes(Line.First, /*IsRHS=*/false);
961 
962     if (Line.First.Type == TT_ObjCMethodSpecifier)
963       Line.Type = LT_ObjCMethodDecl;
964     else if (Line.First.Type == TT_ObjCDecl)
965       Line.Type = LT_ObjCDecl;
966     else if (Line.First.Type == TT_ObjCProperty)
967       Line.Type = LT_ObjCProperty;
968 
969     Line.First.SpaceRequiredBefore = true;
970     Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
971     Line.First.CanBreakBefore = Line.First.MustBreakBefore;
972 
973     if (!Line.First.Children.empty())
974       calculateExtraInformation(Line.First.Children[0]);
975   }
976 
977 private:
978   void determineTokenTypes(AnnotatedToken &Current, bool IsRHS) {
979     if (getPrecedence(Current) == prec::Assignment ||
980         Current.is(tok::kw_return) || Current.is(tok::kw_throw))
981       IsRHS = true;
982 
983     if (Current.Type == TT_Unknown) {
984       if (Current.is(tok::star) || Current.is(tok::amp)) {
985         Current.Type = determineStarAmpUsage(Current, IsRHS);
986       } else if (Current.is(tok::minus) || Current.is(tok::plus) ||
987                  Current.is(tok::caret)) {
988         Current.Type = determinePlusMinusCaretUsage(Current);
989       } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) {
990         Current.Type = determineIncrementUsage(Current);
991       } else if (Current.is(tok::exclaim)) {
992         Current.Type = TT_UnaryOperator;
993       } else if (isBinaryOperator(Current)) {
994         Current.Type = TT_BinaryOperator;
995       } else if (Current.is(tok::comment)) {
996         std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
997                                             Lex.getLangOpts()));
998         if (StringRef(Data).startswith("//"))
999           Current.Type = TT_LineComment;
1000         else
1001           Current.Type = TT_BlockComment;
1002       } else if (Current.is(tok::r_paren) &&
1003                  (Current.Parent->Type == TT_PointerOrReference ||
1004                   Current.Parent->Type == TT_TemplateCloser) &&
1005                  (Current.Children.empty() ||
1006                   (Current.Children[0].isNot(tok::equal) &&
1007                    Current.Children[0].isNot(tok::semi) &&
1008                    Current.Children[0].isNot(tok::l_brace)))) {
1009         // FIXME: We need to get smarter and understand more cases of casts.
1010         Current.Type = TT_CastRParen;
1011       } else if (Current.is(tok::at) && Current.Children.size()) {
1012         switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
1013         case tok::objc_interface:
1014         case tok::objc_implementation:
1015         case tok::objc_protocol:
1016           Current.Type = TT_ObjCDecl;
1017           break;
1018         case tok::objc_property:
1019           Current.Type = TT_ObjCProperty;
1020           break;
1021         default:
1022           break;
1023         }
1024       }
1025     }
1026 
1027     if (!Current.Children.empty())
1028       determineTokenTypes(Current.Children[0], IsRHS);
1029   }
1030 
1031   bool isBinaryOperator(const AnnotatedToken &Tok) {
1032     // Comma is a binary operator, but does not behave as such wrt. formatting.
1033     return getPrecedence(Tok) > prec::Comma;
1034   }
1035 
1036   TokenType determineStarAmpUsage(const AnnotatedToken &Tok, bool IsRHS) {
1037     if (Tok.Parent == NULL)
1038       return TT_UnaryOperator;
1039     if (Tok.Children.size() == 0)
1040       return TT_Unknown;
1041     const FormatToken &PrevToken = Tok.Parent->FormatTok;
1042     const FormatToken &NextToken = Tok.Children[0].FormatTok;
1043 
1044     if (PrevToken.Tok.is(tok::l_paren) || PrevToken.Tok.is(tok::l_square) ||
1045         PrevToken.Tok.is(tok::comma) || PrevToken.Tok.is(tok::kw_return) ||
1046         PrevToken.Tok.is(tok::colon) || Tok.Parent->Type == TT_BinaryOperator ||
1047         Tok.Parent->Type == TT_CastRParen)
1048       return TT_UnaryOperator;
1049 
1050     if (PrevToken.Tok.isLiteral() || PrevToken.Tok.is(tok::r_paren) ||
1051         PrevToken.Tok.is(tok::r_square) || NextToken.Tok.isLiteral() ||
1052         NextToken.Tok.is(tok::plus) || NextToken.Tok.is(tok::minus) ||
1053         NextToken.Tok.is(tok::plusplus) || NextToken.Tok.is(tok::minusminus) ||
1054         NextToken.Tok.is(tok::tilde) || NextToken.Tok.is(tok::exclaim) ||
1055         NextToken.Tok.is(tok::l_paren) || NextToken.Tok.is(tok::l_square) ||
1056         NextToken.Tok.is(tok::kw_alignof) || NextToken.Tok.is(tok::kw_sizeof))
1057       return TT_BinaryOperator;
1058 
1059     if (NextToken.Tok.is(tok::comma) || NextToken.Tok.is(tok::r_paren) ||
1060         NextToken.Tok.is(tok::greater))
1061       return TT_PointerOrReference;
1062 
1063     // It is very unlikely that we are going to find a pointer or reference type
1064     // definition on the RHS of an assignment.
1065     if (IsRHS)
1066       return TT_BinaryOperator;
1067 
1068     return TT_PointerOrReference;
1069   }
1070 
1071   TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
1072     // Use heuristics to recognize unary operators.
1073     if (Tok.Parent->is(tok::equal) || Tok.Parent->is(tok::l_paren) ||
1074         Tok.Parent->is(tok::comma) || Tok.Parent->is(tok::l_square) ||
1075         Tok.Parent->is(tok::question) || Tok.Parent->is(tok::colon) ||
1076         Tok.Parent->is(tok::kw_return) || Tok.Parent->is(tok::kw_case) ||
1077         Tok.Parent->is(tok::at) || Tok.Parent->is(tok::l_brace))
1078       return TT_UnaryOperator;
1079 
1080     // There can't be to consecutive binary operators.
1081     if (Tok.Parent->Type == TT_BinaryOperator)
1082       return TT_UnaryOperator;
1083 
1084     // Fall back to marking the token as binary operator.
1085     return TT_BinaryOperator;
1086   }
1087 
1088   /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
1089   TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
1090     if (Tok.Parent == NULL)
1091       return TT_UnaryOperator;
1092     if (Tok.Parent->is(tok::r_paren) || Tok.Parent->is(tok::r_square) ||
1093         Tok.Parent->is(tok::identifier))
1094       return TT_TrailingUnaryOperator;
1095 
1096     return TT_UnaryOperator;
1097   }
1098 
1099   bool spaceRequiredBetween(const AnnotatedToken &Left,
1100                             const AnnotatedToken &Right) {
1101     if (Right.is(tok::hashhash))
1102       return Left.is(tok::hash);
1103     if (Left.is(tok::hashhash) || Left.is(tok::hash))
1104       return Right.is(tok::hash);
1105     if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
1106       return false;
1107     if (Right.is(tok::less) &&
1108         (Left.is(tok::kw_template) ||
1109          (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
1110       return true;
1111     if (Left.is(tok::arrow) || Right.is(tok::arrow))
1112       return false;
1113     if (Left.is(tok::exclaim) || Left.is(tok::tilde))
1114       return false;
1115     if (Left.is(tok::at) &&
1116         (Right.is(tok::identifier) || Right.is(tok::string_literal) ||
1117          Right.is(tok::char_constant) || Right.is(tok::numeric_constant) ||
1118          Right.is(tok::l_paren) || Right.is(tok::l_brace) ||
1119          Right.is(tok::kw_true) || Right.is(tok::kw_false)))
1120       return false;
1121     if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less))
1122       return false;
1123     if (Right.is(tok::amp) || Right.is(tok::star))
1124       return Left.FormatTok.Tok.isLiteral() ||
1125              (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
1126               !Style.PointerAndReferenceBindToType);
1127     if (Left.is(tok::amp) || Left.is(tok::star))
1128       return Right.FormatTok.Tok.isLiteral() ||
1129              Style.PointerAndReferenceBindToType;
1130     if (Right.is(tok::star) && Left.is(tok::l_paren))
1131       return false;
1132     if (Left.is(tok::l_square) || Right.is(tok::r_square))
1133       return false;
1134     if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
1135       return false;
1136     if (Left.is(tok::coloncolon) ||
1137         (Right.is(tok::coloncolon) &&
1138          (Left.is(tok::identifier) || Left.is(tok::greater))))
1139       return false;
1140     if (Left.is(tok::period) || Right.is(tok::period))
1141       return false;
1142     if (Left.is(tok::colon))
1143       return Left.Type != TT_ObjCMethodExpr;
1144     if (Right.is(tok::colon))
1145       return Right.Type != TT_ObjCMethodExpr;
1146     if (Left.is(tok::l_paren))
1147       return false;
1148     if (Right.is(tok::l_paren)) {
1149       return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) ||
1150              Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
1151              Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
1152              Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
1153              Left.is(tok::kw_delete);
1154     }
1155     if (Left.is(tok::at) &&
1156         Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1157       return false;
1158     if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1159       return false;
1160     return true;
1161   }
1162 
1163   bool spaceRequiredBefore(const AnnotatedToken &Tok) {
1164     if (Line.Type == LT_ObjCMethodDecl) {
1165       if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
1166           Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
1167         return true;
1168       if (Tok.is(tok::colon))
1169         return false;
1170       if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
1171         return Style.ObjCSpaceBeforeReturnType || Tok.isNot(tok::l_paren);
1172       if (Tok.Type == TT_ObjCSelectorStart)
1173         return !Style.ObjCSpaceBeforeReturnType;
1174       if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
1175         // Don't space between ')' and <id>
1176         return false;
1177       if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren))
1178         // Don't space between ':' and '('
1179         return false;
1180     }
1181     if (Line.Type == LT_ObjCProperty &&
1182         (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
1183       return false;
1184 
1185     if (Tok.Parent->is(tok::comma))
1186       return true;
1187     if (Tok.Type == TT_IncludePath)
1188       return Tok.is(tok::less) || Tok.is(tok::string_literal);
1189     if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
1190       return true;
1191     if (Tok.Type == TT_OverloadedOperator)
1192       return Tok.is(tok::identifier) || Tok.is(tok::kw_new) ||
1193              Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool);
1194     if (Tok.Parent->Type == TT_OverloadedOperator)
1195       return false;
1196     if (Tok.is(tok::colon))
1197       return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() &&
1198              Tok.Type != TT_ObjCMethodExpr;
1199     if (Tok.Parent->Type == TT_UnaryOperator ||
1200         Tok.Parent->Type == TT_CastRParen)
1201       return false;
1202     if (Tok.Type == TT_UnaryOperator)
1203       return Tok.Parent->isNot(tok::l_paren) &&
1204              Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) &&
1205              (Tok.Parent->isNot(tok::colon) ||
1206               Tok.Parent->Type != TT_ObjCMethodExpr);
1207     if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
1208       return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
1209              TT_TemplateCloser && Style.SplitTemplateClosingGreater;
1210     }
1211     if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
1212       return true;
1213     if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
1214       return false;
1215     if (Tok.is(tok::less) && Line.First.is(tok::hash))
1216       return true;
1217     if (Tok.Type == TT_TrailingUnaryOperator)
1218       return false;
1219     return spaceRequiredBetween(*Tok.Parent, Tok);
1220   }
1221 
1222   bool canBreakBefore(const AnnotatedToken &Right) {
1223     const AnnotatedToken &Left = *Right.Parent;
1224     if (Line.Type == LT_ObjCMethodDecl) {
1225       if (Right.is(tok::identifier) && !Right.Children.empty() &&
1226           Right.Children[0].is(tok::colon) && Left.is(tok::identifier))
1227         return true;
1228       if (Right.is(tok::identifier) && Left.is(tok::l_paren) &&
1229           Left.Parent->is(tok::colon))
1230         // Don't break this identifier as ':' or identifier
1231         // before it will break.
1232         return false;
1233       if (Right.is(tok::colon) && Left.is(tok::identifier) &&
1234           Left.CanBreakBefore)
1235         // Don't break at ':' if identifier before it can beak.
1236         return false;
1237     }
1238     if (Right.Type == TT_IncludePath)
1239       return false;
1240     if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
1241       return false;
1242     if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
1243       return true;
1244     if (isObjCSelectorName(Right))
1245       return true;
1246     if (Left.ClosesTemplateDeclaration)
1247       return true;
1248     if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
1249         Left.Type == TT_UnaryOperator || Right.Type == TT_ConditionalExpr)
1250       return false;
1251     if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
1252       return false;
1253 
1254     if (Right.is(tok::comment))
1255       // We rely on MustBreakBefore being set correctly here as we should not
1256       // change the "binding" behavior of a comment.
1257       return false;
1258 
1259     // We only break before r_brace if there was a corresponding break before
1260     // the l_brace, which is tracked by BreakBeforeClosingBrace.
1261     if (Right.is(tok::r_brace))
1262       return false;
1263 
1264     if (Right.is(tok::r_paren) ||
1265         Right.is(tok::greater))
1266       return false;
1267     return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
1268            Left.is(tok::comma) || Right.is(tok::lessless) ||
1269            Right.is(tok::arrow) || Right.is(tok::period) ||
1270            Right.is(tok::colon) || Left.is(tok::semi) ||
1271            Left.is(tok::l_brace) || Left.is(tok::question) || Left.Type ==
1272            TT_ConditionalExpr || (Left.is(tok::r_paren) && Left.Type !=
1273                                   TT_CastRParen && Right.is(tok::identifier)) ||
1274            (Left.is(tok::l_paren) && !Right.is(tok::r_paren));
1275   }
1276 
1277   FormatStyle Style;
1278   SourceManager &SourceMgr;
1279   Lexer &Lex;
1280   AnnotatedLine &Line;
1281 };
1282 
1283 class LexerBasedFormatTokenSource : public FormatTokenSource {
1284 public:
1285   LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)
1286       : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
1287         IdentTable(Lex.getLangOpts()) {
1288     Lex.SetKeepWhitespaceMode(true);
1289   }
1290 
1291   virtual FormatToken getNextToken() {
1292     if (GreaterStashed) {
1293       FormatTok.NewlinesBefore = 0;
1294       FormatTok.WhiteSpaceStart =
1295           FormatTok.Tok.getLocation().getLocWithOffset(1);
1296       FormatTok.WhiteSpaceLength = 0;
1297       GreaterStashed = false;
1298       return FormatTok;
1299     }
1300 
1301     FormatTok = FormatToken();
1302     Lex.LexFromRawLexer(FormatTok.Tok);
1303     StringRef Text = rawTokenText(FormatTok.Tok);
1304     FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
1305     if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
1306       FormatTok.IsFirst = true;
1307 
1308     // Consume and record whitespace until we find a significant token.
1309     while (FormatTok.Tok.is(tok::unknown)) {
1310       FormatTok.NewlinesBefore += Text.count('\n');
1311       FormatTok.HasUnescapedNewline = Text.count("\\\n") !=
1312                                       FormatTok.NewlinesBefore;
1313       FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
1314 
1315       if (FormatTok.Tok.is(tok::eof))
1316         return FormatTok;
1317       Lex.LexFromRawLexer(FormatTok.Tok);
1318       Text = rawTokenText(FormatTok.Tok);
1319     }
1320 
1321     // Now FormatTok is the next non-whitespace token.
1322     FormatTok.TokenLength = Text.size();
1323 
1324     // In case the token starts with escaped newlines, we want to
1325     // take them into account as whitespace - this pattern is quite frequent
1326     // in macro definitions.
1327     // FIXME: What do we want to do with other escaped spaces, and escaped
1328     // spaces or newlines in the middle of tokens?
1329     // FIXME: Add a more explicit test.
1330     unsigned i = 0;
1331     while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') {
1332       FormatTok.WhiteSpaceLength += 2;
1333       FormatTok.TokenLength -= 2;
1334       i += 2;
1335     }
1336 
1337     if (FormatTok.Tok.is(tok::raw_identifier)) {
1338       IdentifierInfo &Info = IdentTable.get(Text);
1339       FormatTok.Tok.setIdentifierInfo(&Info);
1340       FormatTok.Tok.setKind(Info.getTokenID());
1341     }
1342 
1343     if (FormatTok.Tok.is(tok::greatergreater)) {
1344       FormatTok.Tok.setKind(tok::greater);
1345       GreaterStashed = true;
1346     }
1347 
1348     return FormatTok;
1349   }
1350 
1351 private:
1352   FormatToken FormatTok;
1353   bool GreaterStashed;
1354   Lexer &Lex;
1355   SourceManager &SourceMgr;
1356   IdentifierTable IdentTable;
1357 
1358   /// Returns the text of \c FormatTok.
1359   StringRef rawTokenText(Token &Tok) {
1360     return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
1361                      Tok.getLength());
1362   }
1363 };
1364 
1365 class Formatter : public UnwrappedLineConsumer {
1366 public:
1367   Formatter(DiagnosticsEngine &Diag, const FormatStyle &Style, Lexer &Lex,
1368             SourceManager &SourceMgr,
1369             const std::vector<CharSourceRange> &Ranges)
1370       : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1371         Ranges(Ranges) {}
1372 
1373   virtual ~Formatter() {}
1374 
1375   tooling::Replacements format() {
1376     LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
1377     UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
1378     StructuralError = Parser.parse();
1379     unsigned PreviousEndOfLineColumn = 0;
1380     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1381       TokenAnnotator Annotator(Style, SourceMgr, Lex, AnnotatedLines[i]);
1382       Annotator.annotate();
1383     }
1384     for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
1385                                               E = AnnotatedLines.end();
1386          I != E; ++I) {
1387       const AnnotatedLine &TheLine = *I;
1388       if (touchesRanges(TheLine) && TheLine.Type != LT_Invalid) {
1389         unsigned Indent = formatFirstToken(TheLine.First, TheLine.Level,
1390                                            TheLine.InPPDirective,
1391                                            PreviousEndOfLineColumn);
1392         bool FitsOnALine = tryFitMultipleLinesInOne(Indent, I, E);
1393         UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
1394                                          FitsOnALine, TheLine.First, Replaces,
1395                                          StructuralError);
1396         PreviousEndOfLineColumn = Formatter.format();
1397       } else {
1398         // If we did not reformat this unwrapped line, the column at the end of
1399         // the last token is unchanged - thus, we can calculate the end of the
1400         // last token, and return the result.
1401         PreviousEndOfLineColumn =
1402             SourceMgr.getSpellingColumnNumber(
1403                 TheLine.Last->FormatTok.Tok.getLocation()) +
1404             Lex.MeasureTokenLength(TheLine.Last->FormatTok.Tok.getLocation(),
1405                                    SourceMgr, Lex.getLangOpts()) -
1406             1;
1407       }
1408     }
1409     return Replaces;
1410   }
1411 
1412 private:
1413   /// \brief Tries to merge lines into one.
1414   ///
1415   /// This will change \c Line and \c AnnotatedLine to contain the merged line,
1416   /// if possible; note that \c I will be incremented when lines are merged.
1417   ///
1418   /// Returns whether the resulting \c Line can fit in a single line.
1419   bool tryFitMultipleLinesInOne(unsigned Indent,
1420                                 std::vector<AnnotatedLine>::iterator &I,
1421                                 std::vector<AnnotatedLine>::iterator E) {
1422     unsigned Limit = Style.ColumnLimit - (I->InPPDirective ? 1 : 0) - Indent;
1423 
1424     // Check whether the UnwrappedLine can be put onto a single line. If
1425     // so, this is bound to be the optimal solution (by definition) and we
1426     // don't need to analyze the entire solution space.
1427     unsigned Length = 0;
1428     if (!fitsIntoLimit(I->First, Limit, &Length))
1429       return false;
1430     if (Limit == Length)
1431       return true; // Couldn't fit a space.
1432     Limit -= Length + 1; // One space.
1433     if (I + 1 == E)
1434       return true;
1435 
1436     if (I->Last->is(tok::l_brace)) {
1437       tryMergeSimpleBlock(I, E, Limit);
1438     } else if (I->First.is(tok::kw_if)) {
1439       tryMergeSimpleIf(I, E, Limit);
1440     } else if (I->InPPDirective && (I->First.FormatTok.HasUnescapedNewline ||
1441                                     I->First.FormatTok.IsFirst)) {
1442       tryMergeSimplePPDirective(I, E, Limit);
1443     }
1444     return true;
1445   }
1446 
1447   void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I,
1448                                  std::vector<AnnotatedLine>::iterator E,
1449                                  unsigned Limit) {
1450     AnnotatedLine &Line = *I;
1451     if (!(I + 1)->InPPDirective || (I + 1)->First.FormatTok.HasUnescapedNewline)
1452       return;
1453     if (I + 2 != E && (I + 2)->InPPDirective &&
1454         !(I + 2)->First.FormatTok.HasUnescapedNewline)
1455       return;
1456     if (!fitsIntoLimit((I + 1)->First, Limit)) return;
1457     join(Line, *(++I));
1458   }
1459 
1460   void tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator &I,
1461                         std::vector<AnnotatedLine>::iterator E,
1462                         unsigned Limit) {
1463     if (!Style.AllowShortIfStatementsOnASingleLine)
1464       return;
1465     AnnotatedLine &Line = *I;
1466     if (!fitsIntoLimit((I + 1)->First, Limit))
1467       return;
1468     if ((I + 1)->First.is(tok::kw_if) || (I + 1)->First.Type == TT_LineComment)
1469       return;
1470     // Only inline simple if's (no nested if or else).
1471     if (I + 2 != E && (I + 2)->First.is(tok::kw_else))
1472       return;
1473     join(Line, *(++I));
1474   }
1475 
1476   void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I,
1477                         std::vector<AnnotatedLine>::iterator E,
1478                         unsigned Limit){
1479     // Check that we still have three lines and they fit into the limit.
1480     if (I + 2 == E || !nextTwoLinesFitInto(I, Limit))
1481       return;
1482 
1483     // First, check that the current line allows merging. This is the case if
1484     // we're not in a control flow statement and the last token is an opening
1485     // brace.
1486     AnnotatedLine &Line = *I;
1487     bool AllowedTokens =
1488         Line.First.isNot(tok::kw_if) && Line.First.isNot(tok::kw_while) &&
1489         Line.First.isNot(tok::kw_do) && Line.First.isNot(tok::r_brace) &&
1490         Line.First.isNot(tok::kw_else) && Line.First.isNot(tok::kw_try) &&
1491         Line.First.isNot(tok::kw_catch) && Line.First.isNot(tok::kw_for) &&
1492         // This gets rid of all ObjC @ keywords and methods.
1493         Line.First.isNot(tok::at) && Line.First.isNot(tok::minus) &&
1494         Line.First.isNot(tok::plus);
1495     if (!AllowedTokens)
1496       return;
1497 
1498     // Second, check that the next line does not contain any braces - if it
1499     // does, readability declines when putting it into a single line.
1500     const AnnotatedToken *Tok = &(I + 1)->First;
1501     if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore)
1502       return;
1503     do {
1504       if (Tok->is(tok::l_brace) || Tok->is(tok::r_brace))
1505         return;
1506       Tok = Tok->Children.empty() ? NULL : &Tok->Children.back();
1507     } while (Tok != NULL);
1508 
1509     // Last, check that the third line contains a single closing brace.
1510     Tok = &(I + 2)->First;
1511     if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) ||
1512         Tok->MustBreakBefore)
1513       return;
1514 
1515     // If the merged line fits, we use that instead and skip the next two lines.
1516     Line.Last->Children.push_back((I + 1)->First);
1517     while (!Line.Last->Children.empty()) {
1518       Line.Last->Children[0].Parent = Line.Last;
1519       Line.Last = &Line.Last->Children[0];
1520     }
1521 
1522     join(Line, *(I + 1));
1523     join(Line, *(I + 2));
1524     I += 2;
1525   }
1526 
1527   bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I,
1528                            unsigned Limit) {
1529     unsigned LengthLine1 = 0;
1530     unsigned LengthLine2 = 0;
1531     if (!fitsIntoLimit((I + 1)->First, Limit, &LengthLine1) ||
1532         !fitsIntoLimit((I + 2)->First, Limit, &LengthLine2))
1533       return false;
1534     return LengthLine1 + LengthLine2 + 1 <= Limit; // One space.
1535   }
1536 
1537   void join(AnnotatedLine &A, const AnnotatedLine &B) {
1538     A.Last->Children.push_back(B.First);
1539     while (!A.Last->Children.empty()) {
1540       A.Last->Children[0].Parent = A.Last;
1541       A.Last = &A.Last->Children[0];
1542     }
1543   }
1544 
1545   bool touchesRanges(const AnnotatedLine &TheLine) {
1546     const FormatToken *First = &TheLine.First.FormatTok;
1547     const FormatToken *Last = &TheLine.Last->FormatTok;
1548     CharSourceRange LineRange = CharSourceRange::getTokenRange(
1549                                     First->Tok.getLocation(),
1550                                     Last->Tok.getLocation());
1551     for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1552       if (!SourceMgr.isBeforeInTranslationUnit(LineRange.getEnd(),
1553                                                Ranges[i].getBegin()) &&
1554           !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
1555                                                LineRange.getBegin()))
1556         return true;
1557     }
1558     return false;
1559   }
1560 
1561   virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1562     AnnotatedLines.push_back(
1563         AnnotatedLine(TheLine.RootToken, TheLine.Level, TheLine.InPPDirective));
1564   }
1565 
1566   /// \brief Add a new line and the required indent before the first Token
1567   /// of the \c UnwrappedLine if there was no structural parsing error.
1568   /// Returns the indent level of the \c UnwrappedLine.
1569   unsigned formatFirstToken(const AnnotatedToken &RootToken, unsigned Level,
1570                             bool InPPDirective,
1571                             unsigned PreviousEndOfLineColumn) {
1572     const FormatToken &Tok = RootToken.FormatTok;
1573     if (!Tok.WhiteSpaceStart.isValid() || StructuralError)
1574       return SourceMgr.getSpellingColumnNumber(Tok.Tok.getLocation()) - 1;
1575 
1576     unsigned Newlines = std::min(Tok.NewlinesBefore,
1577                                  Style.MaxEmptyLinesToKeep + 1);
1578     if (Newlines == 0 && !Tok.IsFirst)
1579       Newlines = 1;
1580     unsigned Indent = Level * 2;
1581 
1582     bool IsAccessModifier = false;
1583     if (RootToken.is(tok::kw_public) || RootToken.is(tok::kw_protected) ||
1584         RootToken.is(tok::kw_private))
1585       IsAccessModifier = true;
1586     else if (RootToken.is(tok::at) && !RootToken.Children.empty() &&
1587              (RootToken.Children[0].isObjCAtKeyword(tok::objc_public) ||
1588               RootToken.Children[0].isObjCAtKeyword(tok::objc_protected) ||
1589               RootToken.Children[0].isObjCAtKeyword(tok::objc_package) ||
1590               RootToken.Children[0].isObjCAtKeyword(tok::objc_private)))
1591       IsAccessModifier = true;
1592 
1593     if (IsAccessModifier &&
1594         static_cast<int>(Indent) + Style.AccessModifierOffset >= 0)
1595       Indent += Style.AccessModifierOffset;
1596     if (!InPPDirective || Tok.HasUnescapedNewline) {
1597       replaceWhitespace(Tok, Newlines, Indent, Style, SourceMgr, Replaces);
1598     } else {
1599       replacePPWhitespace(Tok, Newlines, Indent, PreviousEndOfLineColumn, Style,
1600                           SourceMgr, Replaces);
1601     }
1602     return Indent;
1603   }
1604 
1605   DiagnosticsEngine &Diag;
1606   FormatStyle Style;
1607   Lexer &Lex;
1608   SourceManager &SourceMgr;
1609   tooling::Replacements Replaces;
1610   std::vector<CharSourceRange> Ranges;
1611   std::vector<AnnotatedLine> AnnotatedLines;
1612   bool StructuralError;
1613 };
1614 
1615 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1616                                SourceManager &SourceMgr,
1617                                std::vector<CharSourceRange> Ranges,
1618                                DiagnosticConsumer *DiagClient) {
1619   IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
1620   OwningPtr<DiagnosticConsumer> DiagPrinter;
1621   if (DiagClient == 0) {
1622     DiagPrinter.reset(new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts));
1623     DiagPrinter->BeginSourceFile(Lex.getLangOpts(), Lex.getPP());
1624     DiagClient = DiagPrinter.get();
1625   }
1626   DiagnosticsEngine Diagnostics(
1627       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts,
1628       DiagClient, false);
1629   Diagnostics.setSourceManager(&SourceMgr);
1630   Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges);
1631   return formatter.format();
1632 }
1633 
1634 LangOptions getFormattingLangOpts() {
1635   LangOptions LangOpts;
1636   LangOpts.CPlusPlus = 1;
1637   LangOpts.CPlusPlus11 = 1;
1638   LangOpts.Bool = 1;
1639   LangOpts.ObjC1 = 1;
1640   LangOpts.ObjC2 = 1;
1641   return LangOpts;
1642 }
1643 
1644 } // namespace format
1645 } // namespace clang
1646