1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "format-formatter"
17 
18 #include "BreakableToken.h"
19 #include "TokenAnnotator.h"
20 #include "UnwrappedLineParser.h"
21 #include "WhitespaceManager.h"
22 #include "clang/Basic/Diagnostic.h"
23 #include "clang/Basic/OperatorPrecedence.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Format/Format.h"
26 #include "clang/Frontend/TextDiagnosticPrinter.h"
27 #include "clang/Lex/Lexer.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/Support/Allocator.h"
30 #include "llvm/Support/Debug.h"
31 #include <queue>
32 #include <string>
33 
34 namespace clang {
35 namespace format {
36 
37 FormatStyle getLLVMStyle() {
38   FormatStyle LLVMStyle;
39   LLVMStyle.ColumnLimit = 80;
40   LLVMStyle.MaxEmptyLinesToKeep = 1;
41   LLVMStyle.PointerBindsToType = false;
42   LLVMStyle.DerivePointerBinding = false;
43   LLVMStyle.AccessModifierOffset = -2;
44   LLVMStyle.Standard = FormatStyle::LS_Cpp03;
45   LLVMStyle.IndentCaseLabels = false;
46   LLVMStyle.SpacesBeforeTrailingComments = 1;
47   LLVMStyle.BinPackParameters = true;
48   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
49   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
50   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
51   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
52   LLVMStyle.PenaltyExcessCharacter = 1000000;
53   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 75;
54   LLVMStyle.AlignEscapedNewlinesLeft = false;
55   return LLVMStyle;
56 }
57 
58 FormatStyle getGoogleStyle() {
59   FormatStyle GoogleStyle;
60   GoogleStyle.ColumnLimit = 80;
61   GoogleStyle.MaxEmptyLinesToKeep = 1;
62   GoogleStyle.PointerBindsToType = true;
63   GoogleStyle.DerivePointerBinding = true;
64   GoogleStyle.AccessModifierOffset = -1;
65   GoogleStyle.Standard = FormatStyle::LS_Auto;
66   GoogleStyle.IndentCaseLabels = true;
67   GoogleStyle.SpacesBeforeTrailingComments = 2;
68   GoogleStyle.BinPackParameters = true;
69   GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true;
70   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
71   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
72   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
73   GoogleStyle.PenaltyExcessCharacter = 1000000;
74   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
75   GoogleStyle.AlignEscapedNewlinesLeft = true;
76   return GoogleStyle;
77 }
78 
79 FormatStyle getChromiumStyle() {
80   FormatStyle ChromiumStyle = getGoogleStyle();
81   ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
82   ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
83   ChromiumStyle.BinPackParameters = false;
84   ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
85   ChromiumStyle.DerivePointerBinding = false;
86   return ChromiumStyle;
87 }
88 
89 // Returns the length of everything up to the first possible line break after
90 // the ), ], } or > matching \c Tok.
91 static unsigned getLengthToMatchingParen(const AnnotatedToken &Tok) {
92   if (Tok.MatchingParen == NULL)
93     return 0;
94   AnnotatedToken *End = Tok.MatchingParen;
95   while (!End->Children.empty() && !End->Children[0].CanBreakBefore) {
96     End = &End->Children[0];
97   }
98   return End->TotalLength - Tok.TotalLength + 1;
99 }
100 
101 class UnwrappedLineFormatter {
102 public:
103   UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
104                          const AnnotatedLine &Line, unsigned FirstIndent,
105                          const AnnotatedToken &RootToken,
106                          WhitespaceManager &Whitespaces)
107       : Style(Style), SourceMgr(SourceMgr), Line(Line),
108         FirstIndent(FirstIndent), RootToken(RootToken),
109         Whitespaces(Whitespaces), Count(0) {}
110 
111   /// \brief Formats an \c UnwrappedLine.
112   ///
113   /// \returns The column after the last token in the last line of the
114   /// \c UnwrappedLine.
115   unsigned format(const AnnotatedLine *NextLine) {
116     // Initialize state dependent on indent.
117     LineState State;
118     State.Column = FirstIndent;
119     State.NextToken = &RootToken;
120     State.Stack.push_back(
121         ParenState(FirstIndent, FirstIndent, !Style.BinPackParameters,
122                    /*NoLineBreak=*/ false));
123     State.LineContainsContinuedForLoopSection = false;
124     State.ParenLevel = 0;
125     State.StartOfStringLiteral = 0;
126     State.StartOfLineLevel = State.ParenLevel;
127 
128     // The first token has already been indented and thus consumed.
129     moveStateToNextToken(State, /*DryRun=*/ false);
130 
131     // If everything fits on a single line, just put it there.
132     unsigned ColumnLimit = Style.ColumnLimit;
133     if (NextLine && NextLine->InPPDirective &&
134         !NextLine->First.FormatTok.HasUnescapedNewline)
135       ColumnLimit = getColumnLimit();
136     if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) {
137       while (State.NextToken != NULL) {
138         addTokenToState(false, false, State);
139       }
140       return State.Column;
141     }
142 
143     // If the ObjC method declaration does not fit on a line, we should format
144     // it with one arg per line.
145     if (Line.Type == LT_ObjCMethodDecl)
146       State.Stack.back().BreakBeforeParameter = true;
147 
148     // Find best solution in solution space.
149     return analyzeSolutionSpace(State);
150   }
151 
152 private:
153   void DebugTokenState(const AnnotatedToken &AnnotatedTok) {
154     const Token &Tok = AnnotatedTok.FormatTok.Tok;
155     llvm::errs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
156                               Tok.getLength());
157     llvm::errs();
158   }
159 
160   struct ParenState {
161     ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
162                bool NoLineBreak)
163         : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0),
164           BreakBeforeClosingBrace(false), QuestionColumn(0),
165           AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
166           NoLineBreak(NoLineBreak), ColonPos(0), StartOfFunctionCall(0),
167           NestedNameSpecifierContinuation(0), CallContinuation(0),
168           VariablePos(0) {}
169 
170     /// \brief The position to which a specific parenthesis level needs to be
171     /// indented.
172     unsigned Indent;
173 
174     /// \brief The position of the last space on each level.
175     ///
176     /// Used e.g. to break like:
177     /// functionCall(Parameter, otherCall(
178     ///                             OtherParameter));
179     unsigned LastSpace;
180 
181     /// \brief The position the first "<<" operator encountered on each level.
182     ///
183     /// Used to align "<<" operators. 0 if no such operator has been encountered
184     /// on a level.
185     unsigned FirstLessLess;
186 
187     /// \brief Whether a newline needs to be inserted before the block's closing
188     /// brace.
189     ///
190     /// We only want to insert a newline before the closing brace if there also
191     /// was a newline after the beginning left brace.
192     bool BreakBeforeClosingBrace;
193 
194     /// \brief The column of a \c ? in a conditional expression;
195     unsigned QuestionColumn;
196 
197     /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
198     /// lines, in this context.
199     bool AvoidBinPacking;
200 
201     /// \brief Break after the next comma (or all the commas in this context if
202     /// \c AvoidBinPacking is \c true).
203     bool BreakBeforeParameter;
204 
205     /// \brief Line breaking in this context would break a formatting rule.
206     bool NoLineBreak;
207 
208     /// \brief The position of the colon in an ObjC method declaration/call.
209     unsigned ColonPos;
210 
211     /// \brief The start of the most recent function in a builder-type call.
212     unsigned StartOfFunctionCall;
213 
214     /// \brief If a nested name specifier was broken over multiple lines, this
215     /// contains the start column of the second line. Otherwise 0.
216     unsigned NestedNameSpecifierContinuation;
217 
218     /// \brief If a call expression was broken over multiple lines, this
219     /// contains the start column of the second line. Otherwise 0.
220     unsigned CallContinuation;
221 
222     /// \brief The column of the first variable name in a variable declaration.
223     ///
224     /// Used to align further variables if necessary.
225     unsigned VariablePos;
226 
227     bool operator<(const ParenState &Other) const {
228       if (Indent != Other.Indent)
229         return Indent < Other.Indent;
230       if (LastSpace != Other.LastSpace)
231         return LastSpace < Other.LastSpace;
232       if (FirstLessLess != Other.FirstLessLess)
233         return FirstLessLess < Other.FirstLessLess;
234       if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
235         return BreakBeforeClosingBrace;
236       if (QuestionColumn != Other.QuestionColumn)
237         return QuestionColumn < Other.QuestionColumn;
238       if (AvoidBinPacking != Other.AvoidBinPacking)
239         return AvoidBinPacking;
240       if (BreakBeforeParameter != Other.BreakBeforeParameter)
241         return BreakBeforeParameter;
242       if (NoLineBreak != Other.NoLineBreak)
243         return NoLineBreak;
244       if (ColonPos != Other.ColonPos)
245         return ColonPos < Other.ColonPos;
246       if (StartOfFunctionCall != Other.StartOfFunctionCall)
247         return StartOfFunctionCall < Other.StartOfFunctionCall;
248       if (NestedNameSpecifierContinuation !=
249           Other.NestedNameSpecifierContinuation)
250         return NestedNameSpecifierContinuation <
251                Other.NestedNameSpecifierContinuation;
252       if (CallContinuation != Other.CallContinuation)
253         return CallContinuation < Other.CallContinuation;
254       if (VariablePos != Other.VariablePos)
255         return VariablePos < Other.VariablePos;
256       return false;
257     }
258   };
259 
260   /// \brief The current state when indenting a unwrapped line.
261   ///
262   /// As the indenting tries different combinations this is copied by value.
263   struct LineState {
264     /// \brief The number of used columns in the current line.
265     unsigned Column;
266 
267     /// \brief The token that needs to be next formatted.
268     const AnnotatedToken *NextToken;
269 
270     /// \brief \c true if this line contains a continued for-loop section.
271     bool LineContainsContinuedForLoopSection;
272 
273     /// \brief The level of nesting inside (), [], <> and {}.
274     unsigned ParenLevel;
275 
276     /// \brief The \c ParenLevel at the start of this line.
277     unsigned StartOfLineLevel;
278 
279     /// \brief The start column of the string literal, if we're in a string
280     /// literal sequence, 0 otherwise.
281     unsigned StartOfStringLiteral;
282 
283     /// \brief A stack keeping track of properties applying to parenthesis
284     /// levels.
285     std::vector<ParenState> Stack;
286 
287     /// \brief Comparison operator to be able to used \c LineState in \c map.
288     bool operator<(const LineState &Other) const {
289       if (NextToken != Other.NextToken)
290         return NextToken < Other.NextToken;
291       if (Column != Other.Column)
292         return Column < Other.Column;
293       if (LineContainsContinuedForLoopSection !=
294           Other.LineContainsContinuedForLoopSection)
295         return LineContainsContinuedForLoopSection;
296       if (ParenLevel != Other.ParenLevel)
297         return ParenLevel < Other.ParenLevel;
298       if (StartOfLineLevel != Other.StartOfLineLevel)
299         return StartOfLineLevel < Other.StartOfLineLevel;
300       if (StartOfStringLiteral != Other.StartOfStringLiteral)
301         return StartOfStringLiteral < Other.StartOfStringLiteral;
302       return Stack < Other.Stack;
303     }
304   };
305 
306   /// \brief Appends the next token to \p State and updates information
307   /// necessary for indentation.
308   ///
309   /// Puts the token on the current line if \p Newline is \c true and adds a
310   /// line break and necessary indentation otherwise.
311   ///
312   /// If \p DryRun is \c false, also creates and stores the required
313   /// \c Replacement.
314   unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) {
315     const AnnotatedToken &Current = *State.NextToken;
316     const AnnotatedToken &Previous = *State.NextToken->Parent;
317 
318     if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) {
319       State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
320                       State.NextToken->FormatTok.TokenLength;
321       if (State.NextToken->Children.empty())
322         State.NextToken = NULL;
323       else
324         State.NextToken = &State.NextToken->Children[0];
325       return 0;
326     }
327 
328     // If we are continuing an expression, we want to indent an extra 4 spaces.
329     unsigned ContinuationIndent =
330         std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4;
331     if (Newline) {
332       unsigned WhitespaceStartColumn = State.Column;
333       if (Current.is(tok::r_brace)) {
334         State.Column = Line.Level * 2;
335       } else if (Current.is(tok::string_literal) &&
336                  State.StartOfStringLiteral != 0) {
337         State.Column = State.StartOfStringLiteral;
338         State.Stack.back().BreakBeforeParameter = true;
339       } else if (Current.is(tok::lessless) &&
340                  State.Stack.back().FirstLessLess != 0) {
341         State.Column = State.Stack.back().FirstLessLess;
342       } else if (Previous.is(tok::coloncolon)) {
343         if (State.Stack.back().NestedNameSpecifierContinuation == 0) {
344           State.Column = ContinuationIndent;
345           State.Stack.back().NestedNameSpecifierContinuation = State.Column;
346         } else {
347           State.Column = State.Stack.back().NestedNameSpecifierContinuation;
348         }
349       } else if (Current.isOneOf(tok::period, tok::arrow)) {
350         if (State.Stack.back().CallContinuation == 0) {
351           State.Column = ContinuationIndent;
352           State.Stack.back().CallContinuation = State.Column;
353         } else {
354           State.Column = State.Stack.back().CallContinuation;
355         }
356       } else if (Current.Type == TT_ConditionalExpr) {
357         State.Column = State.Stack.back().QuestionColumn;
358       } else if (Previous.is(tok::comma) &&
359                  State.Stack.back().VariablePos != 0) {
360         State.Column = State.Stack.back().VariablePos;
361       } else if (Previous.ClosesTemplateDeclaration ||
362                  (Current.Type == TT_StartOfName && State.ParenLevel == 0)) {
363         State.Column = State.Stack.back().Indent;
364       } else if (Current.Type == TT_ObjCSelectorName) {
365         if (State.Stack.back().ColonPos > Current.FormatTok.TokenLength) {
366           State.Column =
367               State.Stack.back().ColonPos - Current.FormatTok.TokenLength;
368         } else {
369           State.Column = State.Stack.back().Indent;
370           State.Stack.back().ColonPos =
371               State.Column + Current.FormatTok.TokenLength;
372         }
373       } else if (Current.Type == TT_StartOfName || Previous.is(tok::equal) ||
374                  Previous.Type == TT_ObjCMethodExpr) {
375         State.Column = ContinuationIndent;
376       } else {
377         State.Column = State.Stack.back().Indent;
378         // Ensure that we fall back to indenting 4 spaces instead of just
379         // flushing continuations left.
380         if (State.Column == FirstIndent)
381           State.Column += 4;
382       }
383 
384       if (Current.is(tok::question))
385         State.Stack.back().BreakBeforeParameter = true;
386       if (Previous.isOneOf(tok::comma, tok::semi) &&
387           !State.Stack.back().AvoidBinPacking)
388         State.Stack.back().BreakBeforeParameter = false;
389 
390       if (!DryRun) {
391         unsigned NewLines = 1;
392         if (Current.Type == TT_LineComment)
393           NewLines =
394               std::max(NewLines, std::min(Current.FormatTok.NewlinesBefore,
395                                           Style.MaxEmptyLinesToKeep + 1));
396         if (!Line.InPPDirective)
397           Whitespaces.replaceWhitespace(Current, NewLines, State.Column,
398                                         WhitespaceStartColumn);
399         else
400           Whitespaces.replacePPWhitespace(Current, NewLines, State.Column,
401                                           WhitespaceStartColumn);
402       }
403 
404       State.Stack.back().LastSpace = State.Column;
405       State.StartOfLineLevel = State.ParenLevel;
406 
407       // Any break on this level means that the parent level has been broken
408       // and we need to avoid bin packing there.
409       for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) {
410         State.Stack[i].BreakBeforeParameter = true;
411       }
412       const AnnotatedToken *TokenBefore = Current.getPreviousNoneComment();
413       if (TokenBefore && !TokenBefore->isOneOf(tok::comma, tok::semi) &&
414           !TokenBefore->opensScope())
415         State.Stack.back().BreakBeforeParameter = true;
416 
417       // If we break after {, we should also break before the corresponding }.
418       if (Previous.is(tok::l_brace))
419         State.Stack.back().BreakBeforeClosingBrace = true;
420 
421       if (State.Stack.back().AvoidBinPacking) {
422         // If we are breaking after '(', '{', '<', this is not bin packing
423         // unless AllowAllParametersOfDeclarationOnNextLine is false.
424         if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace)) ||
425             (!Style.AllowAllParametersOfDeclarationOnNextLine &&
426              Line.MustBeDeclaration))
427           State.Stack.back().BreakBeforeParameter = true;
428       }
429     } else {
430       if (Current.is(tok::equal) &&
431           (RootToken.is(tok::kw_for) || State.ParenLevel == 0) &&
432           State.Stack.back().VariablePos == 0) {
433         State.Stack.back().VariablePos = State.Column;
434         // Move over * and & if they are bound to the variable name.
435         const AnnotatedToken *Tok = &Previous;
436         while (Tok &&
437                State.Stack.back().VariablePos >= Tok->FormatTok.TokenLength) {
438           State.Stack.back().VariablePos -= Tok->FormatTok.TokenLength;
439           if (Tok->SpacesRequiredBefore != 0)
440             break;
441           Tok = Tok->Parent;
442         }
443         if (Previous.PartOfMultiVariableDeclStmt)
444           State.Stack.back().LastSpace = State.Stack.back().VariablePos;
445       }
446 
447       unsigned Spaces = State.NextToken->SpacesRequiredBefore;
448 
449       if (!DryRun)
450         Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column);
451 
452       if (Current.Type == TT_ObjCSelectorName &&
453           State.Stack.back().ColonPos == 0) {
454         if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
455             State.Column + Spaces + Current.FormatTok.TokenLength)
456           State.Stack.back().ColonPos =
457               State.Stack.back().Indent + Current.LongestObjCSelectorName;
458         else
459           State.Stack.back().ColonPos =
460               State.Column + Spaces + Current.FormatTok.TokenLength;
461       }
462 
463       if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr &&
464           Current.Type != TT_LineComment)
465         State.Stack.back().Indent = State.Column + Spaces;
466       if (Previous.is(tok::comma) && !Current.isTrailingComment() &&
467           State.Stack.back().AvoidBinPacking)
468         State.Stack.back().NoLineBreak = true;
469 
470       State.Column += Spaces;
471       if (Current.is(tok::l_paren) && Previous.isOneOf(tok::kw_if, tok::kw_for))
472         // Treat the condition inside an if as if it was a second function
473         // parameter, i.e. let nested calls have an indent of 4.
474         State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(".
475       else if (Previous.is(tok::comma))
476         State.Stack.back().LastSpace = State.Column;
477       else if ((Previous.Type == TT_BinaryOperator ||
478                 Previous.Type == TT_ConditionalExpr ||
479                 Previous.Type == TT_CtorInitializerColon) &&
480                getPrecedence(Previous) != prec::Assignment)
481         State.Stack.back().LastSpace = State.Column;
482       else if (Previous.Type == TT_InheritanceColon)
483         State.Stack.back().Indent = State.Column;
484       else if (Previous.opensScope() && Previous.ParameterCount > 1)
485         // If this function has multiple parameters, indent nested calls from
486         // the start of the first parameter.
487         State.Stack.back().LastSpace = State.Column;
488     }
489 
490     return moveStateToNextToken(State, DryRun);
491   }
492 
493   /// \brief Mark the next token as consumed in \p State and modify its stacks
494   /// accordingly.
495   unsigned moveStateToNextToken(LineState &State, bool DryRun) {
496     const AnnotatedToken &Current = *State.NextToken;
497     assert(State.Stack.size());
498 
499     if (Current.Type == TT_InheritanceColon)
500       State.Stack.back().AvoidBinPacking = true;
501     if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0)
502       State.Stack.back().FirstLessLess = State.Column;
503     if (Current.is(tok::question))
504       State.Stack.back().QuestionColumn = State.Column;
505     if (Current.isOneOf(tok::period, tok::arrow) &&
506         Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0)
507       State.Stack.back().StartOfFunctionCall =
508           Current.LastInChainOfCalls ? 0 : State.Column;
509     if (Current.Type == TT_CtorInitializerColon) {
510       State.Stack.back().Indent = State.Column + 2;
511       if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
512         State.Stack.back().AvoidBinPacking = true;
513       State.Stack.back().BreakBeforeParameter = false;
514     }
515 
516     // If return returns a binary expression, align after it.
517     if (Current.is(tok::kw_return) && !Current.FakeLParens.empty())
518       State.Stack.back().LastSpace = State.Column + 7;
519 
520     // In ObjC method declaration we align on the ":" of parameters, but we need
521     // to ensure that we indent parameters on subsequent lines by at least 4.
522     if (Current.Type == TT_ObjCMethodSpecifier)
523       State.Stack.back().Indent += 4;
524 
525     // Insert scopes created by fake parenthesis.
526     const AnnotatedToken *Previous = Current.getPreviousNoneComment();
527     // Don't add extra indentation for the first fake parenthesis after
528     // 'return', assignements or opening <({[. The indentation for these cases
529     // is special cased.
530     bool SkipFirstExtraIndent =
531         Current.is(tok::kw_return) ||
532         (Previous && (Previous->opensScope() ||
533                       getPrecedence(*Previous) == prec::Assignment));
534     for (SmallVector<prec::Level, 4>::const_reverse_iterator
535              I = Current.FakeLParens.rbegin(),
536              E = Current.FakeLParens.rend();
537          I != E; ++I) {
538       ParenState NewParenState = State.Stack.back();
539       NewParenState.Indent =
540           std::max(std::max(State.Column, NewParenState.Indent),
541                    State.Stack.back().LastSpace);
542 
543       // Always indent conditional expressions. Never indent expression where
544       // the 'operator' is ',', ';' or an assignment (i.e. *I <=
545       // prec::Assignment) as those have different indentation rules. Indent
546       // other expression, unless the indentation needs to be skipped.
547       if (*I == prec::Conditional ||
548           (!SkipFirstExtraIndent && *I > prec::Assignment))
549         NewParenState.Indent += 4;
550       if (Previous && !Previous->opensScope())
551         NewParenState.BreakBeforeParameter = false;
552       State.Stack.push_back(NewParenState);
553       SkipFirstExtraIndent = false;
554     }
555 
556     // If we encounter an opening (, [, { or <, we add a level to our stacks to
557     // prepare for the following tokens.
558     if (Current.opensScope()) {
559       unsigned NewIndent;
560       bool AvoidBinPacking;
561       if (Current.is(tok::l_brace)) {
562         NewIndent = 2 + State.Stack.back().LastSpace;
563         AvoidBinPacking = false;
564       } else {
565         NewIndent = 4 + std::max(State.Stack.back().LastSpace,
566                                  State.Stack.back().StartOfFunctionCall);
567         AvoidBinPacking = !Style.BinPackParameters;
568       }
569       State.Stack.push_back(
570           ParenState(NewIndent, State.Stack.back().LastSpace, AvoidBinPacking,
571                      State.Stack.back().NoLineBreak));
572 
573       if (Current.NoMoreTokensOnLevel && Current.FakeLParens.empty()) {
574         // This parenthesis was the last token possibly making use of Indent and
575         // LastSpace of the next higher ParenLevel. Thus, erase them to acieve
576         // better memoization results.
577         State.Stack[State.Stack.size() - 2].Indent = 0;
578         State.Stack[State.Stack.size() - 2].LastSpace = 0;
579       }
580 
581       ++State.ParenLevel;
582     }
583 
584     // If this '[' opens an ObjC call, determine whether all parameters fit into
585     // one line and put one per line if they don't.
586     if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr &&
587         Current.MatchingParen != NULL) {
588       if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit())
589         State.Stack.back().BreakBeforeParameter = true;
590     }
591 
592     // If we encounter a closing ), ], } or >, we can remove a level from our
593     // stacks.
594     if (Current.isOneOf(tok::r_paren, tok::r_square) ||
595         (Current.is(tok::r_brace) && State.NextToken != &RootToken) ||
596         State.NextToken->Type == TT_TemplateCloser) {
597       State.Stack.pop_back();
598       --State.ParenLevel;
599     }
600 
601     // Remove scopes created by fake parenthesis.
602     for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) {
603       unsigned VariablePos = State.Stack.back().VariablePos;
604       State.Stack.pop_back();
605       State.Stack.back().VariablePos = VariablePos;
606     }
607 
608     if (Current.is(tok::string_literal)) {
609       State.StartOfStringLiteral = State.Column;
610     } else if (Current.isNot(tok::comment)) {
611       State.StartOfStringLiteral = 0;
612     }
613 
614     State.Column += Current.FormatTok.TokenLength;
615 
616     if (State.NextToken->Children.empty())
617       State.NextToken = NULL;
618     else
619       State.NextToken = &State.NextToken->Children[0];
620 
621     return breakProtrudingToken(Current, State, DryRun);
622   }
623 
624   /// \brief If the current token sticks out over the end of the line, break
625   /// it if possible.
626   unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State,
627                                 bool DryRun) {
628     llvm::OwningPtr<BreakableToken> Token;
629     unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
630     if (Current.is(tok::string_literal)) {
631       // Only break up default narrow strings.
632       const char *LiteralData = SourceMgr.getCharacterData(
633           Current.FormatTok.getStartOfNonWhitespace());
634       if (!LiteralData || *LiteralData != '"')
635         return 0;
636 
637       Token.reset(new BreakableStringLiteral(SourceMgr, Current.FormatTok,
638                                              StartColumn));
639     } else if (Current.Type == TT_BlockComment) {
640       BreakableBlockComment *BBC =
641           new BreakableBlockComment(SourceMgr, Current, StartColumn);
642       if (!DryRun)
643         BBC->alignLines(Whitespaces);
644       Token.reset(BBC);
645     } else if (Current.Type == TT_LineComment) {
646       Token.reset(new BreakableLineComment(SourceMgr, Current, StartColumn));
647     } else {
648       return 0;
649     }
650 
651     bool BreakInserted = false;
652     unsigned Penalty = 0;
653     for (unsigned LineIndex = 0; LineIndex < Token->getLineCount();
654          ++LineIndex) {
655       unsigned TailOffset = 0;
656       unsigned RemainingLength =
657           Token->getLineLengthAfterSplit(LineIndex, TailOffset);
658       while (RemainingLength > getColumnLimit()) {
659         BreakableToken::Split Split =
660             Token->getSplit(LineIndex, TailOffset, getColumnLimit());
661         if (Split.first == StringRef::npos)
662           break;
663         assert(Split.first != 0);
664         unsigned NewRemainingLength = Token->getLineLengthAfterSplit(
665             LineIndex, TailOffset + Split.first + Split.second);
666         if (NewRemainingLength >= RemainingLength)
667           break;
668         if (!DryRun) {
669           Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective,
670                              Whitespaces);
671         }
672         TailOffset += Split.first + Split.second;
673         RemainingLength = NewRemainingLength;
674         Penalty += Style.PenaltyExcessCharacter;
675         BreakInserted = true;
676       }
677       State.Column = RemainingLength;
678       if (!DryRun) {
679         Token->trimLine(LineIndex, TailOffset, Line.InPPDirective, Whitespaces);
680       }
681     }
682 
683     if (BreakInserted) {
684       for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
685         State.Stack[i].BreakBeforeParameter = true;
686       State.Stack.back().LastSpace = StartColumn;
687     }
688     return Penalty;
689   }
690 
691   unsigned getColumnLimit() {
692     // In preprocessor directives reserve two chars for trailing " \"
693     return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0);
694   }
695 
696   /// \brief An edge in the solution space from \c Previous->State to \c State,
697   /// inserting a newline dependent on the \c NewLine.
698   struct StateNode {
699     StateNode(const LineState &State, bool NewLine, StateNode *Previous)
700         : State(State), NewLine(NewLine), Previous(Previous) {}
701     LineState State;
702     bool NewLine;
703     StateNode *Previous;
704   };
705 
706   /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
707   ///
708   /// In case of equal penalties, we want to prefer states that were inserted
709   /// first. During state generation we make sure that we insert states first
710   /// that break the line as late as possible.
711   typedef std::pair<unsigned, unsigned> OrderedPenalty;
712 
713   /// \brief An item in the prioritized BFS search queue. The \c StateNode's
714   /// \c State has the given \c OrderedPenalty.
715   typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
716 
717   /// \brief The BFS queue type.
718   typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
719                               std::greater<QueueItem> > QueueType;
720 
721   /// \brief Analyze the entire solution space starting from \p InitialState.
722   ///
723   /// This implements a variant of Dijkstra's algorithm on the graph that spans
724   /// the solution space (\c LineStates are the nodes). The algorithm tries to
725   /// find the shortest path (the one with lowest penalty) from \p InitialState
726   /// to a state where all tokens are placed.
727   unsigned analyzeSolutionSpace(LineState &InitialState) {
728     std::set<LineState> Seen;
729 
730     // Insert start element into queue.
731     StateNode *Node =
732         new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
733     Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
734     ++Count;
735 
736     // While not empty, take first element and follow edges.
737     while (!Queue.empty()) {
738       unsigned Penalty = Queue.top().first.first;
739       StateNode *Node = Queue.top().second;
740       if (Node->State.NextToken == NULL) {
741         DEBUG(llvm::errs() << "\n---\nPenalty for line: " << Penalty << "\n");
742         break;
743       }
744       Queue.pop();
745 
746       if (!Seen.insert(Node->State).second)
747         // State already examined with lower penalty.
748         continue;
749 
750       addNextStateToQueue(Penalty, Node, /*NewLine=*/ false);
751       addNextStateToQueue(Penalty, Node, /*NewLine=*/ true);
752     }
753 
754     if (Queue.empty())
755       // We were unable to find a solution, do nothing.
756       // FIXME: Add diagnostic?
757       return 0;
758 
759     // Reconstruct the solution.
760     reconstructPath(InitialState, Queue.top().second);
761     DEBUG(llvm::errs() << "---\n");
762 
763     // Return the column after the last token of the solution.
764     return Queue.top().second->State.Column;
765   }
766 
767   void reconstructPath(LineState &State, StateNode *Current) {
768     // FIXME: This recursive implementation limits the possible number
769     // of tokens per line if compiled into a binary with small stack space.
770     // To become more independent of stack frame limitations we would need
771     // to also change the TokenAnnotator.
772     if (Current->Previous == NULL)
773       return;
774     reconstructPath(State, Current->Previous);
775     DEBUG({
776       if (Current->NewLine) {
777         llvm::errs()
778             << "Penalty for splitting before "
779             << Current->Previous->State.NextToken->FormatTok.Tok.getName()
780             << ": " << Current->Previous->State.NextToken->SplitPenalty << "\n";
781       }
782     });
783     addTokenToState(Current->NewLine, false, State);
784   }
785 
786   /// \brief Add the following state to the analysis queue \c Queue.
787   ///
788   /// Assume the current state is \p PreviousNode and has been reached with a
789   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
790   void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
791                            bool NewLine) {
792     if (NewLine && !canBreak(PreviousNode->State))
793       return;
794     if (!NewLine && mustBreak(PreviousNode->State))
795       return;
796     if (NewLine)
797       Penalty += PreviousNode->State.NextToken->SplitPenalty;
798 
799     StateNode *Node = new (Allocator.Allocate())
800         StateNode(PreviousNode->State, NewLine, PreviousNode);
801     Penalty += addTokenToState(NewLine, true, Node->State);
802     if (Node->State.Column > getColumnLimit()) {
803       unsigned ExcessCharacters = Node->State.Column - getColumnLimit();
804       Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
805     }
806 
807     Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node));
808     ++Count;
809   }
810 
811   /// \brief Returns \c true, if a line break after \p State is allowed.
812   bool canBreak(const LineState &State) {
813     if (!State.NextToken->CanBreakBefore &&
814         !(State.NextToken->is(tok::r_brace) &&
815           State.Stack.back().BreakBeforeClosingBrace))
816       return false;
817     return !State.Stack.back().NoLineBreak;
818   }
819 
820   /// \brief Returns \c true, if a line break after \p State is mandatory.
821   bool mustBreak(const LineState &State) {
822     if (State.NextToken->MustBreakBefore)
823       return true;
824     if (State.NextToken->is(tok::r_brace) &&
825         State.Stack.back().BreakBeforeClosingBrace)
826       return true;
827     if (State.NextToken->Parent->is(tok::semi) &&
828         State.LineContainsContinuedForLoopSection)
829       return true;
830     if ((State.NextToken->Parent->isOneOf(tok::comma, tok::semi) ||
831          State.NextToken->is(tok::question) ||
832          State.NextToken->Type == TT_ConditionalExpr) &&
833         State.Stack.back().BreakBeforeParameter &&
834         !State.NextToken->isTrailingComment() &&
835         State.NextToken->isNot(tok::r_paren) &&
836         State.NextToken->isNot(tok::r_brace))
837       return true;
838     // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding
839     // out whether it is the first parameter. Clean this up.
840     if (State.NextToken->Type == TT_ObjCSelectorName &&
841         State.NextToken->LongestObjCSelectorName == 0 &&
842         State.Stack.back().BreakBeforeParameter)
843       return true;
844     if ((State.NextToken->Type == TT_CtorInitializerColon ||
845          (State.NextToken->Parent->ClosesTemplateDeclaration &&
846           State.ParenLevel == 0)))
847       return true;
848     if (State.NextToken->Type == TT_InlineASMColon)
849       return true;
850     // This prevents breaks like:
851     //   ...
852     //   SomeParameter, OtherParameter).DoSomething(
853     //   ...
854     // As they hide "DoSomething" and generally bad for readability.
855     if (State.NextToken->isOneOf(tok::period, tok::arrow) &&
856         getRemainingLength(State) + State.Column > getColumnLimit() &&
857         State.ParenLevel < State.StartOfLineLevel)
858       return true;
859     return false;
860   }
861 
862   // Returns the total number of columns required for the remaining tokens.
863   unsigned getRemainingLength(const LineState &State) {
864     if (State.NextToken && State.NextToken->Parent)
865       return Line.Last->TotalLength - State.NextToken->Parent->TotalLength;
866     return 0;
867   }
868 
869   FormatStyle Style;
870   SourceManager &SourceMgr;
871   const AnnotatedLine &Line;
872   const unsigned FirstIndent;
873   const AnnotatedToken &RootToken;
874   WhitespaceManager &Whitespaces;
875 
876   llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
877   QueueType Queue;
878   // Increasing count of \c StateNode items we have created. This is used
879   // to create a deterministic order independent of the container.
880   unsigned Count;
881 };
882 
883 class LexerBasedFormatTokenSource : public FormatTokenSource {
884 public:
885   LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)
886       : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
887         IdentTable(Lex.getLangOpts()) {
888     Lex.SetKeepWhitespaceMode(true);
889   }
890 
891   virtual FormatToken getNextToken() {
892     if (GreaterStashed) {
893       FormatTok.NewlinesBefore = 0;
894       FormatTok.WhiteSpaceStart =
895           FormatTok.Tok.getLocation().getLocWithOffset(1);
896       FormatTok.WhiteSpaceLength = 0;
897       GreaterStashed = false;
898       return FormatTok;
899     }
900 
901     FormatTok = FormatToken();
902     Lex.LexFromRawLexer(FormatTok.Tok);
903     StringRef Text = rawTokenText(FormatTok.Tok);
904     FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
905     if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
906       FormatTok.IsFirst = true;
907 
908     // Consume and record whitespace until we find a significant token.
909     while (FormatTok.Tok.is(tok::unknown)) {
910       unsigned Newlines = Text.count('\n');
911       if (Newlines > 0)
912         FormatTok.LastNewlineOffset =
913             FormatTok.WhiteSpaceLength + Text.rfind('\n') + 1;
914       unsigned EscapedNewlines = Text.count("\\\n");
915       FormatTok.NewlinesBefore += Newlines;
916       FormatTok.HasUnescapedNewline |= EscapedNewlines != Newlines;
917       FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
918 
919       if (FormatTok.Tok.is(tok::eof))
920         return FormatTok;
921       Lex.LexFromRawLexer(FormatTok.Tok);
922       Text = rawTokenText(FormatTok.Tok);
923     }
924 
925     // Now FormatTok is the next non-whitespace token.
926     FormatTok.TokenLength = Text.size();
927 
928     if (FormatTok.Tok.is(tok::comment)) {
929       FormatTok.TrailingWhiteSpaceLength = Text.size() - Text.rtrim().size();
930       FormatTok.TokenLength -= FormatTok.TrailingWhiteSpaceLength;
931     }
932 
933     // In case the token starts with escaped newlines, we want to
934     // take them into account as whitespace - this pattern is quite frequent
935     // in macro definitions.
936     // FIXME: What do we want to do with other escaped spaces, and escaped
937     // spaces or newlines in the middle of tokens?
938     // FIXME: Add a more explicit test.
939     unsigned i = 0;
940     while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') {
941       // FIXME: ++FormatTok.NewlinesBefore is missing...
942       FormatTok.WhiteSpaceLength += 2;
943       FormatTok.TokenLength -= 2;
944       i += 2;
945     }
946 
947     if (FormatTok.Tok.is(tok::raw_identifier)) {
948       IdentifierInfo &Info = IdentTable.get(Text);
949       FormatTok.Tok.setIdentifierInfo(&Info);
950       FormatTok.Tok.setKind(Info.getTokenID());
951     }
952 
953     if (FormatTok.Tok.is(tok::greatergreater)) {
954       FormatTok.Tok.setKind(tok::greater);
955       FormatTok.TokenLength = 1;
956       GreaterStashed = true;
957     }
958 
959     return FormatTok;
960   }
961 
962   IdentifierTable &getIdentTable() { return IdentTable; }
963 
964 private:
965   FormatToken FormatTok;
966   bool GreaterStashed;
967   Lexer &Lex;
968   SourceManager &SourceMgr;
969   IdentifierTable IdentTable;
970 
971   /// Returns the text of \c FormatTok.
972   StringRef rawTokenText(Token &Tok) {
973     return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
974                      Tok.getLength());
975   }
976 };
977 
978 class Formatter : public UnwrappedLineConsumer {
979 public:
980   Formatter(DiagnosticsEngine &Diag, const FormatStyle &Style, Lexer &Lex,
981             SourceManager &SourceMgr,
982             const std::vector<CharSourceRange> &Ranges)
983       : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr),
984         Whitespaces(SourceMgr, Style), Ranges(Ranges) {}
985 
986   virtual ~Formatter() {}
987 
988   tooling::Replacements format() {
989     LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
990     UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
991     bool StructuralError = Parser.parse();
992     unsigned PreviousEndOfLineColumn = 0;
993     TokenAnnotator Annotator(Style, SourceMgr, Lex,
994                              Tokens.getIdentTable().get("in"));
995     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
996       Annotator.annotate(AnnotatedLines[i]);
997     }
998     deriveLocalStyle();
999     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1000       Annotator.calculateFormattingInformation(AnnotatedLines[i]);
1001     }
1002 
1003     // Adapt level to the next line if this is a comment.
1004     // FIXME: Can/should this be done in the UnwrappedLineParser?
1005     const AnnotatedLine *NextNoneCommentLine = NULL;
1006     for (unsigned i = AnnotatedLines.size() - 1; i > 0; --i) {
1007       if (NextNoneCommentLine && AnnotatedLines[i].First.is(tok::comment) &&
1008           AnnotatedLines[i].First.Children.empty())
1009         AnnotatedLines[i].Level = NextNoneCommentLine->Level;
1010       else
1011         NextNoneCommentLine =
1012             AnnotatedLines[i].First.isNot(tok::r_brace) ? &AnnotatedLines[i]
1013                                                         : NULL;
1014     }
1015 
1016     std::vector<int> IndentForLevel;
1017     bool PreviousLineWasTouched = false;
1018     const AnnotatedToken *PreviousLineLastToken = 0;
1019     for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
1020                                               E = AnnotatedLines.end();
1021          I != E; ++I) {
1022       const AnnotatedLine &TheLine = *I;
1023       const FormatToken &FirstTok = TheLine.First.FormatTok;
1024       int Offset = getIndentOffset(TheLine.First);
1025       while (IndentForLevel.size() <= TheLine.Level)
1026         IndentForLevel.push_back(-1);
1027       IndentForLevel.resize(TheLine.Level + 1);
1028       bool WasMoved = PreviousLineWasTouched && FirstTok.NewlinesBefore == 0;
1029       if (TheLine.First.is(tok::eof)) {
1030         if (PreviousLineWasTouched) {
1031           unsigned NewLines = std::min(FirstTok.NewlinesBefore, 1u);
1032           Whitespaces.replaceWhitespace(TheLine.First, NewLines, /*Indent*/ 0,
1033                                         /*WhitespaceStartColumn*/ 0);
1034         }
1035       } else if (TheLine.Type != LT_Invalid &&
1036                  (WasMoved || touchesLine(TheLine))) {
1037         unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
1038         unsigned Indent = LevelIndent;
1039         if (static_cast<int>(Indent) + Offset >= 0)
1040           Indent += Offset;
1041         if (FirstTok.WhiteSpaceStart.isValid() &&
1042             // Insert a break even if there is a structural error in case where
1043             // we break apart a line consisting of multiple unwrapped lines.
1044             (FirstTok.NewlinesBefore == 0 || !StructuralError)) {
1045           formatFirstToken(TheLine.First, PreviousLineLastToken, Indent,
1046                            TheLine.InPPDirective, PreviousEndOfLineColumn);
1047         } else {
1048           Indent = LevelIndent =
1049               SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
1050         }
1051         tryFitMultipleLinesInOne(Indent, I, E);
1052         UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
1053                                          TheLine.First, Whitespaces);
1054         PreviousEndOfLineColumn =
1055             Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
1056         IndentForLevel[TheLine.Level] = LevelIndent;
1057         PreviousLineWasTouched = true;
1058       } else {
1059         if (FirstTok.NewlinesBefore > 0 || FirstTok.IsFirst) {
1060           unsigned Indent =
1061               SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
1062           unsigned LevelIndent = Indent;
1063           if (static_cast<int>(LevelIndent) - Offset >= 0)
1064             LevelIndent -= Offset;
1065           if (TheLine.First.isNot(tok::comment))
1066             IndentForLevel[TheLine.Level] = LevelIndent;
1067 
1068           // Remove trailing whitespace of the previous line if it was touched.
1069           if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine))
1070             formatFirstToken(TheLine.First, PreviousLineLastToken, Indent,
1071                              TheLine.InPPDirective, PreviousEndOfLineColumn);
1072         }
1073         // If we did not reformat this unwrapped line, the column at the end of
1074         // the last token is unchanged - thus, we can calculate the end of the
1075         // last token.
1076         SourceLocation LastLoc = TheLine.Last->FormatTok.Tok.getLocation();
1077         PreviousEndOfLineColumn =
1078             SourceMgr.getSpellingColumnNumber(LastLoc) +
1079             Lex.MeasureTokenLength(LastLoc, SourceMgr, Lex.getLangOpts()) - 1;
1080         PreviousLineWasTouched = false;
1081         if (TheLine.Last->is(tok::comment))
1082           Whitespaces.addUntouchableComment(SourceMgr.getSpellingColumnNumber(
1083               TheLine.Last->FormatTok.Tok.getLocation()) - 1);
1084         else
1085           Whitespaces.alignComments();
1086       }
1087       PreviousLineLastToken = I->Last;
1088     }
1089     return Whitespaces.generateReplacements();
1090   }
1091 
1092 private:
1093   void deriveLocalStyle() {
1094     unsigned CountBoundToVariable = 0;
1095     unsigned CountBoundToType = 0;
1096     bool HasCpp03IncompatibleFormat = false;
1097     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1098       if (AnnotatedLines[i].First.Children.empty())
1099         continue;
1100       AnnotatedToken *Tok = &AnnotatedLines[i].First.Children[0];
1101       while (!Tok->Children.empty()) {
1102         if (Tok->Type == TT_PointerOrReference) {
1103           bool SpacesBefore = Tok->FormatTok.WhiteSpaceLength > 0;
1104           bool SpacesAfter = Tok->Children[0].FormatTok.WhiteSpaceLength > 0;
1105           if (SpacesBefore && !SpacesAfter)
1106             ++CountBoundToVariable;
1107           else if (!SpacesBefore && SpacesAfter)
1108             ++CountBoundToType;
1109         }
1110 
1111         if (Tok->Type == TT_TemplateCloser &&
1112             Tok->Parent->Type == TT_TemplateCloser &&
1113             Tok->FormatTok.WhiteSpaceLength == 0)
1114           HasCpp03IncompatibleFormat = true;
1115         Tok = &Tok->Children[0];
1116       }
1117     }
1118     if (Style.DerivePointerBinding) {
1119       if (CountBoundToType > CountBoundToVariable)
1120         Style.PointerBindsToType = true;
1121       else if (CountBoundToType < CountBoundToVariable)
1122         Style.PointerBindsToType = false;
1123     }
1124     if (Style.Standard == FormatStyle::LS_Auto) {
1125       Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1126                                                   : FormatStyle::LS_Cpp03;
1127     }
1128   }
1129 
1130   /// \brief Get the indent of \p Level from \p IndentForLevel.
1131   ///
1132   /// \p IndentForLevel must contain the indent for the level \c l
1133   /// at \p IndentForLevel[l], or a value < 0 if the indent for
1134   /// that level is unknown.
1135   unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
1136     if (IndentForLevel[Level] != -1)
1137       return IndentForLevel[Level];
1138     if (Level == 0)
1139       return 0;
1140     return getIndent(IndentForLevel, Level - 1) + 2;
1141   }
1142 
1143   /// \brief Get the offset of the line relatively to the level.
1144   ///
1145   /// For example, 'public:' labels in classes are offset by 1 or 2
1146   /// characters to the left from their level.
1147   int getIndentOffset(const AnnotatedToken &RootToken) {
1148     if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
1149       return Style.AccessModifierOffset;
1150     return 0;
1151   }
1152 
1153   /// \brief Tries to merge lines into one.
1154   ///
1155   /// This will change \c Line and \c AnnotatedLine to contain the merged line,
1156   /// if possible; note that \c I will be incremented when lines are merged.
1157   ///
1158   /// Returns whether the resulting \c Line can fit in a single line.
1159   void tryFitMultipleLinesInOne(unsigned Indent,
1160                                 std::vector<AnnotatedLine>::iterator &I,
1161                                 std::vector<AnnotatedLine>::iterator E) {
1162     // We can never merge stuff if there are trailing line comments.
1163     if (I->Last->Type == TT_LineComment)
1164       return;
1165 
1166     unsigned Limit = Style.ColumnLimit - Indent;
1167     // If we already exceed the column limit, we set 'Limit' to 0. The different
1168     // tryMerge..() functions can then decide whether to still do merging.
1169     Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength;
1170 
1171     if (I + 1 == E || (I + 1)->Type == LT_Invalid)
1172       return;
1173 
1174     if (I->Last->is(tok::l_brace)) {
1175       tryMergeSimpleBlock(I, E, Limit);
1176     } else if (I->First.is(tok::kw_if)) {
1177       tryMergeSimpleIf(I, E, Limit);
1178     } else if (I->InPPDirective && (I->First.FormatTok.HasUnescapedNewline ||
1179                                     I->First.FormatTok.IsFirst)) {
1180       tryMergeSimplePPDirective(I, E, Limit);
1181     }
1182     return;
1183   }
1184 
1185   void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I,
1186                                  std::vector<AnnotatedLine>::iterator E,
1187                                  unsigned Limit) {
1188     if (Limit == 0)
1189       return;
1190     AnnotatedLine &Line = *I;
1191     if (!(I + 1)->InPPDirective || (I + 1)->First.FormatTok.HasUnescapedNewline)
1192       return;
1193     if (I + 2 != E && (I + 2)->InPPDirective &&
1194         !(I + 2)->First.FormatTok.HasUnescapedNewline)
1195       return;
1196     if (1 + (I + 1)->Last->TotalLength > Limit)
1197       return;
1198     join(Line, *(++I));
1199   }
1200 
1201   void tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator &I,
1202                         std::vector<AnnotatedLine>::iterator E,
1203                         unsigned Limit) {
1204     if (Limit == 0)
1205       return;
1206     if (!Style.AllowShortIfStatementsOnASingleLine)
1207       return;
1208     if ((I + 1)->InPPDirective != I->InPPDirective ||
1209         ((I + 1)->InPPDirective &&
1210          (I + 1)->First.FormatTok.HasUnescapedNewline))
1211       return;
1212     AnnotatedLine &Line = *I;
1213     if (Line.Last->isNot(tok::r_paren))
1214       return;
1215     if (1 + (I + 1)->Last->TotalLength > Limit)
1216       return;
1217     if ((I + 1)->First.is(tok::kw_if) || (I + 1)->First.Type == TT_LineComment)
1218       return;
1219     // Only inline simple if's (no nested if or else).
1220     if (I + 2 != E && (I + 2)->First.is(tok::kw_else))
1221       return;
1222     join(Line, *(++I));
1223   }
1224 
1225   void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I,
1226                            std::vector<AnnotatedLine>::iterator E,
1227                            unsigned Limit) {
1228     // First, check that the current line allows merging. This is the case if
1229     // we're not in a control flow statement and the last token is an opening
1230     // brace.
1231     AnnotatedLine &Line = *I;
1232     if (Line.First.isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
1233                            tok::kw_else, tok::kw_try, tok::kw_catch,
1234                            tok::kw_for,
1235                            // This gets rid of all ObjC @ keywords and methods.
1236                            tok::at, tok::minus, tok::plus))
1237       return;
1238 
1239     AnnotatedToken *Tok = &(I + 1)->First;
1240     if (Tok->Children.empty() && Tok->is(tok::r_brace) &&
1241         !Tok->MustBreakBefore) {
1242       // We merge empty blocks even if the line exceeds the column limit.
1243       Tok->SpacesRequiredBefore = 0;
1244       Tok->CanBreakBefore = true;
1245       join(Line, *(I + 1));
1246       I += 1;
1247     } else if (Limit != 0) {
1248       // Check that we still have three lines and they fit into the limit.
1249       if (I + 2 == E || (I + 2)->Type == LT_Invalid ||
1250           !nextTwoLinesFitInto(I, Limit))
1251         return;
1252 
1253       // Second, check that the next line does not contain any braces - if it
1254       // does, readability declines when putting it into a single line.
1255       if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore)
1256         return;
1257       do {
1258         if (Tok->isOneOf(tok::l_brace, tok::r_brace))
1259           return;
1260         Tok = Tok->Children.empty() ? NULL : &Tok->Children.back();
1261       } while (Tok != NULL);
1262 
1263       // Last, check that the third line contains a single closing brace.
1264       Tok = &(I + 2)->First;
1265       if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) ||
1266           Tok->MustBreakBefore)
1267         return;
1268 
1269       join(Line, *(I + 1));
1270       join(Line, *(I + 2));
1271       I += 2;
1272     }
1273   }
1274 
1275   bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I,
1276                            unsigned Limit) {
1277     return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <=
1278            Limit;
1279   }
1280 
1281   void join(AnnotatedLine &A, const AnnotatedLine &B) {
1282     unsigned LengthA = A.Last->TotalLength + B.First.SpacesRequiredBefore;
1283     A.Last->Children.push_back(B.First);
1284     while (!A.Last->Children.empty()) {
1285       A.Last->Children[0].Parent = A.Last;
1286       A.Last->Children[0].TotalLength += LengthA;
1287       A.Last = &A.Last->Children[0];
1288     }
1289   }
1290 
1291   bool touchesRanges(const CharSourceRange &Range) {
1292     for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1293       if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),
1294                                                Ranges[i].getBegin()) &&
1295           !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
1296                                                Range.getBegin()))
1297         return true;
1298     }
1299     return false;
1300   }
1301 
1302   bool touchesLine(const AnnotatedLine &TheLine) {
1303     const FormatToken *First = &TheLine.First.FormatTok;
1304     const FormatToken *Last = &TheLine.Last->FormatTok;
1305     CharSourceRange LineRange = CharSourceRange::getTokenRange(
1306         First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset),
1307         Last->Tok.getLocation());
1308     return touchesRanges(LineRange);
1309   }
1310 
1311   bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) {
1312     const FormatToken *First = &TheLine.First.FormatTok;
1313     CharSourceRange LineRange = CharSourceRange::getCharRange(
1314         First->WhiteSpaceStart,
1315         First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset));
1316     return touchesRanges(LineRange);
1317   }
1318 
1319   virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1320     AnnotatedLines.push_back(AnnotatedLine(TheLine));
1321   }
1322 
1323   /// \brief Add a new line and the required indent before the first Token
1324   /// of the \c UnwrappedLine if there was no structural parsing error.
1325   /// Returns the indent level of the \c UnwrappedLine.
1326   void formatFirstToken(const AnnotatedToken &RootToken,
1327                         const AnnotatedToken *PreviousToken, unsigned Indent,
1328                         bool InPPDirective, unsigned PreviousEndOfLineColumn) {
1329     const FormatToken &Tok = RootToken.FormatTok;
1330 
1331     unsigned Newlines =
1332         std::min(Tok.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
1333     if (Newlines == 0 && !Tok.IsFirst)
1334       Newlines = 1;
1335 
1336     if (!InPPDirective || Tok.HasUnescapedNewline) {
1337       // Insert extra new line before access specifiers.
1338       if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) &&
1339           RootToken.isAccessSpecifier() && Tok.NewlinesBefore == 1)
1340         ++Newlines;
1341 
1342       Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0);
1343     } else {
1344       Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent,
1345                                       PreviousEndOfLineColumn);
1346     }
1347   }
1348 
1349   DiagnosticsEngine &Diag;
1350   FormatStyle Style;
1351   Lexer &Lex;
1352   SourceManager &SourceMgr;
1353   WhitespaceManager Whitespaces;
1354   std::vector<CharSourceRange> Ranges;
1355   std::vector<AnnotatedLine> AnnotatedLines;
1356 };
1357 
1358 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1359                                SourceManager &SourceMgr,
1360                                std::vector<CharSourceRange> Ranges,
1361                                DiagnosticConsumer *DiagClient) {
1362   IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
1363   OwningPtr<DiagnosticConsumer> DiagPrinter;
1364   if (DiagClient == 0) {
1365     DiagPrinter.reset(new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts));
1366     DiagPrinter->BeginSourceFile(Lex.getLangOpts(), Lex.getPP());
1367     DiagClient = DiagPrinter.get();
1368   }
1369   DiagnosticsEngine Diagnostics(
1370       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts,
1371       DiagClient, false);
1372   Diagnostics.setSourceManager(&SourceMgr);
1373   Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges);
1374   return formatter.format();
1375 }
1376 
1377 LangOptions getFormattingLangOpts() {
1378   LangOptions LangOpts;
1379   LangOpts.CPlusPlus = 1;
1380   LangOpts.CPlusPlus11 = 1;
1381   LangOpts.LineComment = 1;
1382   LangOpts.Bool = 1;
1383   LangOpts.ObjC1 = 1;
1384   LangOpts.ObjC2 = 1;
1385   return LangOpts;
1386 }
1387 
1388 } // namespace format
1389 } // namespace clang
1390