1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "format-formatter"
17 
18 #include "BreakableToken.h"
19 #include "TokenAnnotator.h"
20 #include "UnwrappedLineParser.h"
21 #include "WhitespaceManager.h"
22 #include "clang/Basic/Diagnostic.h"
23 #include "clang/Basic/OperatorPrecedence.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Format/Format.h"
26 #include "clang/Lex/Lexer.h"
27 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/Support/Allocator.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/YAMLTraits.h"
31 #include <queue>
32 #include <string>
33 
34 namespace llvm {
35 namespace yaml {
36 template <>
37 struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> {
38   static void enumeration(IO &IO,
39                           clang::format::FormatStyle::LanguageStandard &Value) {
40     IO.enumCase(Value, "C++03", clang::format::FormatStyle::LS_Cpp03);
41     IO.enumCase(Value, "C++11", clang::format::FormatStyle::LS_Cpp11);
42     IO.enumCase(Value, "Auto", clang::format::FormatStyle::LS_Auto);
43   }
44 };
45 
46 template <>
47 struct ScalarEnumerationTraits<clang::format::FormatStyle::BraceBreakingStyle> {
48   static void
49   enumeration(IO &IO, clang::format::FormatStyle::BraceBreakingStyle &Value) {
50     IO.enumCase(Value, "Attach", clang::format::FormatStyle::BS_Attach);
51     IO.enumCase(Value, "Linux", clang::format::FormatStyle::BS_Linux);
52     IO.enumCase(Value, "Stroustrup", clang::format::FormatStyle::BS_Stroustrup);
53     IO.enumCase(Value, "Allman", clang::format::FormatStyle::BS_Allman);
54   }
55 };
56 
57 template <>
58 struct ScalarEnumerationTraits<
59     clang::format::FormatStyle::NamespaceIndentationKind> {
60   static void
61   enumeration(IO &IO,
62               clang::format::FormatStyle::NamespaceIndentationKind &Value) {
63     IO.enumCase(Value, "None", clang::format::FormatStyle::NI_None);
64     IO.enumCase(Value, "Inner", clang::format::FormatStyle::NI_Inner);
65     IO.enumCase(Value, "All", clang::format::FormatStyle::NI_All);
66   }
67 };
68 
69 template <> struct MappingTraits<clang::format::FormatStyle> {
70   static void mapping(llvm::yaml::IO &IO, clang::format::FormatStyle &Style) {
71     if (IO.outputting()) {
72       StringRef StylesArray[] = { "LLVM", "Google", "Chromium", "Mozilla" };
73       ArrayRef<StringRef> Styles(StylesArray);
74       for (size_t i = 0, e = Styles.size(); i < e; ++i) {
75         StringRef StyleName(Styles[i]);
76         clang::format::FormatStyle PredefinedStyle;
77         if (clang::format::getPredefinedStyle(StyleName, &PredefinedStyle) &&
78             Style == PredefinedStyle) {
79           IO.mapOptional("# BasedOnStyle", StyleName);
80           break;
81         }
82       }
83     } else {
84       StringRef BasedOnStyle;
85       IO.mapOptional("BasedOnStyle", BasedOnStyle);
86       if (!BasedOnStyle.empty())
87         if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) {
88           IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
89           return;
90         }
91     }
92 
93     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
94     IO.mapOptional("ConstructorInitializerIndentWidth",
95                    Style.ConstructorInitializerIndentWidth);
96     IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
97     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
98     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
99                    Style.AllowAllParametersOfDeclarationOnNextLine);
100     IO.mapOptional("AllowShortIfStatementsOnASingleLine",
101                    Style.AllowShortIfStatementsOnASingleLine);
102     IO.mapOptional("AllowShortLoopsOnASingleLine",
103                    Style.AllowShortLoopsOnASingleLine);
104     IO.mapOptional("AlwaysBreakTemplateDeclarations",
105                    Style.AlwaysBreakTemplateDeclarations);
106     IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
107                    Style.AlwaysBreakBeforeMultilineStrings);
108     IO.mapOptional("BreakBeforeBinaryOperators",
109                    Style.BreakBeforeBinaryOperators);
110     IO.mapOptional("BreakConstructorInitializersBeforeComma",
111                    Style.BreakConstructorInitializersBeforeComma);
112     IO.mapOptional("BinPackParameters", Style.BinPackParameters);
113     IO.mapOptional("ColumnLimit", Style.ColumnLimit);
114     IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
115                    Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
116     IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding);
117     IO.mapOptional("ExperimentalAutoDetectBinPacking",
118                    Style.ExperimentalAutoDetectBinPacking);
119     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
120     IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
121     IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
122     IO.mapOptional("ObjCSpaceBeforeProtocolList",
123                    Style.ObjCSpaceBeforeProtocolList);
124     IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
125     IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
126     IO.mapOptional("PenaltyBreakFirstLessLess",
127                    Style.PenaltyBreakFirstLessLess);
128     IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
129     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
130                    Style.PenaltyReturnTypeOnItsOwnLine);
131     IO.mapOptional("PointerBindsToType", Style.PointerBindsToType);
132     IO.mapOptional("SpacesBeforeTrailingComments",
133                    Style.SpacesBeforeTrailingComments);
134     IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
135     IO.mapOptional("Standard", Style.Standard);
136     IO.mapOptional("IndentWidth", Style.IndentWidth);
137     IO.mapOptional("UseTab", Style.UseTab);
138     IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
139     IO.mapOptional("IndentFunctionDeclarationAfterType",
140                    Style.IndentFunctionDeclarationAfterType);
141   }
142 };
143 }
144 }
145 
146 namespace clang {
147 namespace format {
148 
149 void setDefaultPenalties(FormatStyle &Style) {
150   Style.PenaltyBreakComment = 45;
151   Style.PenaltyBreakFirstLessLess = 120;
152   Style.PenaltyBreakString = 1000;
153   Style.PenaltyExcessCharacter = 1000000;
154 }
155 
156 FormatStyle getLLVMStyle() {
157   FormatStyle LLVMStyle;
158   LLVMStyle.AccessModifierOffset = -2;
159   LLVMStyle.AlignEscapedNewlinesLeft = false;
160   LLVMStyle.AlignTrailingComments = true;
161   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
162   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
163   LLVMStyle.AllowShortLoopsOnASingleLine = false;
164   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
165   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
166   LLVMStyle.BinPackParameters = true;
167   LLVMStyle.BreakBeforeBinaryOperators = false;
168   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
169   LLVMStyle.BreakConstructorInitializersBeforeComma = false;
170   LLVMStyle.ColumnLimit = 80;
171   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
172   LLVMStyle.ConstructorInitializerIndentWidth = 4;
173   LLVMStyle.Cpp11BracedListStyle = false;
174   LLVMStyle.DerivePointerBinding = false;
175   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
176   LLVMStyle.IndentCaseLabels = false;
177   LLVMStyle.IndentFunctionDeclarationAfterType = false;
178   LLVMStyle.IndentWidth = 2;
179   LLVMStyle.MaxEmptyLinesToKeep = 1;
180   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
181   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
182   LLVMStyle.PointerBindsToType = false;
183   LLVMStyle.SpacesBeforeTrailingComments = 1;
184   LLVMStyle.Standard = FormatStyle::LS_Cpp03;
185   LLVMStyle.UseTab = false;
186 
187   setDefaultPenalties(LLVMStyle);
188   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
189 
190   return LLVMStyle;
191 }
192 
193 FormatStyle getGoogleStyle() {
194   FormatStyle GoogleStyle;
195   GoogleStyle.AccessModifierOffset = -1;
196   GoogleStyle.AlignEscapedNewlinesLeft = true;
197   GoogleStyle.AlignTrailingComments = true;
198   GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true;
199   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
200   GoogleStyle.AllowShortLoopsOnASingleLine = true;
201   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
202   GoogleStyle.AlwaysBreakTemplateDeclarations = true;
203   GoogleStyle.BinPackParameters = true;
204   GoogleStyle.BreakBeforeBinaryOperators = false;
205   GoogleStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
206   GoogleStyle.BreakConstructorInitializersBeforeComma = false;
207   GoogleStyle.ColumnLimit = 80;
208   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
209   GoogleStyle.ConstructorInitializerIndentWidth = 4;
210   GoogleStyle.Cpp11BracedListStyle = true;
211   GoogleStyle.DerivePointerBinding = true;
212   GoogleStyle.ExperimentalAutoDetectBinPacking = false;
213   GoogleStyle.IndentCaseLabels = true;
214   GoogleStyle.IndentFunctionDeclarationAfterType = true;
215   GoogleStyle.IndentWidth = 2;
216   GoogleStyle.MaxEmptyLinesToKeep = 1;
217   GoogleStyle.NamespaceIndentation = FormatStyle::NI_None;
218   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
219   GoogleStyle.PointerBindsToType = true;
220   GoogleStyle.SpacesBeforeTrailingComments = 2;
221   GoogleStyle.Standard = FormatStyle::LS_Auto;
222   GoogleStyle.UseTab = false;
223 
224   setDefaultPenalties(GoogleStyle);
225   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
226 
227   return GoogleStyle;
228 }
229 
230 FormatStyle getChromiumStyle() {
231   FormatStyle ChromiumStyle = getGoogleStyle();
232   ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
233   ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
234   ChromiumStyle.AllowShortLoopsOnASingleLine = false;
235   ChromiumStyle.BinPackParameters = false;
236   ChromiumStyle.DerivePointerBinding = false;
237   ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
238   return ChromiumStyle;
239 }
240 
241 FormatStyle getMozillaStyle() {
242   FormatStyle MozillaStyle = getLLVMStyle();
243   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
244   MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
245   MozillaStyle.DerivePointerBinding = true;
246   MozillaStyle.IndentCaseLabels = true;
247   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
248   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
249   MozillaStyle.PointerBindsToType = true;
250   return MozillaStyle;
251 }
252 
253 FormatStyle getWebKitStyle() {
254   FormatStyle Style = getLLVMStyle();
255   Style.AccessModifierOffset = -4;
256   Style.AlignTrailingComments = false;
257   Style.BreakBeforeBinaryOperators = true;
258   Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
259   Style.BreakConstructorInitializersBeforeComma = true;
260   Style.ColumnLimit = 0;
261   Style.IndentWidth = 4;
262   Style.NamespaceIndentation = FormatStyle::NI_Inner;
263   Style.PointerBindsToType = true;
264   return Style;
265 }
266 
267 bool getPredefinedStyle(StringRef Name, FormatStyle *Style) {
268   if (Name.equals_lower("llvm"))
269     *Style = getLLVMStyle();
270   else if (Name.equals_lower("chromium"))
271     *Style = getChromiumStyle();
272   else if (Name.equals_lower("mozilla"))
273     *Style = getMozillaStyle();
274   else if (Name.equals_lower("google"))
275     *Style = getGoogleStyle();
276   else if (Name.equals_lower("webkit"))
277     *Style = getWebKitStyle();
278   else
279     return false;
280 
281   return true;
282 }
283 
284 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
285   if (Text.trim().empty())
286     return llvm::make_error_code(llvm::errc::invalid_argument);
287   llvm::yaml::Input Input(Text);
288   Input >> *Style;
289   return Input.error();
290 }
291 
292 std::string configurationAsText(const FormatStyle &Style) {
293   std::string Text;
294   llvm::raw_string_ostream Stream(Text);
295   llvm::yaml::Output Output(Stream);
296   // We use the same mapping method for input and output, so we need a non-const
297   // reference here.
298   FormatStyle NonConstStyle = Style;
299   Output << NonConstStyle;
300   return Stream.str();
301 }
302 
303 // Returns the length of everything up to the first possible line break after
304 // the ), ], } or > matching \c Tok.
305 static unsigned getLengthToMatchingParen(const FormatToken &Tok) {
306   if (Tok.MatchingParen == NULL)
307     return 0;
308   FormatToken *End = Tok.MatchingParen;
309   while (End->Next && !End->Next->CanBreakBefore) {
310     End = End->Next;
311   }
312   return End->TotalLength - Tok.TotalLength + 1;
313 }
314 
315 namespace {
316 
317 class UnwrappedLineFormatter {
318 public:
319   UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
320                          const AnnotatedLine &Line, unsigned FirstIndent,
321                          const FormatToken *RootToken,
322                          WhitespaceManager &Whitespaces,
323                          encoding::Encoding Encoding,
324                          bool BinPackInconclusiveFunctions)
325       : Style(Style), SourceMgr(SourceMgr), Line(Line),
326         FirstIndent(FirstIndent), RootToken(RootToken),
327         Whitespaces(Whitespaces), Count(0), Encoding(Encoding),
328         BinPackInconclusiveFunctions(BinPackInconclusiveFunctions) {}
329 
330   /// \brief Formats an \c UnwrappedLine.
331   void format(const AnnotatedLine *NextLine) {
332     // Initialize state dependent on indent.
333     LineState State;
334     State.Column = FirstIndent;
335     State.NextToken = RootToken;
336     State.Stack.push_back(ParenState(FirstIndent, FirstIndent,
337                                      /*AvoidBinPacking=*/false,
338                                      /*NoLineBreak=*/false));
339     State.LineContainsContinuedForLoopSection = false;
340     State.ParenLevel = 0;
341     State.StartOfStringLiteral = 0;
342     State.StartOfLineLevel = State.ParenLevel;
343     State.LowestLevelOnLine = State.ParenLevel;
344     State.IgnoreStackForComparison = false;
345 
346     // The first token has already been indented and thus consumed.
347     moveStateToNextToken(State, /*DryRun=*/false, /*Newline=*/false);
348 
349     if (Style.ColumnLimit == 0) {
350       formatWithoutColumnLimit(State);
351       return;
352     }
353 
354     // If everything fits on a single line, just put it there.
355     unsigned ColumnLimit = Style.ColumnLimit;
356     if (NextLine && NextLine->InPPDirective &&
357         !NextLine->First->HasUnescapedNewline)
358       ColumnLimit = getColumnLimit();
359     if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) {
360       while (State.NextToken != NULL) {
361         addTokenToState(false, false, State);
362       }
363     }
364 
365     // If the ObjC method declaration does not fit on a line, we should format
366     // it with one arg per line.
367     if (Line.Type == LT_ObjCMethodDecl)
368       State.Stack.back().BreakBeforeParameter = true;
369 
370     // Find best solution in solution space.
371     analyzeSolutionSpace(State);
372   }
373 
374 private:
375   void DebugTokenState(const FormatToken &FormatTok) {
376     const Token &Tok = FormatTok.Tok;
377     llvm::dbgs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
378                               Tok.getLength());
379     llvm::dbgs();
380   }
381 
382   struct ParenState {
383     ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
384                bool NoLineBreak)
385         : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0),
386           BreakBeforeClosingBrace(false), QuestionColumn(0),
387           AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
388           NoLineBreak(NoLineBreak), ColonPos(0), StartOfFunctionCall(0),
389           StartOfArraySubscripts(0), NestedNameSpecifierContinuation(0),
390           CallContinuation(0), VariablePos(0), ContainsLineBreak(false) {}
391 
392     /// \brief The position to which a specific parenthesis level needs to be
393     /// indented.
394     unsigned Indent;
395 
396     /// \brief The position of the last space on each level.
397     ///
398     /// Used e.g. to break like:
399     /// functionCall(Parameter, otherCall(
400     ///                             OtherParameter));
401     unsigned LastSpace;
402 
403     /// \brief The position the first "<<" operator encountered on each level.
404     ///
405     /// Used to align "<<" operators. 0 if no such operator has been encountered
406     /// on a level.
407     unsigned FirstLessLess;
408 
409     /// \brief Whether a newline needs to be inserted before the block's closing
410     /// brace.
411     ///
412     /// We only want to insert a newline before the closing brace if there also
413     /// was a newline after the beginning left brace.
414     bool BreakBeforeClosingBrace;
415 
416     /// \brief The column of a \c ? in a conditional expression;
417     unsigned QuestionColumn;
418 
419     /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
420     /// lines, in this context.
421     bool AvoidBinPacking;
422 
423     /// \brief Break after the next comma (or all the commas in this context if
424     /// \c AvoidBinPacking is \c true).
425     bool BreakBeforeParameter;
426 
427     /// \brief Line breaking in this context would break a formatting rule.
428     bool NoLineBreak;
429 
430     /// \brief The position of the colon in an ObjC method declaration/call.
431     unsigned ColonPos;
432 
433     /// \brief The start of the most recent function in a builder-type call.
434     unsigned StartOfFunctionCall;
435 
436     /// \brief Contains the start of array subscript expressions, so that they
437     /// can be aligned.
438     unsigned StartOfArraySubscripts;
439 
440     /// \brief If a nested name specifier was broken over multiple lines, this
441     /// contains the start column of the second line. Otherwise 0.
442     unsigned NestedNameSpecifierContinuation;
443 
444     /// \brief If a call expression was broken over multiple lines, this
445     /// contains the start column of the second line. Otherwise 0.
446     unsigned CallContinuation;
447 
448     /// \brief The column of the first variable name in a variable declaration.
449     ///
450     /// Used to align further variables if necessary.
451     unsigned VariablePos;
452 
453     /// \brief \c true if this \c ParenState already contains a line-break.
454     ///
455     /// The first line break in a certain \c ParenState causes extra penalty so
456     /// that clang-format prefers similar breaks, i.e. breaks in the same
457     /// parenthesis.
458     bool ContainsLineBreak;
459 
460     bool operator<(const ParenState &Other) const {
461       if (Indent != Other.Indent)
462         return Indent < Other.Indent;
463       if (LastSpace != Other.LastSpace)
464         return LastSpace < Other.LastSpace;
465       if (FirstLessLess != Other.FirstLessLess)
466         return FirstLessLess < Other.FirstLessLess;
467       if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
468         return BreakBeforeClosingBrace;
469       if (QuestionColumn != Other.QuestionColumn)
470         return QuestionColumn < Other.QuestionColumn;
471       if (AvoidBinPacking != Other.AvoidBinPacking)
472         return AvoidBinPacking;
473       if (BreakBeforeParameter != Other.BreakBeforeParameter)
474         return BreakBeforeParameter;
475       if (NoLineBreak != Other.NoLineBreak)
476         return NoLineBreak;
477       if (ColonPos != Other.ColonPos)
478         return ColonPos < Other.ColonPos;
479       if (StartOfFunctionCall != Other.StartOfFunctionCall)
480         return StartOfFunctionCall < Other.StartOfFunctionCall;
481       if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
482         return StartOfArraySubscripts < Other.StartOfArraySubscripts;
483       if (CallContinuation != Other.CallContinuation)
484         return CallContinuation < Other.CallContinuation;
485       if (VariablePos != Other.VariablePos)
486         return VariablePos < Other.VariablePos;
487       if (ContainsLineBreak != Other.ContainsLineBreak)
488         return ContainsLineBreak < Other.ContainsLineBreak;
489       return false;
490     }
491   };
492 
493   /// \brief The current state when indenting a unwrapped line.
494   ///
495   /// As the indenting tries different combinations this is copied by value.
496   struct LineState {
497     /// \brief The number of used columns in the current line.
498     unsigned Column;
499 
500     /// \brief The token that needs to be next formatted.
501     const FormatToken *NextToken;
502 
503     /// \brief \c true if this line contains a continued for-loop section.
504     bool LineContainsContinuedForLoopSection;
505 
506     /// \brief The level of nesting inside (), [], <> and {}.
507     unsigned ParenLevel;
508 
509     /// \brief The \c ParenLevel at the start of this line.
510     unsigned StartOfLineLevel;
511 
512     /// \brief The lowest \c ParenLevel on the current line.
513     unsigned LowestLevelOnLine;
514 
515     /// \brief The start column of the string literal, if we're in a string
516     /// literal sequence, 0 otherwise.
517     unsigned StartOfStringLiteral;
518 
519     /// \brief A stack keeping track of properties applying to parenthesis
520     /// levels.
521     std::vector<ParenState> Stack;
522 
523     /// \brief Ignore the stack of \c ParenStates for state comparison.
524     ///
525     /// In long and deeply nested unwrapped lines, the current algorithm can
526     /// be insufficient for finding the best formatting with a reasonable amount
527     /// of time and memory. Setting this flag will effectively lead to the
528     /// algorithm not analyzing some combinations. However, these combinations
529     /// rarely contain the optimal solution: In short, accepting a higher
530     /// penalty early would need to lead to different values in the \c
531     /// ParenState stack (in an otherwise identical state) and these different
532     /// values would need to lead to a significant amount of avoided penalty
533     /// later.
534     ///
535     /// FIXME: Come up with a better algorithm instead.
536     bool IgnoreStackForComparison;
537 
538     /// \brief Comparison operator to be able to used \c LineState in \c map.
539     bool operator<(const LineState &Other) const {
540       if (NextToken != Other.NextToken)
541         return NextToken < Other.NextToken;
542       if (Column != Other.Column)
543         return Column < Other.Column;
544       if (LineContainsContinuedForLoopSection !=
545           Other.LineContainsContinuedForLoopSection)
546         return LineContainsContinuedForLoopSection;
547       if (ParenLevel != Other.ParenLevel)
548         return ParenLevel < Other.ParenLevel;
549       if (StartOfLineLevel != Other.StartOfLineLevel)
550         return StartOfLineLevel < Other.StartOfLineLevel;
551       if (LowestLevelOnLine != Other.LowestLevelOnLine)
552         return LowestLevelOnLine < Other.LowestLevelOnLine;
553       if (StartOfStringLiteral != Other.StartOfStringLiteral)
554         return StartOfStringLiteral < Other.StartOfStringLiteral;
555       if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
556         return false;
557       return Stack < Other.Stack;
558     }
559   };
560 
561   /// \brief Formats the line starting at \p State, simply keeping all of the
562   /// input's line breaking decisions.
563   void formatWithoutColumnLimit(LineState &State) {
564     while (State.NextToken != NULL) {
565       bool Newline = mustBreak(State) ||
566                      (canBreak(State) && State.NextToken->NewlinesBefore > 0);
567       addTokenToState(Newline, /*DryRun=*/false, State);
568     }
569   }
570 
571   /// \brief Appends the next token to \p State and updates information
572   /// necessary for indentation.
573   ///
574   /// Puts the token on the current line if \p Newline is \c false and adds a
575   /// line break and necessary indentation otherwise.
576   ///
577   /// If \p DryRun is \c false, also creates and stores the required
578   /// \c Replacement.
579   unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) {
580     const FormatToken &Current = *State.NextToken;
581     const FormatToken &Previous = *State.NextToken->Previous;
582 
583     // Extra penalty that needs to be added because of the way certain line
584     // breaks are chosen.
585     unsigned ExtraPenalty = 0;
586 
587     if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) {
588       // FIXME: Is this correct?
589       int WhitespaceLength = SourceMgr.getSpellingColumnNumber(
590                                  State.NextToken->WhitespaceRange.getEnd()) -
591                              SourceMgr.getSpellingColumnNumber(
592                                  State.NextToken->WhitespaceRange.getBegin());
593       State.Column += WhitespaceLength + State.NextToken->CodePointCount;
594       State.NextToken = State.NextToken->Next;
595       return 0;
596     }
597 
598     // If we are continuing an expression, we want to indent an extra 4 spaces.
599     unsigned ContinuationIndent =
600         std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4;
601     if (Newline) {
602       // Breaking before the first "<<" is generally not desirable if the LHS is
603       // short.
604       if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0 &&
605           State.Column <= Style.ColumnLimit / 2)
606         ExtraPenalty += Style.PenaltyBreakFirstLessLess;
607 
608       State.Stack.back().ContainsLineBreak = true;
609       if (Current.is(tok::r_brace)) {
610         if (Current.BlockKind == BK_BracedInit)
611           State.Column = State.Stack[State.Stack.size() - 2].LastSpace;
612         else
613           State.Column = FirstIndent;
614       } else if (Current.is(tok::string_literal) &&
615                  State.StartOfStringLiteral != 0) {
616         State.Column = State.StartOfStringLiteral;
617         State.Stack.back().BreakBeforeParameter = true;
618       } else if (Current.is(tok::lessless) &&
619                  State.Stack.back().FirstLessLess != 0) {
620         State.Column = State.Stack.back().FirstLessLess;
621       } else if (Current.isOneOf(tok::period, tok::arrow) &&
622                  Current.Type != TT_DesignatedInitializerPeriod) {
623         if (State.Stack.back().CallContinuation == 0) {
624           State.Column = ContinuationIndent;
625           State.Stack.back().CallContinuation = State.Column;
626         } else {
627           State.Column = State.Stack.back().CallContinuation;
628         }
629       } else if (Current.Type == TT_ConditionalExpr) {
630         State.Column = State.Stack.back().QuestionColumn;
631       } else if (Previous.is(tok::comma) &&
632                  State.Stack.back().VariablePos != 0) {
633         State.Column = State.Stack.back().VariablePos;
634       } else if (Previous.ClosesTemplateDeclaration ||
635                  ((Current.Type == TT_StartOfName ||
636                    Current.is(tok::kw_operator)) &&
637                   State.ParenLevel == 0 &&
638                   (!Style.IndentFunctionDeclarationAfterType ||
639                    Line.StartsDefinition))) {
640         State.Column = State.Stack.back().Indent;
641       } else if (Current.Type == TT_ObjCSelectorName) {
642         if (State.Stack.back().ColonPos > Current.CodePointCount) {
643           State.Column = State.Stack.back().ColonPos - Current.CodePointCount;
644         } else {
645           State.Column = State.Stack.back().Indent;
646           State.Stack.back().ColonPos = State.Column + Current.CodePointCount;
647         }
648       } else if (Current.is(tok::l_square) &&
649                  Current.Type != TT_ObjCMethodExpr) {
650         if (State.Stack.back().StartOfArraySubscripts != 0)
651           State.Column = State.Stack.back().StartOfArraySubscripts;
652         else
653           State.Column = ContinuationIndent;
654       } else if (Current.Type == TT_StartOfName ||
655                  Previous.isOneOf(tok::coloncolon, tok::equal) ||
656                  Previous.Type == TT_ObjCMethodExpr) {
657         State.Column = ContinuationIndent;
658       } else if (Current.Type == TT_CtorInitializerColon) {
659         State.Column = FirstIndent + Style.ConstructorInitializerIndentWidth;
660       } else if (Current.Type == TT_CtorInitializerComma) {
661         State.Column = State.Stack.back().Indent;
662       } else {
663         State.Column = State.Stack.back().Indent;
664         // Ensure that we fall back to indenting 4 spaces instead of just
665         // flushing continuations left.
666         if (State.Column == FirstIndent)
667           State.Column += 4;
668       }
669 
670       if (Current.is(tok::question))
671         State.Stack.back().BreakBeforeParameter = true;
672       if ((Previous.isOneOf(tok::comma, tok::semi) &&
673            !State.Stack.back().AvoidBinPacking) ||
674           Previous.Type == TT_BinaryOperator)
675         State.Stack.back().BreakBeforeParameter = false;
676       if (Previous.Type == TT_TemplateCloser && State.ParenLevel == 0)
677         State.Stack.back().BreakBeforeParameter = false;
678 
679       if (!DryRun) {
680         unsigned NewLines = 1;
681         if (Current.is(tok::comment))
682           NewLines = std::max(
683               NewLines,
684               std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1));
685         Whitespaces.replaceWhitespace(Current, NewLines, State.Column,
686                                       State.Column, Line.InPPDirective);
687       }
688 
689       if (!Current.isTrailingComment())
690         State.Stack.back().LastSpace = State.Column;
691       if (Current.isOneOf(tok::arrow, tok::period) &&
692           Current.Type != TT_DesignatedInitializerPeriod)
693         State.Stack.back().LastSpace += Current.CodePointCount;
694       State.StartOfLineLevel = State.ParenLevel;
695       State.LowestLevelOnLine = State.ParenLevel;
696 
697       // Any break on this level means that the parent level has been broken
698       // and we need to avoid bin packing there.
699       for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) {
700         State.Stack[i].BreakBeforeParameter = true;
701       }
702       const FormatToken *TokenBefore = Current.getPreviousNonComment();
703       if (TokenBefore && !TokenBefore->isOneOf(tok::comma, tok::semi) &&
704           TokenBefore->Type != TT_TemplateCloser &&
705           TokenBefore->Type != TT_BinaryOperator && !TokenBefore->opensScope())
706         State.Stack.back().BreakBeforeParameter = true;
707 
708       // If we break after {, we should also break before the corresponding }.
709       if (Previous.is(tok::l_brace))
710         State.Stack.back().BreakBeforeClosingBrace = true;
711 
712       if (State.Stack.back().AvoidBinPacking) {
713         // If we are breaking after '(', '{', '<', this is not bin packing
714         // unless AllowAllParametersOfDeclarationOnNextLine is false.
715         if (!(Previous.isOneOf(tok::l_paren, tok::l_brace) ||
716               Previous.Type == TT_BinaryOperator) ||
717             (!Style.AllowAllParametersOfDeclarationOnNextLine &&
718              Line.MustBeDeclaration))
719           State.Stack.back().BreakBeforeParameter = true;
720       }
721 
722     } else {
723       if (Current.is(tok::equal) &&
724           (RootToken->is(tok::kw_for) || State.ParenLevel == 0) &&
725           State.Stack.back().VariablePos == 0) {
726         State.Stack.back().VariablePos = State.Column;
727         // Move over * and & if they are bound to the variable name.
728         const FormatToken *Tok = &Previous;
729         while (Tok && State.Stack.back().VariablePos >= Tok->CodePointCount) {
730           State.Stack.back().VariablePos -= Tok->CodePointCount;
731           if (Tok->SpacesRequiredBefore != 0)
732             break;
733           Tok = Tok->Previous;
734         }
735         if (Previous.PartOfMultiVariableDeclStmt)
736           State.Stack.back().LastSpace = State.Stack.back().VariablePos;
737       }
738 
739       unsigned Spaces = State.NextToken->SpacesRequiredBefore;
740 
741       if (!DryRun)
742         Whitespaces.replaceWhitespace(Current, 0, Spaces,
743                                       State.Column + Spaces);
744 
745       if (Current.Type == TT_ObjCSelectorName &&
746           State.Stack.back().ColonPos == 0) {
747         if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
748             State.Column + Spaces + Current.CodePointCount)
749           State.Stack.back().ColonPos =
750               State.Stack.back().Indent + Current.LongestObjCSelectorName;
751         else
752           State.Stack.back().ColonPos =
753               State.Column + Spaces + Current.CodePointCount;
754       }
755 
756       if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr &&
757           Current.Type != TT_LineComment)
758         State.Stack.back().Indent = State.Column + Spaces;
759       if (Previous.is(tok::comma) && !Current.isTrailingComment() &&
760           State.Stack.back().AvoidBinPacking)
761         State.Stack.back().NoLineBreak = true;
762 
763       State.Column += Spaces;
764       if (Current.is(tok::l_paren) && Previous.isOneOf(tok::kw_if, tok::kw_for))
765         // Treat the condition inside an if as if it was a second function
766         // parameter, i.e. let nested calls have an indent of 4.
767         State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(".
768       else if (Previous.is(tok::comma))
769         State.Stack.back().LastSpace = State.Column;
770       else if ((Previous.Type == TT_BinaryOperator ||
771                 Previous.Type == TT_ConditionalExpr ||
772                 Previous.Type == TT_CtorInitializerColon) &&
773                !(Previous.getPrecedence() == prec::Assignment &&
774                  Current.FakeLParens.empty()))
775         // Always indent relative to the RHS of the expression unless this is a
776         // simple assignment without binary expression on the RHS.
777         State.Stack.back().LastSpace = State.Column;
778       else if (Previous.Type == TT_InheritanceColon)
779         State.Stack.back().Indent = State.Column;
780       else if (Previous.opensScope()) {
781         // If a function has multiple parameters (including a single parameter
782         // that is a binary expression) or a trailing call, indent all
783         // parameters from the opening parenthesis. This avoids confusing
784         // indents like:
785         //   OuterFunction(InnerFunctionCall(
786         //       ParameterToInnerFunction),
787         //                 SecondParameterToOuterFunction);
788         bool HasMultipleParameters = !Current.FakeLParens.empty();
789         bool HasTrailingCall = false;
790         if (Previous.MatchingParen) {
791           const FormatToken *Next = Previous.MatchingParen->getNextNonComment();
792           if (Next && Next->isOneOf(tok::period, tok::arrow))
793             HasTrailingCall = true;
794         }
795         if (HasMultipleParameters || HasTrailingCall)
796           State.Stack.back().LastSpace = State.Column;
797       }
798     }
799 
800     return moveStateToNextToken(State, DryRun, Newline) + ExtraPenalty;
801   }
802 
803   /// \brief Mark the next token as consumed in \p State and modify its stacks
804   /// accordingly.
805   unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline) {
806     const FormatToken &Current = *State.NextToken;
807     assert(State.Stack.size());
808 
809     if (Current.Type == TT_InheritanceColon)
810       State.Stack.back().AvoidBinPacking = true;
811     if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0)
812       State.Stack.back().FirstLessLess = State.Column;
813     if (Current.is(tok::l_square) &&
814         State.Stack.back().StartOfArraySubscripts == 0)
815       State.Stack.back().StartOfArraySubscripts = State.Column;
816     if (Current.is(tok::question))
817       State.Stack.back().QuestionColumn = State.Column;
818     if (!Current.opensScope() && !Current.closesScope())
819       State.LowestLevelOnLine =
820           std::min(State.LowestLevelOnLine, State.ParenLevel);
821     if (Current.isOneOf(tok::period, tok::arrow) &&
822         Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0)
823       State.Stack.back().StartOfFunctionCall =
824           Current.LastInChainOfCalls ? 0
825                                      : State.Column + Current.CodePointCount;
826     if (Current.Type == TT_CtorInitializerColon) {
827       // Indent 2 from the column, so:
828       // SomeClass::SomeClass()
829       //     : First(...), ...
830       //       Next(...)
831       //       ^ line up here.
832       State.Stack.back().Indent =
833           State.Column +
834           (Style.BreakConstructorInitializersBeforeComma ? 0 : 2);
835       if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
836         State.Stack.back().AvoidBinPacking = true;
837       State.Stack.back().BreakBeforeParameter = false;
838     }
839 
840     // If return returns a binary expression, align after it.
841     if (Current.is(tok::kw_return) && !Current.FakeLParens.empty())
842       State.Stack.back().LastSpace = State.Column + 7;
843 
844     // In ObjC method declaration we align on the ":" of parameters, but we need
845     // to ensure that we indent parameters on subsequent lines by at least 4.
846     if (Current.Type == TT_ObjCMethodSpecifier)
847       State.Stack.back().Indent += 4;
848 
849     // Insert scopes created by fake parenthesis.
850     const FormatToken *Previous = Current.getPreviousNonComment();
851     // Don't add extra indentation for the first fake parenthesis after
852     // 'return', assignements or opening <({[. The indentation for these cases
853     // is special cased.
854     bool SkipFirstExtraIndent =
855         Current.is(tok::kw_return) ||
856         (Previous && (Previous->opensScope() ||
857                       Previous->getPrecedence() == prec::Assignment));
858     for (SmallVectorImpl<prec::Level>::const_reverse_iterator
859              I = Current.FakeLParens.rbegin(),
860              E = Current.FakeLParens.rend();
861          I != E; ++I) {
862       ParenState NewParenState = State.Stack.back();
863       NewParenState.ContainsLineBreak = false;
864       NewParenState.Indent =
865           std::max(std::max(State.Column, NewParenState.Indent),
866                    State.Stack.back().LastSpace);
867 
868       // Always indent conditional expressions. Never indent expression where
869       // the 'operator' is ',', ';' or an assignment (i.e. *I <=
870       // prec::Assignment) as those have different indentation rules. Indent
871       // other expression, unless the indentation needs to be skipped.
872       if (*I == prec::Conditional ||
873           (!SkipFirstExtraIndent && *I > prec::Assignment &&
874            !Style.BreakBeforeBinaryOperators))
875         NewParenState.Indent += 4;
876       if (Previous && !Previous->opensScope())
877         NewParenState.BreakBeforeParameter = false;
878       State.Stack.push_back(NewParenState);
879       SkipFirstExtraIndent = false;
880     }
881 
882     // If we encounter an opening (, [, { or <, we add a level to our stacks to
883     // prepare for the following tokens.
884     if (Current.opensScope()) {
885       unsigned NewIndent;
886       unsigned LastSpace = State.Stack.back().LastSpace;
887       bool AvoidBinPacking;
888       if (Current.is(tok::l_brace)) {
889         NewIndent =
890             LastSpace + (Style.Cpp11BracedListStyle ? 4 : Style.IndentWidth);
891         const FormatToken *NextNoComment = Current.getNextNonComment();
892         AvoidBinPacking = NextNoComment &&
893                           NextNoComment->Type == TT_DesignatedInitializerPeriod;
894       } else {
895         NewIndent =
896             4 + std::max(LastSpace, State.Stack.back().StartOfFunctionCall);
897         AvoidBinPacking = !Style.BinPackParameters ||
898                           (Style.ExperimentalAutoDetectBinPacking &&
899                            (Current.PackingKind == PPK_OnePerLine ||
900                             (!BinPackInconclusiveFunctions &&
901                              Current.PackingKind == PPK_Inconclusive)));
902       }
903 
904       State.Stack.push_back(ParenState(NewIndent, LastSpace, AvoidBinPacking,
905                                        State.Stack.back().NoLineBreak));
906       ++State.ParenLevel;
907     }
908 
909     // If this '[' opens an ObjC call, determine whether all parameters fit into
910     // one line and put one per line if they don't.
911     if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr &&
912         Current.MatchingParen != NULL) {
913       if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit())
914         State.Stack.back().BreakBeforeParameter = true;
915     }
916 
917     // If we encounter a closing ), ], } or >, we can remove a level from our
918     // stacks.
919     if (Current.isOneOf(tok::r_paren, tok::r_square) ||
920         (Current.is(tok::r_brace) && State.NextToken != RootToken) ||
921         State.NextToken->Type == TT_TemplateCloser) {
922       State.Stack.pop_back();
923       --State.ParenLevel;
924     }
925     if (Current.is(tok::r_square)) {
926       // If this ends the array subscript expr, reset the corresponding value.
927       const FormatToken *NextNonComment = Current.getNextNonComment();
928       if (NextNonComment && NextNonComment->isNot(tok::l_square))
929         State.Stack.back().StartOfArraySubscripts = 0;
930     }
931 
932     // Remove scopes created by fake parenthesis.
933     for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) {
934       unsigned VariablePos = State.Stack.back().VariablePos;
935       State.Stack.pop_back();
936       State.Stack.back().VariablePos = VariablePos;
937     }
938 
939     if (Current.is(tok::string_literal) && State.StartOfStringLiteral == 0) {
940       State.StartOfStringLiteral = State.Column;
941     } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash,
942                                 tok::string_literal)) {
943       State.StartOfStringLiteral = 0;
944     }
945 
946     State.Column += Current.CodePointCount;
947 
948     State.NextToken = State.NextToken->Next;
949 
950     if (!Newline && Style.AlwaysBreakBeforeMultilineStrings &&
951         Current.is(tok::string_literal) && Current.CanBreakBefore)
952       return 0;
953 
954     return breakProtrudingToken(Current, State, DryRun);
955   }
956 
957   /// \brief If the current token sticks out over the end of the line, break
958   /// it if possible.
959   ///
960   /// \returns An extra penalty if a token was broken, otherwise 0.
961   ///
962   /// The returned penalty will cover the cost of the additional line breaks and
963   /// column limit violation in all lines except for the last one. The penalty
964   /// for the column limit violation in the last line (and in single line
965   /// tokens) is handled in \c addNextStateToQueue.
966   unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
967                                 bool DryRun) {
968     llvm::OwningPtr<BreakableToken> Token;
969     unsigned StartColumn = State.Column - Current.CodePointCount;
970     unsigned OriginalStartColumn =
971         SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) -
972         1;
973 
974     if (Current.is(tok::string_literal) &&
975         Current.Type != TT_ImplicitStringLiteral) {
976       // Only break up default narrow strings.
977       if (!Current.TokenText.startswith("\""))
978         return 0;
979       // Don't break string literals with escaped newlines. As clang-format must
980       // not change the string's content, it is unlikely that we'll end up with
981       // a better format.
982       if (Current.TokenText.find("\\\n") != StringRef::npos)
983         return 0;
984       // Exempts unterminated string literals from line breaking. The user will
985       // likely want to terminate the string before any line breaking is done.
986       if (Current.IsUnterminatedLiteral)
987         return 0;
988 
989       Token.reset(new BreakableStringLiteral(Current, StartColumn,
990                                              Line.InPPDirective, Encoding));
991     } else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) {
992       Token.reset(new BreakableBlockComment(
993           Style, Current, StartColumn, OriginalStartColumn, !Current.Previous,
994           Line.InPPDirective, Encoding));
995     } else if (Current.Type == TT_LineComment &&
996                (Current.Previous == NULL ||
997                 Current.Previous->Type != TT_ImplicitStringLiteral)) {
998       // Don't break line comments with escaped newlines. These look like
999       // separate line comments, but in fact contain a single line comment with
1000       // multiple lines including leading whitespace and the '//' markers.
1001       //
1002       // FIXME: If we want to handle them correctly, we'll need to adjust
1003       // leading whitespace in consecutive lines when changing indentation of
1004       // the first line similar to what we do with block comments.
1005       StringRef::size_type EscapedNewlinePos = Current.TokenText.find("\\\n");
1006       if (EscapedNewlinePos != StringRef::npos) {
1007         State.Column =
1008             StartColumn +
1009             encoding::getCodePointCount(
1010                 Current.TokenText.substr(0, EscapedNewlinePos), Encoding) +
1011             1;
1012         return 0;
1013       }
1014 
1015       Token.reset(new BreakableLineComment(Current, StartColumn,
1016                                            Line.InPPDirective, Encoding));
1017     } else {
1018       return 0;
1019     }
1020     if (Current.UnbreakableTailLength >= getColumnLimit())
1021       return 0;
1022 
1023     unsigned RemainingSpace = getColumnLimit() - Current.UnbreakableTailLength;
1024     bool BreakInserted = false;
1025     unsigned Penalty = 0;
1026     unsigned RemainingTokenColumns = 0;
1027     for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
1028          LineIndex != EndIndex; ++LineIndex) {
1029       if (!DryRun)
1030         Token->replaceWhitespaceBefore(LineIndex, Whitespaces);
1031       unsigned TailOffset = 0;
1032       RemainingTokenColumns = Token->getLineLengthAfterSplit(
1033           LineIndex, TailOffset, StringRef::npos);
1034       while (RemainingTokenColumns > RemainingSpace) {
1035         BreakableToken::Split Split =
1036             Token->getSplit(LineIndex, TailOffset, getColumnLimit());
1037         if (Split.first == StringRef::npos) {
1038           // The last line's penalty is handled in addNextStateToQueue().
1039           if (LineIndex < EndIndex - 1)
1040             Penalty += Style.PenaltyExcessCharacter *
1041                        (RemainingTokenColumns - RemainingSpace);
1042           break;
1043         }
1044         assert(Split.first != 0);
1045         unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
1046             LineIndex, TailOffset + Split.first + Split.second,
1047             StringRef::npos);
1048         assert(NewRemainingTokenColumns < RemainingTokenColumns);
1049         if (!DryRun)
1050           Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces);
1051         Penalty += Current.is(tok::string_literal) ? Style.PenaltyBreakString
1052                                                    : Style.PenaltyBreakComment;
1053         unsigned ColumnsUsed =
1054             Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first);
1055         if (ColumnsUsed > getColumnLimit()) {
1056           Penalty +=
1057               Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit());
1058         }
1059         TailOffset += Split.first + Split.second;
1060         RemainingTokenColumns = NewRemainingTokenColumns;
1061         BreakInserted = true;
1062       }
1063     }
1064 
1065     State.Column = RemainingTokenColumns;
1066 
1067     if (BreakInserted) {
1068       // If we break the token inside a parameter list, we need to break before
1069       // the next parameter on all levels, so that the next parameter is clearly
1070       // visible. Line comments already introduce a break.
1071       if (Current.Type != TT_LineComment) {
1072         for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
1073           State.Stack[i].BreakBeforeParameter = true;
1074       }
1075 
1076       State.Stack.back().LastSpace = StartColumn;
1077     }
1078     return Penalty;
1079   }
1080 
1081   unsigned getColumnLimit() {
1082     // In preprocessor directives reserve two chars for trailing " \"
1083     return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0);
1084   }
1085 
1086   /// \brief An edge in the solution space from \c Previous->State to \c State,
1087   /// inserting a newline dependent on the \c NewLine.
1088   struct StateNode {
1089     StateNode(const LineState &State, bool NewLine, StateNode *Previous)
1090         : State(State), NewLine(NewLine), Previous(Previous) {}
1091     LineState State;
1092     bool NewLine;
1093     StateNode *Previous;
1094   };
1095 
1096   /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
1097   ///
1098   /// In case of equal penalties, we want to prefer states that were inserted
1099   /// first. During state generation we make sure that we insert states first
1100   /// that break the line as late as possible.
1101   typedef std::pair<unsigned, unsigned> OrderedPenalty;
1102 
1103   /// \brief An item in the prioritized BFS search queue. The \c StateNode's
1104   /// \c State has the given \c OrderedPenalty.
1105   typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
1106 
1107   /// \brief The BFS queue type.
1108   typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
1109                               std::greater<QueueItem> > QueueType;
1110 
1111   /// \brief Analyze the entire solution space starting from \p InitialState.
1112   ///
1113   /// This implements a variant of Dijkstra's algorithm on the graph that spans
1114   /// the solution space (\c LineStates are the nodes). The algorithm tries to
1115   /// find the shortest path (the one with lowest penalty) from \p InitialState
1116   /// to a state where all tokens are placed.
1117   void analyzeSolutionSpace(LineState &InitialState) {
1118     std::set<LineState> Seen;
1119 
1120     // Insert start element into queue.
1121     StateNode *Node =
1122         new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
1123     Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
1124     ++Count;
1125 
1126     // While not empty, take first element and follow edges.
1127     while (!Queue.empty()) {
1128       unsigned Penalty = Queue.top().first.first;
1129       StateNode *Node = Queue.top().second;
1130       if (Node->State.NextToken == NULL) {
1131         DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
1132         break;
1133       }
1134       Queue.pop();
1135 
1136       // Cut off the analysis of certain solutions if the analysis gets too
1137       // complex. See description of IgnoreStackForComparison.
1138       if (Count > 10000)
1139         Node->State.IgnoreStackForComparison = true;
1140 
1141       if (!Seen.insert(Node->State).second)
1142         // State already examined with lower penalty.
1143         continue;
1144 
1145       addNextStateToQueue(Penalty, Node, /*NewLine=*/false);
1146       addNextStateToQueue(Penalty, Node, /*NewLine=*/true);
1147     }
1148 
1149     if (Queue.empty())
1150       // We were unable to find a solution, do nothing.
1151       // FIXME: Add diagnostic?
1152       return;
1153 
1154     // Reconstruct the solution.
1155     reconstructPath(InitialState, Queue.top().second);
1156     DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
1157     DEBUG(llvm::dbgs() << "---\n");
1158   }
1159 
1160   void reconstructPath(LineState &State, StateNode *Current) {
1161     std::deque<StateNode *> Path;
1162     // We do not need a break before the initial token.
1163     while (Current->Previous) {
1164       Path.push_front(Current);
1165       Current = Current->Previous;
1166     }
1167     for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
1168          I != E; ++I) {
1169       DEBUG({
1170         if ((*I)->NewLine) {
1171           llvm::dbgs() << "Penalty for splitting before "
1172                        << (*I)->Previous->State.NextToken->Tok.getName() << ": "
1173                        << (*I)->Previous->State.NextToken->SplitPenalty << "\n";
1174         }
1175       });
1176       addTokenToState((*I)->NewLine, false, State);
1177     }
1178   }
1179 
1180   /// \brief Add the following state to the analysis queue \c Queue.
1181   ///
1182   /// Assume the current state is \p PreviousNode and has been reached with a
1183   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
1184   void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
1185                            bool NewLine) {
1186     if (NewLine && !canBreak(PreviousNode->State))
1187       return;
1188     if (!NewLine && mustBreak(PreviousNode->State))
1189       return;
1190     if (NewLine) {
1191       if (!PreviousNode->State.Stack.back().ContainsLineBreak)
1192         Penalty += 15;
1193       Penalty += PreviousNode->State.NextToken->SplitPenalty;
1194     }
1195 
1196     StateNode *Node = new (Allocator.Allocate())
1197         StateNode(PreviousNode->State, NewLine, PreviousNode);
1198     Penalty += addTokenToState(NewLine, true, Node->State);
1199     if (Node->State.Column > getColumnLimit()) {
1200       unsigned ExcessCharacters = Node->State.Column - getColumnLimit();
1201       Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
1202     }
1203 
1204     Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node));
1205     ++Count;
1206   }
1207 
1208   /// \brief Returns \c true, if a line break after \p State is allowed.
1209   bool canBreak(const LineState &State) {
1210     const FormatToken &Current = *State.NextToken;
1211     const FormatToken &Previous = *Current.Previous;
1212     assert(&Previous == Current.Previous);
1213     if (!Current.CanBreakBefore &&
1214         !(Current.is(tok::r_brace) &&
1215           State.Stack.back().BreakBeforeClosingBrace))
1216       return false;
1217     // The opening "{" of a braced list has to be on the same line as the first
1218     // element if it is nested in another braced init list or function call.
1219     if (!Current.MustBreakBefore && Previous.is(tok::l_brace) &&
1220         Previous.Previous &&
1221         Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma))
1222       return false;
1223     // This prevents breaks like:
1224     //   ...
1225     //   SomeParameter, OtherParameter).DoSomething(
1226     //   ...
1227     // As they hide "DoSomething" and are generally bad for readability.
1228     if (Previous.opensScope() &&
1229         State.LowestLevelOnLine < State.StartOfLineLevel)
1230       return false;
1231     return !State.Stack.back().NoLineBreak;
1232   }
1233 
1234   /// \brief Returns \c true, if a line break after \p State is mandatory.
1235   bool mustBreak(const LineState &State) {
1236     const FormatToken &Current = *State.NextToken;
1237     const FormatToken &Previous = *Current.Previous;
1238     if (Current.MustBreakBefore || Current.Type == TT_InlineASMColon)
1239       return true;
1240     if (!Style.Cpp11BracedListStyle && Current.is(tok::r_brace) &&
1241         State.Stack.back().BreakBeforeClosingBrace)
1242       return true;
1243     if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection)
1244       return true;
1245     if (Style.BreakConstructorInitializersBeforeComma) {
1246       if (Previous.Type == TT_CtorInitializerComma)
1247         return false;
1248       if (Current.Type == TT_CtorInitializerComma)
1249         return true;
1250     }
1251     if ((Previous.isOneOf(tok::comma, tok::semi) || Current.is(tok::question) ||
1252          (Current.Type == TT_ConditionalExpr &&
1253           !(Current.is(tok::colon) && Previous.is(tok::question)))) &&
1254         State.Stack.back().BreakBeforeParameter &&
1255         !Current.isTrailingComment() &&
1256         !Current.isOneOf(tok::r_paren, tok::r_brace))
1257       return true;
1258     if (Style.AlwaysBreakBeforeMultilineStrings &&
1259         State.Column > State.Stack.back().Indent &&
1260         Current.is(tok::string_literal) && Previous.isNot(tok::lessless) &&
1261         Previous.Type != TT_InlineASMColon &&
1262         ((Current.getNextNonComment() &&
1263           Current.getNextNonComment()->is(tok::string_literal)) ||
1264          (Current.TokenText.find("\\\n") != StringRef::npos)))
1265       return true;
1266 
1267     if (!Style.BreakBeforeBinaryOperators) {
1268       // If we need to break somewhere inside the LHS of a binary expression, we
1269       // should also break after the operator. Otherwise, the formatting would
1270       // hide the operator precedence, e.g. in:
1271       //   if (aaaaaaaaaaaaaa ==
1272       //           bbbbbbbbbbbbbb && c) {..
1273       // For comparisons, we only apply this rule, if the LHS is a binary
1274       // expression itself as otherwise, the line breaks seem superfluous.
1275       // We need special cases for ">>" which we have split into two ">" while
1276       // lexing in order to make template parsing easier.
1277       //
1278       // FIXME: We'll need something similar for styles that break before binary
1279       // operators.
1280       bool IsComparison = (Previous.getPrecedence() == prec::Relational ||
1281                            Previous.getPrecedence() == prec::Equality) &&
1282                           Previous.Previous && Previous.Previous->Type !=
1283                                                    TT_BinaryOperator; // For >>.
1284       bool LHSIsBinaryExpr =
1285           Previous.Previous && Previous.Previous->FakeRParens > 0;
1286       if (Previous.Type == TT_BinaryOperator &&
1287           (!IsComparison || LHSIsBinaryExpr) &&
1288           Current.Type != TT_BinaryOperator && // For >>.
1289           !Current.isTrailingComment() &&
1290           !Previous.isOneOf(tok::lessless, tok::question) &&
1291           Previous.getPrecedence() != prec::Assignment &&
1292           State.Stack.back().BreakBeforeParameter)
1293         return true;
1294     }
1295 
1296     // Same as above, but for the first "<<" operator.
1297     if (Current.is(tok::lessless) && State.Stack.back().BreakBeforeParameter &&
1298         State.Stack.back().FirstLessLess == 0)
1299       return true;
1300 
1301     // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding
1302     // out whether it is the first parameter. Clean this up.
1303     if (Current.Type == TT_ObjCSelectorName &&
1304         Current.LongestObjCSelectorName == 0 &&
1305         State.Stack.back().BreakBeforeParameter)
1306       return true;
1307     if ((Current.Type == TT_CtorInitializerColon ||
1308          (Previous.ClosesTemplateDeclaration && State.ParenLevel == 0)))
1309       return true;
1310 
1311     if ((Current.Type == TT_StartOfName || Current.is(tok::kw_operator)) &&
1312         Line.MightBeFunctionDecl && State.Stack.back().BreakBeforeParameter &&
1313         State.ParenLevel == 0)
1314       return true;
1315     return false;
1316   }
1317 
1318   // Returns the total number of columns required for the remaining tokens.
1319   unsigned getRemainingLength(const LineState &State) {
1320     if (State.NextToken && State.NextToken->Previous)
1321       return Line.Last->TotalLength - State.NextToken->Previous->TotalLength;
1322     return 0;
1323   }
1324 
1325   FormatStyle Style;
1326   SourceManager &SourceMgr;
1327   const AnnotatedLine &Line;
1328   const unsigned FirstIndent;
1329   const FormatToken *RootToken;
1330   WhitespaceManager &Whitespaces;
1331 
1332   llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
1333   QueueType Queue;
1334   // Increasing count of \c StateNode items we have created. This is used
1335   // to create a deterministic order independent of the container.
1336   unsigned Count;
1337   encoding::Encoding Encoding;
1338   bool BinPackInconclusiveFunctions;
1339 };
1340 
1341 class FormatTokenLexer {
1342 public:
1343   FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr,
1344                    encoding::Encoding Encoding)
1345       : FormatTok(NULL), GreaterStashed(false), TrailingWhitespace(0), Lex(Lex),
1346         SourceMgr(SourceMgr), IdentTable(getFormattingLangOpts()),
1347         Encoding(Encoding) {
1348     Lex.SetKeepWhitespaceMode(true);
1349   }
1350 
1351   ArrayRef<FormatToken *> lex() {
1352     assert(Tokens.empty());
1353     do {
1354       Tokens.push_back(getNextToken());
1355     } while (Tokens.back()->Tok.isNot(tok::eof));
1356     return Tokens;
1357   }
1358 
1359   IdentifierTable &getIdentTable() { return IdentTable; }
1360 
1361 private:
1362   FormatToken *getNextToken() {
1363     if (GreaterStashed) {
1364       // Create a synthesized second '>' token.
1365       Token Greater = FormatTok->Tok;
1366       FormatTok = new (Allocator.Allocate()) FormatToken;
1367       FormatTok->Tok = Greater;
1368       SourceLocation GreaterLocation =
1369           FormatTok->Tok.getLocation().getLocWithOffset(1);
1370       FormatTok->WhitespaceRange =
1371           SourceRange(GreaterLocation, GreaterLocation);
1372       FormatTok->TokenText = ">";
1373       FormatTok->CodePointCount = 1;
1374       GreaterStashed = false;
1375       return FormatTok;
1376     }
1377 
1378     FormatTok = new (Allocator.Allocate()) FormatToken;
1379     readRawToken(*FormatTok);
1380     SourceLocation WhitespaceStart =
1381         FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
1382     if (SourceMgr.getFileOffset(WhitespaceStart) == 0)
1383       FormatTok->IsFirst = true;
1384 
1385     // Consume and record whitespace until we find a significant token.
1386     unsigned WhitespaceLength = TrailingWhitespace;
1387     while (FormatTok->Tok.is(tok::unknown)) {
1388       unsigned Newlines = FormatTok->TokenText.count('\n');
1389       if (Newlines > 0)
1390         FormatTok->LastNewlineOffset =
1391             WhitespaceLength + FormatTok->TokenText.rfind('\n') + 1;
1392       FormatTok->NewlinesBefore += Newlines;
1393       unsigned EscapedNewlines = FormatTok->TokenText.count("\\\n");
1394       FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines;
1395       WhitespaceLength += FormatTok->Tok.getLength();
1396 
1397       readRawToken(*FormatTok);
1398     }
1399 
1400     // In case the token starts with escaped newlines, we want to
1401     // take them into account as whitespace - this pattern is quite frequent
1402     // in macro definitions.
1403     // FIXME: What do we want to do with other escaped spaces, and escaped
1404     // spaces or newlines in the middle of tokens?
1405     // FIXME: Add a more explicit test.
1406     while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
1407            FormatTok->TokenText[1] == '\n') {
1408       // FIXME: ++FormatTok->NewlinesBefore is missing...
1409       WhitespaceLength += 2;
1410       FormatTok->TokenText = FormatTok->TokenText.substr(2);
1411     }
1412 
1413     TrailingWhitespace = 0;
1414     if (FormatTok->Tok.is(tok::comment)) {
1415       StringRef UntrimmedText = FormatTok->TokenText;
1416       FormatTok->TokenText = FormatTok->TokenText.rtrim();
1417       TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
1418     } else if (FormatTok->Tok.is(tok::raw_identifier)) {
1419       IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
1420       FormatTok->Tok.setIdentifierInfo(&Info);
1421       FormatTok->Tok.setKind(Info.getTokenID());
1422     } else if (FormatTok->Tok.is(tok::greatergreater)) {
1423       FormatTok->Tok.setKind(tok::greater);
1424       FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1425       GreaterStashed = true;
1426     }
1427 
1428     // Now FormatTok is the next non-whitespace token.
1429     FormatTok->CodePointCount =
1430         encoding::getCodePointCount(FormatTok->TokenText, Encoding);
1431 
1432     FormatTok->WhitespaceRange = SourceRange(
1433         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1434     return FormatTok;
1435   }
1436 
1437   FormatToken *FormatTok;
1438   bool GreaterStashed;
1439   unsigned TrailingWhitespace;
1440   Lexer &Lex;
1441   SourceManager &SourceMgr;
1442   IdentifierTable IdentTable;
1443   encoding::Encoding Encoding;
1444   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
1445   SmallVector<FormatToken *, 16> Tokens;
1446 
1447   void readRawToken(FormatToken &Tok) {
1448     Lex.LexFromRawLexer(Tok.Tok);
1449     Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1450                               Tok.Tok.getLength());
1451 
1452     // For formatting, treat unterminated string literals like normal string
1453     // literals.
1454     if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
1455         Tok.TokenText[0] == '"') {
1456       Tok.Tok.setKind(tok::string_literal);
1457       Tok.IsUnterminatedLiteral = true;
1458     }
1459   }
1460 };
1461 
1462 class Formatter : public UnwrappedLineConsumer {
1463 public:
1464   Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
1465             const std::vector<CharSourceRange> &Ranges)
1466       : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1467         Whitespaces(SourceMgr, Style), Ranges(Ranges),
1468         Encoding(encoding::detectEncoding(Lex.getBuffer())) {
1469     DEBUG(llvm::dbgs() << "File encoding: "
1470                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1471                                                                : "unknown")
1472                        << "\n");
1473   }
1474 
1475   virtual ~Formatter() {}
1476 
1477   tooling::Replacements format() {
1478     FormatTokenLexer Tokens(Lex, SourceMgr, Encoding);
1479 
1480     UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
1481     bool StructuralError = Parser.parse();
1482     TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
1483     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1484       Annotator.annotate(AnnotatedLines[i]);
1485     }
1486     deriveLocalStyle();
1487     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1488       Annotator.calculateFormattingInformation(AnnotatedLines[i]);
1489     }
1490 
1491     // Adapt level to the next line if this is a comment.
1492     // FIXME: Can/should this be done in the UnwrappedLineParser?
1493     const AnnotatedLine *NextNonCommentLine = NULL;
1494     for (unsigned i = AnnotatedLines.size() - 1; i > 0; --i) {
1495       if (NextNonCommentLine && AnnotatedLines[i].First->is(tok::comment) &&
1496           !AnnotatedLines[i].First->Next)
1497         AnnotatedLines[i].Level = NextNonCommentLine->Level;
1498       else
1499         NextNonCommentLine = AnnotatedLines[i].First->isNot(tok::r_brace)
1500                                  ? &AnnotatedLines[i]
1501                                  : NULL;
1502     }
1503 
1504     std::vector<int> IndentForLevel;
1505     bool PreviousLineWasTouched = false;
1506     const FormatToken *PreviousLineLastToken = 0;
1507     bool FormatPPDirective = false;
1508     for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
1509                                               E = AnnotatedLines.end();
1510          I != E; ++I) {
1511       const AnnotatedLine &TheLine = *I;
1512       const FormatToken *FirstTok = TheLine.First;
1513       int Offset = getIndentOffset(*TheLine.First);
1514 
1515       // Check whether this line is part of a formatted preprocessor directive.
1516       if (FirstTok->HasUnescapedNewline)
1517         FormatPPDirective = false;
1518       if (!FormatPPDirective && TheLine.InPPDirective &&
1519           (touchesLine(TheLine) || touchesPPDirective(I + 1, E)))
1520         FormatPPDirective = true;
1521 
1522       // Determine indent and try to merge multiple unwrapped lines.
1523       while (IndentForLevel.size() <= TheLine.Level)
1524         IndentForLevel.push_back(-1);
1525       IndentForLevel.resize(TheLine.Level + 1);
1526       unsigned Indent = getIndent(IndentForLevel, TheLine.Level);
1527       if (static_cast<int>(Indent) + Offset >= 0)
1528         Indent += Offset;
1529       tryFitMultipleLinesInOne(Indent, I, E);
1530 
1531       bool WasMoved = PreviousLineWasTouched && FirstTok->NewlinesBefore == 0;
1532       if (TheLine.First->is(tok::eof)) {
1533         if (PreviousLineWasTouched) {
1534           unsigned NewLines = std::min(FirstTok->NewlinesBefore, 1u);
1535           Whitespaces.replaceWhitespace(*TheLine.First, NewLines, /*Indent*/ 0,
1536                                         /*TargetColumn*/ 0);
1537         }
1538       } else if (TheLine.Type != LT_Invalid &&
1539                  (WasMoved || FormatPPDirective || touchesLine(TheLine))) {
1540         unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
1541         if (FirstTok->WhitespaceRange.isValid() &&
1542             // Insert a break even if there is a structural error in case where
1543             // we break apart a line consisting of multiple unwrapped lines.
1544             (FirstTok->NewlinesBefore == 0 || !StructuralError)) {
1545           formatFirstToken(*TheLine.First, PreviousLineLastToken, Indent,
1546                            TheLine.InPPDirective);
1547         } else {
1548           Indent = LevelIndent =
1549               SourceMgr.getSpellingColumnNumber(FirstTok->Tok.getLocation()) -
1550               1;
1551         }
1552         UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
1553                                          TheLine.First, Whitespaces, Encoding,
1554                                          BinPackInconclusiveFunctions);
1555         Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
1556         IndentForLevel[TheLine.Level] = LevelIndent;
1557         PreviousLineWasTouched = true;
1558       } else {
1559         // Format the first token if necessary, and notify the WhitespaceManager
1560         // about the unchanged whitespace.
1561         for (const FormatToken *Tok = TheLine.First; Tok != NULL;
1562              Tok = Tok->Next) {
1563           if (Tok == TheLine.First &&
1564               (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
1565             unsigned LevelIndent =
1566                 SourceMgr.getSpellingColumnNumber(Tok->Tok.getLocation()) - 1;
1567             // Remove trailing whitespace of the previous line if it was
1568             // touched.
1569             if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) {
1570               formatFirstToken(*Tok, PreviousLineLastToken, LevelIndent,
1571                                TheLine.InPPDirective);
1572             } else {
1573               Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective);
1574             }
1575 
1576             if (static_cast<int>(LevelIndent) - Offset >= 0)
1577               LevelIndent -= Offset;
1578             if (Tok->isNot(tok::comment))
1579               IndentForLevel[TheLine.Level] = LevelIndent;
1580           } else {
1581             Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective);
1582           }
1583         }
1584         // If we did not reformat this unwrapped line, the column at the end of
1585         // the last token is unchanged - thus, we can calculate the end of the
1586         // last token.
1587         PreviousLineWasTouched = false;
1588       }
1589       PreviousLineLastToken = I->Last;
1590     }
1591     return Whitespaces.generateReplacements();
1592   }
1593 
1594 private:
1595   void deriveLocalStyle() {
1596     unsigned CountBoundToVariable = 0;
1597     unsigned CountBoundToType = 0;
1598     bool HasCpp03IncompatibleFormat = false;
1599     bool HasBinPackedFunction = false;
1600     bool HasOnePerLineFunction = false;
1601     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1602       if (!AnnotatedLines[i].First->Next)
1603         continue;
1604       FormatToken *Tok = AnnotatedLines[i].First->Next;
1605       while (Tok->Next) {
1606         if (Tok->Type == TT_PointerOrReference) {
1607           bool SpacesBefore =
1608               Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1609           bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1610                              Tok->Next->WhitespaceRange.getEnd();
1611           if (SpacesBefore && !SpacesAfter)
1612             ++CountBoundToVariable;
1613           else if (!SpacesBefore && SpacesAfter)
1614             ++CountBoundToType;
1615         }
1616 
1617         if (Tok->Type == TT_TemplateCloser &&
1618             Tok->Previous->Type == TT_TemplateCloser &&
1619             Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd())
1620           HasCpp03IncompatibleFormat = true;
1621 
1622         if (Tok->PackingKind == PPK_BinPacked)
1623           HasBinPackedFunction = true;
1624         if (Tok->PackingKind == PPK_OnePerLine)
1625           HasOnePerLineFunction = true;
1626 
1627         Tok = Tok->Next;
1628       }
1629     }
1630     if (Style.DerivePointerBinding) {
1631       if (CountBoundToType > CountBoundToVariable)
1632         Style.PointerBindsToType = true;
1633       else if (CountBoundToType < CountBoundToVariable)
1634         Style.PointerBindsToType = false;
1635     }
1636     if (Style.Standard == FormatStyle::LS_Auto) {
1637       Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1638                                                   : FormatStyle::LS_Cpp03;
1639     }
1640     BinPackInconclusiveFunctions =
1641         HasBinPackedFunction || !HasOnePerLineFunction;
1642   }
1643 
1644   /// \brief Get the indent of \p Level from \p IndentForLevel.
1645   ///
1646   /// \p IndentForLevel must contain the indent for the level \c l
1647   /// at \p IndentForLevel[l], or a value < 0 if the indent for
1648   /// that level is unknown.
1649   unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
1650     if (IndentForLevel[Level] != -1)
1651       return IndentForLevel[Level];
1652     if (Level == 0)
1653       return 0;
1654     return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
1655   }
1656 
1657   /// \brief Get the offset of the line relatively to the level.
1658   ///
1659   /// For example, 'public:' labels in classes are offset by 1 or 2
1660   /// characters to the left from their level.
1661   int getIndentOffset(const FormatToken &RootToken) {
1662     if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
1663       return Style.AccessModifierOffset;
1664     return 0;
1665   }
1666 
1667   /// \brief Tries to merge lines into one.
1668   ///
1669   /// This will change \c Line and \c AnnotatedLine to contain the merged line,
1670   /// if possible; note that \c I will be incremented when lines are merged.
1671   void tryFitMultipleLinesInOne(unsigned Indent,
1672                                 std::vector<AnnotatedLine>::iterator &I,
1673                                 std::vector<AnnotatedLine>::iterator E) {
1674     // We can never merge stuff if there are trailing line comments.
1675     if (I->Last->Type == TT_LineComment)
1676       return;
1677 
1678     if (Indent > Style.ColumnLimit)
1679       return;
1680 
1681     unsigned Limit = Style.ColumnLimit - Indent;
1682     // If we already exceed the column limit, we set 'Limit' to 0. The different
1683     // tryMerge..() functions can then decide whether to still do merging.
1684     Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength;
1685 
1686     if (I + 1 == E || (I + 1)->Type == LT_Invalid)
1687       return;
1688 
1689     if (I->Last->is(tok::l_brace)) {
1690       tryMergeSimpleBlock(I, E, Limit);
1691     } else if (Style.AllowShortIfStatementsOnASingleLine &&
1692                I->First->is(tok::kw_if)) {
1693       tryMergeSimpleControlStatement(I, E, Limit);
1694     } else if (Style.AllowShortLoopsOnASingleLine &&
1695                I->First->isOneOf(tok::kw_for, tok::kw_while)) {
1696       tryMergeSimpleControlStatement(I, E, Limit);
1697     } else if (I->InPPDirective &&
1698                (I->First->HasUnescapedNewline || I->First->IsFirst)) {
1699       tryMergeSimplePPDirective(I, E, Limit);
1700     }
1701   }
1702 
1703   void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I,
1704                                  std::vector<AnnotatedLine>::iterator E,
1705                                  unsigned Limit) {
1706     if (Limit == 0)
1707       return;
1708     AnnotatedLine &Line = *I;
1709     if (!(I + 1)->InPPDirective || (I + 1)->First->HasUnescapedNewline)
1710       return;
1711     if (I + 2 != E && (I + 2)->InPPDirective &&
1712         !(I + 2)->First->HasUnescapedNewline)
1713       return;
1714     if (1 + (I + 1)->Last->TotalLength > Limit)
1715       return;
1716     join(Line, *(++I));
1717   }
1718 
1719   void tryMergeSimpleControlStatement(std::vector<AnnotatedLine>::iterator &I,
1720                                       std::vector<AnnotatedLine>::iterator E,
1721                                       unsigned Limit) {
1722     if (Limit == 0)
1723       return;
1724     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman &&
1725         (I + 1)->First->is(tok::l_brace))
1726       return;
1727     if ((I + 1)->InPPDirective != I->InPPDirective ||
1728         ((I + 1)->InPPDirective && (I + 1)->First->HasUnescapedNewline))
1729       return;
1730     AnnotatedLine &Line = *I;
1731     if (Line.Last->isNot(tok::r_paren))
1732       return;
1733     if (1 + (I + 1)->Last->TotalLength > Limit)
1734       return;
1735     if ((I + 1)->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
1736                                 tok::kw_while) ||
1737         (I + 1)->First->Type == TT_LineComment)
1738       return;
1739     // Only inline simple if's (no nested if or else).
1740     if (I + 2 != E && Line.First->is(tok::kw_if) &&
1741         (I + 2)->First->is(tok::kw_else))
1742       return;
1743     join(Line, *(++I));
1744   }
1745 
1746   void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I,
1747                            std::vector<AnnotatedLine>::iterator E,
1748                            unsigned Limit) {
1749     // No merging if the brace already is on the next line.
1750     if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
1751       return;
1752 
1753     // First, check that the current line allows merging. This is the case if
1754     // we're not in a control flow statement and the last token is an opening
1755     // brace.
1756     AnnotatedLine &Line = *I;
1757     if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
1758                             tok::kw_else, tok::kw_try, tok::kw_catch,
1759                             tok::kw_for,
1760                             // This gets rid of all ObjC @ keywords and methods.
1761                             tok::at, tok::minus, tok::plus))
1762       return;
1763 
1764     FormatToken *Tok = (I + 1)->First;
1765     if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
1766         (Tok->getNextNonComment() == NULL ||
1767          Tok->getNextNonComment()->is(tok::semi))) {
1768       // We merge empty blocks even if the line exceeds the column limit.
1769       Tok->SpacesRequiredBefore = 0;
1770       Tok->CanBreakBefore = true;
1771       join(Line, *(I + 1));
1772       I += 1;
1773     } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
1774       // Check that we still have three lines and they fit into the limit.
1775       if (I + 2 == E || (I + 2)->Type == LT_Invalid ||
1776           !nextTwoLinesFitInto(I, Limit))
1777         return;
1778 
1779       // Second, check that the next line does not contain any braces - if it
1780       // does, readability declines when putting it into a single line.
1781       if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore)
1782         return;
1783       do {
1784         if (Tok->isOneOf(tok::l_brace, tok::r_brace))
1785           return;
1786         Tok = Tok->Next;
1787       } while (Tok != NULL);
1788 
1789       // Last, check that the third line contains a single closing brace.
1790       Tok = (I + 2)->First;
1791       if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) ||
1792           Tok->MustBreakBefore)
1793         return;
1794 
1795       join(Line, *(I + 1));
1796       join(Line, *(I + 2));
1797       I += 2;
1798     }
1799   }
1800 
1801   bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I,
1802                            unsigned Limit) {
1803     return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <=
1804            Limit;
1805   }
1806 
1807   void join(AnnotatedLine &A, const AnnotatedLine &B) {
1808     assert(!A.Last->Next);
1809     assert(!B.First->Previous);
1810     A.Last->Next = B.First;
1811     B.First->Previous = A.Last;
1812     unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
1813     for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
1814       Tok->TotalLength += LengthA;
1815       A.Last = Tok;
1816     }
1817   }
1818 
1819   bool touchesRanges(const CharSourceRange &Range) {
1820     for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1821       if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),
1822                                                Ranges[i].getBegin()) &&
1823           !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
1824                                                Range.getBegin()))
1825         return true;
1826     }
1827     return false;
1828   }
1829 
1830   bool touchesLine(const AnnotatedLine &TheLine) {
1831     const FormatToken *First = TheLine.First;
1832     const FormatToken *Last = TheLine.Last;
1833     CharSourceRange LineRange = CharSourceRange::getCharRange(
1834         First->WhitespaceRange.getBegin().getLocWithOffset(
1835             First->LastNewlineOffset),
1836         Last->Tok.getLocation().getLocWithOffset(Last->TokenText.size() - 1));
1837     return touchesRanges(LineRange);
1838   }
1839 
1840   bool touchesPPDirective(std::vector<AnnotatedLine>::iterator I,
1841                           std::vector<AnnotatedLine>::iterator E) {
1842     for (; I != E; ++I) {
1843       if (I->First->HasUnescapedNewline)
1844         return false;
1845       if (touchesLine(*I))
1846         return true;
1847     }
1848     return false;
1849   }
1850 
1851   bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) {
1852     const FormatToken *First = TheLine.First;
1853     CharSourceRange LineRange = CharSourceRange::getCharRange(
1854         First->WhitespaceRange.getBegin(),
1855         First->WhitespaceRange.getBegin().getLocWithOffset(
1856             First->LastNewlineOffset));
1857     return touchesRanges(LineRange);
1858   }
1859 
1860   virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1861     AnnotatedLines.push_back(AnnotatedLine(TheLine));
1862   }
1863 
1864   /// \brief Add a new line and the required indent before the first Token
1865   /// of the \c UnwrappedLine if there was no structural parsing error.
1866   /// Returns the indent level of the \c UnwrappedLine.
1867   void formatFirstToken(const FormatToken &RootToken,
1868                         const FormatToken *PreviousToken, unsigned Indent,
1869                         bool InPPDirective) {
1870     unsigned Newlines =
1871         std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
1872     // Remove empty lines before "}" where applicable.
1873     if (RootToken.is(tok::r_brace) &&
1874         (!RootToken.Next ||
1875          (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
1876       Newlines = std::min(Newlines, 1u);
1877     if (Newlines == 0 && !RootToken.IsFirst)
1878       Newlines = 1;
1879 
1880     // Insert extra new line before access specifiers.
1881     if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) &&
1882         RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
1883       ++Newlines;
1884 
1885     Whitespaces.replaceWhitespace(
1886         RootToken, Newlines, Indent, Indent,
1887         InPPDirective && !RootToken.HasUnescapedNewline);
1888   }
1889 
1890   FormatStyle Style;
1891   Lexer &Lex;
1892   SourceManager &SourceMgr;
1893   WhitespaceManager Whitespaces;
1894   std::vector<CharSourceRange> Ranges;
1895   std::vector<AnnotatedLine> AnnotatedLines;
1896 
1897   encoding::Encoding Encoding;
1898   bool BinPackInconclusiveFunctions;
1899 };
1900 
1901 } // end anonymous namespace
1902 
1903 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1904                                SourceManager &SourceMgr,
1905                                std::vector<CharSourceRange> Ranges) {
1906   Formatter formatter(Style, Lex, SourceMgr, Ranges);
1907   return formatter.format();
1908 }
1909 
1910 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1911                                std::vector<tooling::Range> Ranges,
1912                                StringRef FileName) {
1913   FileManager Files((FileSystemOptions()));
1914   DiagnosticsEngine Diagnostics(
1915       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1916       new DiagnosticOptions);
1917   SourceManager SourceMgr(Diagnostics, Files);
1918   llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1919   const clang::FileEntry *Entry =
1920       Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1921   SourceMgr.overrideFileContents(Entry, Buf);
1922   FileID ID =
1923       SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1924   Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
1925             getFormattingLangOpts(Style.Standard));
1926   SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1927   std::vector<CharSourceRange> CharRanges;
1928   for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1929     SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset());
1930     SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
1931     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1932   }
1933   return reformat(Style, Lex, SourceMgr, CharRanges);
1934 }
1935 
1936 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) {
1937   LangOptions LangOpts;
1938   LangOpts.CPlusPlus = 1;
1939   LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1940   LangOpts.LineComment = 1;
1941   LangOpts.Bool = 1;
1942   LangOpts.ObjC1 = 1;
1943   LangOpts.ObjC2 = 1;
1944   return LangOpts;
1945 }
1946 
1947 } // namespace format
1948 } // namespace clang
1949