1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "format-formatter"
17 
18 #include "ContinuationIndenter.h"
19 #include "TokenAnnotator.h"
20 #include "UnwrappedLineParser.h"
21 #include "WhitespaceManager.h"
22 #include "clang/Basic/Diagnostic.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Format/Format.h"
25 #include "clang/Lex/Lexer.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/YAMLTraits.h"
30 #include "llvm/Support/Path.h"
31 #include <queue>
32 #include <string>
33 
34 namespace llvm {
35 namespace yaml {
36 template <>
37 struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> {
38   static void enumeration(IO &IO,
39                           clang::format::FormatStyle::LanguageStandard &Value) {
40     IO.enumCase(Value, "Cpp03", clang::format::FormatStyle::LS_Cpp03);
41     IO.enumCase(Value, "C++03", clang::format::FormatStyle::LS_Cpp03);
42     IO.enumCase(Value, "Cpp11", clang::format::FormatStyle::LS_Cpp11);
43     IO.enumCase(Value, "C++11", clang::format::FormatStyle::LS_Cpp11);
44     IO.enumCase(Value, "Auto", clang::format::FormatStyle::LS_Auto);
45   }
46 };
47 
48 template <>
49 struct ScalarEnumerationTraits<clang::format::FormatStyle::UseTabStyle> {
50   static void enumeration(IO &IO,
51                           clang::format::FormatStyle::UseTabStyle &Value) {
52     IO.enumCase(Value, "Never", clang::format::FormatStyle::UT_Never);
53     IO.enumCase(Value, "false", clang::format::FormatStyle::UT_Never);
54     IO.enumCase(Value, "Always", clang::format::FormatStyle::UT_Always);
55     IO.enumCase(Value, "true", clang::format::FormatStyle::UT_Always);
56     IO.enumCase(Value, "ForIndentation",
57                 clang::format::FormatStyle::UT_ForIndentation);
58   }
59 };
60 
61 template <>
62 struct ScalarEnumerationTraits<clang::format::FormatStyle::BraceBreakingStyle> {
63   static void
64   enumeration(IO &IO, clang::format::FormatStyle::BraceBreakingStyle &Value) {
65     IO.enumCase(Value, "Attach", clang::format::FormatStyle::BS_Attach);
66     IO.enumCase(Value, "Linux", clang::format::FormatStyle::BS_Linux);
67     IO.enumCase(Value, "Stroustrup", clang::format::FormatStyle::BS_Stroustrup);
68     IO.enumCase(Value, "Allman", clang::format::FormatStyle::BS_Allman);
69   }
70 };
71 
72 template <>
73 struct ScalarEnumerationTraits<
74     clang::format::FormatStyle::NamespaceIndentationKind> {
75   static void
76   enumeration(IO &IO,
77               clang::format::FormatStyle::NamespaceIndentationKind &Value) {
78     IO.enumCase(Value, "None", clang::format::FormatStyle::NI_None);
79     IO.enumCase(Value, "Inner", clang::format::FormatStyle::NI_Inner);
80     IO.enumCase(Value, "All", clang::format::FormatStyle::NI_All);
81   }
82 };
83 
84 template <> struct MappingTraits<clang::format::FormatStyle> {
85   static void mapping(llvm::yaml::IO &IO, clang::format::FormatStyle &Style) {
86     if (IO.outputting()) {
87       StringRef StylesArray[] = { "LLVM",    "Google", "Chromium",
88                                   "Mozilla", "WebKit" };
89       ArrayRef<StringRef> Styles(StylesArray);
90       for (size_t i = 0, e = Styles.size(); i < e; ++i) {
91         StringRef StyleName(Styles[i]);
92         clang::format::FormatStyle PredefinedStyle;
93         if (clang::format::getPredefinedStyle(StyleName, &PredefinedStyle) &&
94             Style == PredefinedStyle) {
95           IO.mapOptional("# BasedOnStyle", StyleName);
96           break;
97         }
98       }
99     } else {
100       StringRef BasedOnStyle;
101       IO.mapOptional("BasedOnStyle", BasedOnStyle);
102       if (!BasedOnStyle.empty())
103         if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) {
104           IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
105           return;
106         }
107     }
108 
109     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
110     IO.mapOptional("ConstructorInitializerIndentWidth",
111                    Style.ConstructorInitializerIndentWidth);
112     IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
113     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
114     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
115                    Style.AllowAllParametersOfDeclarationOnNextLine);
116     IO.mapOptional("AllowShortIfStatementsOnASingleLine",
117                    Style.AllowShortIfStatementsOnASingleLine);
118     IO.mapOptional("AllowShortLoopsOnASingleLine",
119                    Style.AllowShortLoopsOnASingleLine);
120     IO.mapOptional("AlwaysBreakTemplateDeclarations",
121                    Style.AlwaysBreakTemplateDeclarations);
122     IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
123                    Style.AlwaysBreakBeforeMultilineStrings);
124     IO.mapOptional("BreakBeforeBinaryOperators",
125                    Style.BreakBeforeBinaryOperators);
126     IO.mapOptional("BreakConstructorInitializersBeforeComma",
127                    Style.BreakConstructorInitializersBeforeComma);
128     IO.mapOptional("BinPackParameters", Style.BinPackParameters);
129     IO.mapOptional("ColumnLimit", Style.ColumnLimit);
130     IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
131                    Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
132     IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding);
133     IO.mapOptional("ExperimentalAutoDetectBinPacking",
134                    Style.ExperimentalAutoDetectBinPacking);
135     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
136     IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
137     IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
138     IO.mapOptional("ObjCSpaceBeforeProtocolList",
139                    Style.ObjCSpaceBeforeProtocolList);
140     IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
141                    Style.PenaltyBreakBeforeFirstCallParameter);
142     IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
143     IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
144     IO.mapOptional("PenaltyBreakFirstLessLess",
145                    Style.PenaltyBreakFirstLessLess);
146     IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
147     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
148                    Style.PenaltyReturnTypeOnItsOwnLine);
149     IO.mapOptional("PointerBindsToType", Style.PointerBindsToType);
150     IO.mapOptional("SpacesBeforeTrailingComments",
151                    Style.SpacesBeforeTrailingComments);
152     IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
153     IO.mapOptional("Standard", Style.Standard);
154     IO.mapOptional("IndentWidth", Style.IndentWidth);
155     IO.mapOptional("TabWidth", Style.TabWidth);
156     IO.mapOptional("UseTab", Style.UseTab);
157     IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
158     IO.mapOptional("IndentFunctionDeclarationAfterType",
159                    Style.IndentFunctionDeclarationAfterType);
160     IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
161     IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
162     IO.mapOptional("SpacesInCStyleCastParentheses",
163                    Style.SpacesInCStyleCastParentheses);
164     IO.mapOptional("SpaceAfterControlStatementKeyword",
165                    Style.SpaceAfterControlStatementKeyword);
166     IO.mapOptional("SpaceBeforeAssignmentOperators",
167                    Style.SpaceBeforeAssignmentOperators);
168     IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
169   }
170 };
171 }
172 }
173 
174 namespace clang {
175 namespace format {
176 
177 void setDefaultPenalties(FormatStyle &Style) {
178   Style.PenaltyBreakComment = 60;
179   Style.PenaltyBreakFirstLessLess = 120;
180   Style.PenaltyBreakString = 1000;
181   Style.PenaltyExcessCharacter = 1000000;
182 }
183 
184 FormatStyle getLLVMStyle() {
185   FormatStyle LLVMStyle;
186   LLVMStyle.AccessModifierOffset = -2;
187   LLVMStyle.AlignEscapedNewlinesLeft = false;
188   LLVMStyle.AlignTrailingComments = true;
189   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
190   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
191   LLVMStyle.AllowShortLoopsOnASingleLine = false;
192   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
193   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
194   LLVMStyle.BinPackParameters = true;
195   LLVMStyle.BreakBeforeBinaryOperators = false;
196   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
197   LLVMStyle.BreakConstructorInitializersBeforeComma = false;
198   LLVMStyle.ColumnLimit = 80;
199   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
200   LLVMStyle.ConstructorInitializerIndentWidth = 4;
201   LLVMStyle.Cpp11BracedListStyle = false;
202   LLVMStyle.DerivePointerBinding = false;
203   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
204   LLVMStyle.IndentCaseLabels = false;
205   LLVMStyle.IndentFunctionDeclarationAfterType = false;
206   LLVMStyle.IndentWidth = 2;
207   LLVMStyle.TabWidth = 8;
208   LLVMStyle.MaxEmptyLinesToKeep = 1;
209   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
210   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
211   LLVMStyle.PointerBindsToType = false;
212   LLVMStyle.SpacesBeforeTrailingComments = 1;
213   LLVMStyle.Standard = FormatStyle::LS_Cpp03;
214   LLVMStyle.UseTab = FormatStyle::UT_Never;
215   LLVMStyle.SpacesInParentheses = false;
216   LLVMStyle.SpaceInEmptyParentheses = false;
217   LLVMStyle.SpacesInCStyleCastParentheses = false;
218   LLVMStyle.SpaceAfterControlStatementKeyword = true;
219   LLVMStyle.SpaceBeforeAssignmentOperators = true;
220   LLVMStyle.ContinuationIndentWidth = 4;
221 
222   setDefaultPenalties(LLVMStyle);
223   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
224   LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
225 
226   return LLVMStyle;
227 }
228 
229 FormatStyle getGoogleStyle() {
230   FormatStyle GoogleStyle;
231   GoogleStyle.AccessModifierOffset = -1;
232   GoogleStyle.AlignEscapedNewlinesLeft = true;
233   GoogleStyle.AlignTrailingComments = true;
234   GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true;
235   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
236   GoogleStyle.AllowShortLoopsOnASingleLine = true;
237   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
238   GoogleStyle.AlwaysBreakTemplateDeclarations = true;
239   GoogleStyle.BinPackParameters = true;
240   GoogleStyle.BreakBeforeBinaryOperators = false;
241   GoogleStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
242   GoogleStyle.BreakConstructorInitializersBeforeComma = false;
243   GoogleStyle.ColumnLimit = 80;
244   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
245   GoogleStyle.ConstructorInitializerIndentWidth = 4;
246   GoogleStyle.Cpp11BracedListStyle = true;
247   GoogleStyle.DerivePointerBinding = true;
248   GoogleStyle.ExperimentalAutoDetectBinPacking = false;
249   GoogleStyle.IndentCaseLabels = true;
250   GoogleStyle.IndentFunctionDeclarationAfterType = true;
251   GoogleStyle.IndentWidth = 2;
252   GoogleStyle.TabWidth = 8;
253   GoogleStyle.MaxEmptyLinesToKeep = 1;
254   GoogleStyle.NamespaceIndentation = FormatStyle::NI_None;
255   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
256   GoogleStyle.PointerBindsToType = true;
257   GoogleStyle.SpacesBeforeTrailingComments = 2;
258   GoogleStyle.Standard = FormatStyle::LS_Auto;
259   GoogleStyle.UseTab = FormatStyle::UT_Never;
260   GoogleStyle.SpacesInParentheses = false;
261   GoogleStyle.SpaceInEmptyParentheses = false;
262   GoogleStyle.SpacesInCStyleCastParentheses = false;
263   GoogleStyle.SpaceAfterControlStatementKeyword = true;
264   GoogleStyle.SpaceBeforeAssignmentOperators = true;
265   GoogleStyle.ContinuationIndentWidth = 4;
266 
267   setDefaultPenalties(GoogleStyle);
268   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
269   GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
270 
271   return GoogleStyle;
272 }
273 
274 FormatStyle getChromiumStyle() {
275   FormatStyle ChromiumStyle = getGoogleStyle();
276   ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
277   ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
278   ChromiumStyle.AllowShortLoopsOnASingleLine = false;
279   ChromiumStyle.BinPackParameters = false;
280   ChromiumStyle.DerivePointerBinding = false;
281   ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
282   return ChromiumStyle;
283 }
284 
285 FormatStyle getMozillaStyle() {
286   FormatStyle MozillaStyle = getLLVMStyle();
287   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
288   MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
289   MozillaStyle.DerivePointerBinding = true;
290   MozillaStyle.IndentCaseLabels = true;
291   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
292   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
293   MozillaStyle.PointerBindsToType = true;
294   return MozillaStyle;
295 }
296 
297 FormatStyle getWebKitStyle() {
298   FormatStyle Style = getLLVMStyle();
299   Style.AccessModifierOffset = -4;
300   Style.AlignTrailingComments = false;
301   Style.BreakBeforeBinaryOperators = true;
302   Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
303   Style.BreakConstructorInitializersBeforeComma = true;
304   Style.ColumnLimit = 0;
305   Style.IndentWidth = 4;
306   Style.NamespaceIndentation = FormatStyle::NI_Inner;
307   Style.PointerBindsToType = true;
308   return Style;
309 }
310 
311 bool getPredefinedStyle(StringRef Name, FormatStyle *Style) {
312   if (Name.equals_lower("llvm"))
313     *Style = getLLVMStyle();
314   else if (Name.equals_lower("chromium"))
315     *Style = getChromiumStyle();
316   else if (Name.equals_lower("mozilla"))
317     *Style = getMozillaStyle();
318   else if (Name.equals_lower("google"))
319     *Style = getGoogleStyle();
320   else if (Name.equals_lower("webkit"))
321     *Style = getWebKitStyle();
322   else
323     return false;
324 
325   return true;
326 }
327 
328 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
329   if (Text.trim().empty())
330     return llvm::make_error_code(llvm::errc::invalid_argument);
331   llvm::yaml::Input Input(Text);
332   Input >> *Style;
333   return Input.error();
334 }
335 
336 std::string configurationAsText(const FormatStyle &Style) {
337   std::string Text;
338   llvm::raw_string_ostream Stream(Text);
339   llvm::yaml::Output Output(Stream);
340   // We use the same mapping method for input and output, so we need a non-const
341   // reference here.
342   FormatStyle NonConstStyle = Style;
343   Output << NonConstStyle;
344   return Stream.str();
345 }
346 
347 namespace {
348 
349 class NoColumnLimitFormatter {
350 public:
351   NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {}
352 
353   /// \brief Formats the line starting at \p State, simply keeping all of the
354   /// input's line breaking decisions.
355   void format(unsigned FirstIndent, const AnnotatedLine *Line) {
356     LineState State =
357         Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false);
358     while (State.NextToken != NULL) {
359       bool Newline =
360           Indenter->mustBreak(State) ||
361           (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
362       Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
363     }
364   }
365 
366 private:
367   ContinuationIndenter *Indenter;
368 };
369 
370 class UnwrappedLineFormatter {
371 public:
372   UnwrappedLineFormatter(ContinuationIndenter *Indenter,
373                          WhitespaceManager *Whitespaces,
374                          const FormatStyle &Style, const AnnotatedLine &Line)
375       : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), Line(Line),
376         Count(0) {}
377 
378   /// \brief Formats an \c UnwrappedLine and returns the penalty.
379   ///
380   /// If \p DryRun is \c false, directly applies the changes.
381   unsigned format(unsigned FirstIndent, bool DryRun = false) {
382     LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
383 
384     // If the ObjC method declaration does not fit on a line, we should format
385     // it with one arg per line.
386     if (Line.Type == LT_ObjCMethodDecl)
387       State.Stack.back().BreakBeforeParameter = true;
388 
389     // Find best solution in solution space.
390     return analyzeSolutionSpace(State, DryRun);
391   }
392 
393 private:
394   /// \brief An edge in the solution space from \c Previous->State to \c State,
395   /// inserting a newline dependent on the \c NewLine.
396   struct StateNode {
397     StateNode(const LineState &State, bool NewLine, StateNode *Previous)
398         : State(State), NewLine(NewLine), Previous(Previous) {}
399     LineState State;
400     bool NewLine;
401     StateNode *Previous;
402   };
403 
404   /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
405   ///
406   /// In case of equal penalties, we want to prefer states that were inserted
407   /// first. During state generation we make sure that we insert states first
408   /// that break the line as late as possible.
409   typedef std::pair<unsigned, unsigned> OrderedPenalty;
410 
411   /// \brief An item in the prioritized BFS search queue. The \c StateNode's
412   /// \c State has the given \c OrderedPenalty.
413   typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
414 
415   /// \brief The BFS queue type.
416   typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
417                               std::greater<QueueItem> > QueueType;
418 
419   /// \brief Analyze the entire solution space starting from \p InitialState.
420   ///
421   /// This implements a variant of Dijkstra's algorithm on the graph that spans
422   /// the solution space (\c LineStates are the nodes). The algorithm tries to
423   /// find the shortest path (the one with lowest penalty) from \p InitialState
424   /// to a state where all tokens are placed. Returns the penalty.
425   ///
426   /// If \p DryRun is \c false, directly applies the changes.
427   unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) {
428     std::set<LineState> Seen;
429 
430     // Insert start element into queue.
431     StateNode *Node =
432         new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
433     Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
434     ++Count;
435 
436     unsigned Penalty = 0;
437 
438     // While not empty, take first element and follow edges.
439     while (!Queue.empty()) {
440       Penalty = Queue.top().first.first;
441       StateNode *Node = Queue.top().second;
442       if (Node->State.NextToken == NULL) {
443         DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
444         break;
445       }
446       Queue.pop();
447 
448       // Cut off the analysis of certain solutions if the analysis gets too
449       // complex. See description of IgnoreStackForComparison.
450       if (Count > 10000)
451         Node->State.IgnoreStackForComparison = true;
452 
453       if (!Seen.insert(Node->State).second)
454         // State already examined with lower penalty.
455         continue;
456 
457       FormatDecision LastFormat = Node->State.NextToken->Decision;
458       if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
459         addNextStateToQueue(Penalty, Node, /*NewLine=*/false);
460       if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
461         addNextStateToQueue(Penalty, Node, /*NewLine=*/true);
462     }
463 
464     if (Queue.empty()) {
465       // We were unable to find a solution, do nothing.
466       // FIXME: Add diagnostic?
467       DEBUG(llvm::dbgs() << "Could not find a solution.\n");
468       return 0;
469     }
470 
471     // Reconstruct the solution.
472     if (!DryRun)
473       reconstructPath(InitialState, Queue.top().second);
474 
475     DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
476     DEBUG(llvm::dbgs() << "---\n");
477 
478     return Penalty;
479   }
480 
481   void reconstructPath(LineState &State, StateNode *Current) {
482     std::deque<StateNode *> Path;
483     // We do not need a break before the initial token.
484     while (Current->Previous) {
485       Path.push_front(Current);
486       Current = Current->Previous;
487     }
488     for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
489          I != E; ++I) {
490       unsigned Penalty = 0;
491       formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
492       Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
493 
494       DEBUG({
495         if ((*I)->NewLine) {
496           llvm::dbgs() << "Penalty for placing "
497                        << (*I)->Previous->State.NextToken->Tok.getName() << ": "
498                        << Penalty << "\n";
499         }
500       });
501     }
502   }
503 
504   /// \brief Add the following state to the analysis queue \c Queue.
505   ///
506   /// Assume the current state is \p PreviousNode and has been reached with a
507   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
508   void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
509                            bool NewLine) {
510     if (NewLine && !Indenter->canBreak(PreviousNode->State))
511       return;
512     if (!NewLine && Indenter->mustBreak(PreviousNode->State))
513       return;
514 
515     StateNode *Node = new (Allocator.Allocate())
516         StateNode(PreviousNode->State, NewLine, PreviousNode);
517     if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
518       return;
519 
520     Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
521 
522     Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node));
523     ++Count;
524   }
525 
526   /// \brief If the \p State's next token is an r_brace closing a nested block,
527   /// format the nested block before it.
528   ///
529   /// Returns \c true if all children could be placed successfully and adapts
530   /// \p Penalty as well as \p State. If \p DryRun is false, also directly
531   /// creates changes using \c Whitespaces.
532   ///
533   /// The crucial idea here is that children always get formatted upon
534   /// encountering the closing brace right after the nested block. Now, if we
535   /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
536   /// \c false), the entire block has to be kept on the same line (which is only
537   /// possible if it fits on the line, only contains a single statement, etc.
538   ///
539   /// If \p NewLine is true, we format the nested block on separate lines, i.e.
540   /// break after the "{", format all lines with correct indentation and the put
541   /// the closing "}" on yet another new line.
542   ///
543   /// This enables us to keep the simple structure of the
544   /// \c UnwrappedLineFormatter, where we only have two options for each token:
545   /// break or don't break.
546   bool formatChildren(LineState &State, bool NewLine, bool DryRun,
547                       unsigned &Penalty) {
548     const FormatToken &Previous = *State.NextToken->Previous;
549     const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
550     if (!LBrace || LBrace->isNot(tok::l_brace) ||
551         LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
552       // The previous token does not open a block. Nothing to do. We don't
553       // assert so that we can simply call this function for all tokens.
554       return true;
555 
556     if (NewLine) {
557       unsigned ParentIndent = State.Stack.back().Indent;
558       for (SmallVector<AnnotatedLine *, 1>::const_iterator
559                I = Previous.Children.begin(),
560                E = Previous.Children.end();
561            I != E; ++I) {
562         unsigned Indent =
563             ParentIndent + ((*I)->Level - Line.Level - 1) * Style.IndentWidth;
564         if (!DryRun) {
565           unsigned Newlines = std::min((*I)->First->NewlinesBefore,
566                                        Style.MaxEmptyLinesToKeep + 1);
567           Newlines = std::max(1u, Newlines);
568           Whitespaces->replaceWhitespace(
569               *(*I)->First, Newlines, (*I)->Level, /*Spaces=*/Indent,
570               /*StartOfTokenColumn=*/Indent, Line.InPPDirective);
571         }
572         UnwrappedLineFormatter Formatter(Indenter, Whitespaces, Style, **I);
573         Penalty += Formatter.format(Indent, DryRun);
574       }
575       return true;
576     }
577 
578     if (Previous.Children.size() > 1)
579       return false; // Cannot merge multiple statements into a single line.
580 
581     // We can't put the closing "}" on a line with a trailing comment.
582     if (Previous.Children[0]->Last->isTrailingComment())
583       return false;
584 
585     if (!DryRun) {
586       Whitespaces->replaceWhitespace(
587           *Previous.Children[0]->First,
588           /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
589           /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
590       UnwrappedLineFormatter Formatter(Indenter, Whitespaces, Style,
591                                        *Previous.Children[0]);
592       Penalty += Formatter.format(State.Column + 1, DryRun);
593     }
594 
595     State.Column += 1 + Previous.Children[0]->Last->TotalLength;
596     return true;
597   }
598 
599   ContinuationIndenter *Indenter;
600   WhitespaceManager *Whitespaces;
601   FormatStyle Style;
602   const AnnotatedLine &Line;
603 
604   llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
605   QueueType Queue;
606   // Increasing count of \c StateNode items we have created. This is used
607   // to create a deterministic order independent of the container.
608   unsigned Count;
609 };
610 
611 class FormatTokenLexer {
612 public:
613   FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
614                    encoding::Encoding Encoding)
615       : FormatTok(NULL), GreaterStashed(false), Column(0),
616         TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
617         IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
618     Lex.SetKeepWhitespaceMode(true);
619   }
620 
621   ArrayRef<FormatToken *> lex() {
622     assert(Tokens.empty());
623     do {
624       Tokens.push_back(getNextToken());
625       maybeJoinPreviousTokens();
626     } while (Tokens.back()->Tok.isNot(tok::eof));
627     return Tokens;
628   }
629 
630   IdentifierTable &getIdentTable() { return IdentTable; }
631 
632 private:
633   void maybeJoinPreviousTokens() {
634     if (Tokens.size() < 4)
635       return;
636     FormatToken *Last = Tokens.back();
637     if (!Last->is(tok::r_paren))
638       return;
639 
640     FormatToken *String = Tokens[Tokens.size() - 2];
641     if (!String->is(tok::string_literal) || String->IsMultiline)
642       return;
643 
644     if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
645       return;
646 
647     FormatToken *Macro = Tokens[Tokens.size() - 4];
648     if (Macro->TokenText != "_T")
649       return;
650 
651     const char *Start = Macro->TokenText.data();
652     const char *End = Last->TokenText.data() + Last->TokenText.size();
653     String->TokenText = StringRef(Start, End - Start);
654     String->IsFirst = Macro->IsFirst;
655     String->LastNewlineOffset = Macro->LastNewlineOffset;
656     String->WhitespaceRange = Macro->WhitespaceRange;
657     String->OriginalColumn = Macro->OriginalColumn;
658     String->ColumnWidth = encoding::columnWidthWithTabs(
659         String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
660 
661     Tokens.pop_back();
662     Tokens.pop_back();
663     Tokens.pop_back();
664     Tokens.back() = String;
665   }
666 
667   FormatToken *getNextToken() {
668     if (GreaterStashed) {
669       // Create a synthesized second '>' token.
670       // FIXME: Increment Column and set OriginalColumn.
671       Token Greater = FormatTok->Tok;
672       FormatTok = new (Allocator.Allocate()) FormatToken;
673       FormatTok->Tok = Greater;
674       SourceLocation GreaterLocation =
675           FormatTok->Tok.getLocation().getLocWithOffset(1);
676       FormatTok->WhitespaceRange =
677           SourceRange(GreaterLocation, GreaterLocation);
678       FormatTok->TokenText = ">";
679       FormatTok->ColumnWidth = 1;
680       GreaterStashed = false;
681       return FormatTok;
682     }
683 
684     FormatTok = new (Allocator.Allocate()) FormatToken;
685     readRawToken(*FormatTok);
686     SourceLocation WhitespaceStart =
687         FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
688     if (SourceMgr.getFileOffset(WhitespaceStart) == 0)
689       FormatTok->IsFirst = true;
690 
691     // Consume and record whitespace until we find a significant token.
692     unsigned WhitespaceLength = TrailingWhitespace;
693     while (FormatTok->Tok.is(tok::unknown)) {
694       for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
695         switch (FormatTok->TokenText[i]) {
696         case '\n':
697           ++FormatTok->NewlinesBefore;
698           // FIXME: This is technically incorrect, as it could also
699           // be a literal backslash at the end of the line.
700           if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
701                          (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
702                           FormatTok->TokenText[i - 2] != '\\')))
703             FormatTok->HasUnescapedNewline = true;
704           FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
705           Column = 0;
706           break;
707         case '\r':
708         case '\f':
709         case '\v':
710           Column = 0;
711           break;
712         case ' ':
713           ++Column;
714           break;
715         case '\t':
716           Column += Style.TabWidth - Column % Style.TabWidth;
717           break;
718         case '\\':
719           ++Column;
720           if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
721                              FormatTok->TokenText[i + 1] != '\n'))
722             FormatTok->Type = TT_ImplicitStringLiteral;
723           break;
724         default:
725           FormatTok->Type = TT_ImplicitStringLiteral;
726           ++Column;
727           break;
728         }
729       }
730 
731       if (FormatTok->Type == TT_ImplicitStringLiteral)
732         break;
733       WhitespaceLength += FormatTok->Tok.getLength();
734 
735       readRawToken(*FormatTok);
736     }
737 
738     // In case the token starts with escaped newlines, we want to
739     // take them into account as whitespace - this pattern is quite frequent
740     // in macro definitions.
741     // FIXME: Add a more explicit test.
742     while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
743            FormatTok->TokenText[1] == '\n') {
744       // FIXME: ++FormatTok->NewlinesBefore is missing...
745       WhitespaceLength += 2;
746       Column = 0;
747       FormatTok->TokenText = FormatTok->TokenText.substr(2);
748     }
749 
750     FormatTok->WhitespaceRange = SourceRange(
751         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
752 
753     FormatTok->OriginalColumn = Column;
754 
755     TrailingWhitespace = 0;
756     if (FormatTok->Tok.is(tok::comment)) {
757       // FIXME: Add the trimmed whitespace to Column.
758       StringRef UntrimmedText = FormatTok->TokenText;
759       FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
760       TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
761     } else if (FormatTok->Tok.is(tok::raw_identifier)) {
762       IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
763       FormatTok->Tok.setIdentifierInfo(&Info);
764       FormatTok->Tok.setKind(Info.getTokenID());
765     } else if (FormatTok->Tok.is(tok::greatergreater)) {
766       FormatTok->Tok.setKind(tok::greater);
767       FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
768       GreaterStashed = true;
769     }
770 
771     // Now FormatTok is the next non-whitespace token.
772 
773     StringRef Text = FormatTok->TokenText;
774     size_t FirstNewlinePos = Text.find('\n');
775     if (FirstNewlinePos == StringRef::npos) {
776       // FIXME: ColumnWidth actually depends on the start column, we need to
777       // take this into account when the token is moved.
778       FormatTok->ColumnWidth =
779           encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
780       Column += FormatTok->ColumnWidth;
781     } else {
782       FormatTok->IsMultiline = true;
783       // FIXME: ColumnWidth actually depends on the start column, we need to
784       // take this into account when the token is moved.
785       FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
786           Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
787 
788       // The last line of the token always starts in column 0.
789       // Thus, the length can be precomputed even in the presence of tabs.
790       FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
791           Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
792           Encoding);
793       Column = FormatTok->LastLineColumnWidth;
794     }
795 
796     return FormatTok;
797   }
798 
799   FormatToken *FormatTok;
800   bool GreaterStashed;
801   unsigned Column;
802   unsigned TrailingWhitespace;
803   Lexer &Lex;
804   SourceManager &SourceMgr;
805   FormatStyle &Style;
806   IdentifierTable IdentTable;
807   encoding::Encoding Encoding;
808   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
809   SmallVector<FormatToken *, 16> Tokens;
810 
811   void readRawToken(FormatToken &Tok) {
812     Lex.LexFromRawLexer(Tok.Tok);
813     Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
814                               Tok.Tok.getLength());
815     // For formatting, treat unterminated string literals like normal string
816     // literals.
817     if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
818         Tok.TokenText[0] == '"') {
819       Tok.Tok.setKind(tok::string_literal);
820       Tok.IsUnterminatedLiteral = true;
821     }
822   }
823 };
824 
825 class Formatter : public UnwrappedLineConsumer {
826 public:
827   Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
828             const std::vector<CharSourceRange> &Ranges)
829       : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
830         Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
831         Ranges(Ranges), UnwrappedLines(1),
832         Encoding(encoding::detectEncoding(Lex.getBuffer())) {
833     DEBUG(llvm::dbgs() << "File encoding: "
834                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
835                                                                : "unknown")
836                        << "\n");
837   }
838 
839   tooling::Replacements format() {
840     tooling::Replacements Result;
841     FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
842 
843     UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
844     bool StructuralError = Parser.parse();
845     assert(UnwrappedLines.rbegin()->empty());
846     for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
847          ++Run) {
848       DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
849       SmallVector<AnnotatedLine *, 16> AnnotatedLines;
850       for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
851         AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
852       }
853       tooling::Replacements RunResult =
854           format(AnnotatedLines, StructuralError, Tokens);
855       DEBUG({
856         llvm::dbgs() << "Replacements for run " << Run << ":\n";
857         for (tooling::Replacements::iterator I = RunResult.begin(),
858                                              E = RunResult.end();
859              I != E; ++I) {
860           llvm::dbgs() << I->toString() << "\n";
861         }
862       });
863       for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
864         delete AnnotatedLines[i];
865       }
866       Result.insert(RunResult.begin(), RunResult.end());
867       Whitespaces.reset();
868     }
869     return Result;
870   }
871 
872   tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
873                                bool StructuralError, FormatTokenLexer &Tokens) {
874     TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
875     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
876       Annotator.annotate(*AnnotatedLines[i]);
877     }
878     deriveLocalStyle(AnnotatedLines);
879     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
880       Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
881     }
882 
883     Annotator.setCommentLineLevels(AnnotatedLines);
884 
885     std::vector<int> IndentForLevel;
886     bool PreviousLineWasTouched = false;
887     const AnnotatedLine *PreviousLine = NULL;
888     bool FormatPPDirective = false;
889     for (SmallVectorImpl<AnnotatedLine *>::iterator I = AnnotatedLines.begin(),
890                                                     E = AnnotatedLines.end();
891          I != E; ++I) {
892       const AnnotatedLine &TheLine = **I;
893       const FormatToken *FirstTok = TheLine.First;
894       int Offset = getIndentOffset(*TheLine.First);
895 
896       // Check whether this line is part of a formatted preprocessor directive.
897       if (FirstTok->HasUnescapedNewline)
898         FormatPPDirective = false;
899       if (!FormatPPDirective && TheLine.InPPDirective &&
900           (touchesLine(TheLine) || touchesPPDirective(I + 1, E)))
901         FormatPPDirective = true;
902 
903       // Determine indent and try to merge multiple unwrapped lines.
904       while (IndentForLevel.size() <= TheLine.Level)
905         IndentForLevel.push_back(-1);
906       IndentForLevel.resize(TheLine.Level + 1);
907       unsigned Indent = getIndent(IndentForLevel, TheLine.Level);
908       if (static_cast<int>(Indent) + Offset >= 0)
909         Indent += Offset;
910       tryFitMultipleLinesInOne(Indent, I, E);
911 
912       bool WasMoved = PreviousLineWasTouched && FirstTok->NewlinesBefore == 0;
913       if (TheLine.First->is(tok::eof)) {
914         if (PreviousLineWasTouched) {
915           unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u);
916           Whitespaces.replaceWhitespace(*TheLine.First, Newlines,
917                                         /*IndentLevel=*/0, /*Spaces=*/0,
918                                         /*TargetColumn=*/0);
919         }
920       } else if (TheLine.Type != LT_Invalid &&
921                  (WasMoved || FormatPPDirective || touchesLine(TheLine))) {
922         unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
923         if (FirstTok->WhitespaceRange.isValid()) {
924           formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent,
925                            TheLine.InPPDirective);
926         } else {
927           Indent = LevelIndent = FirstTok->OriginalColumn;
928         }
929         ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding,
930                                       BinPackInconclusiveFunctions);
931 
932         // If everything fits on a single line, just put it there.
933         unsigned ColumnLimit = Style.ColumnLimit;
934         AnnotatedLine *NextLine = *(I + 1);
935         if ((I + 1) != E && NextLine->InPPDirective &&
936             !NextLine->First->HasUnescapedNewline)
937           ColumnLimit = getColumnLimit(TheLine.InPPDirective);
938 
939         if (TheLine.Last->TotalLength + Indent <= ColumnLimit) {
940           LineState State =
941               Indenter.getInitialState(Indent, &TheLine, /*DryRun=*/false);
942           while (State.NextToken != NULL)
943             Indenter.addTokenToState(State, false, false);
944         } else if (Style.ColumnLimit == 0) {
945           NoColumnLimitFormatter Formatter(&Indenter);
946           Formatter.format(Indent, &TheLine);
947         } else {
948           UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style,
949                                            TheLine);
950           Formatter.format(Indent);
951         }
952 
953         IndentForLevel[TheLine.Level] = LevelIndent;
954         PreviousLineWasTouched = true;
955       } else {
956         // Format the first token if necessary, and notify the WhitespaceManager
957         // about the unchanged whitespace.
958         for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
959           if (Tok == TheLine.First &&
960               (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
961             unsigned LevelIndent = Tok->OriginalColumn;
962             // Remove trailing whitespace of the previous line if it was
963             // touched.
964             if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) {
965               formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent,
966                                TheLine.InPPDirective);
967             } else {
968               Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective);
969             }
970 
971             if (static_cast<int>(LevelIndent) - Offset >= 0)
972               LevelIndent -= Offset;
973             if (Tok->isNot(tok::comment))
974               IndentForLevel[TheLine.Level] = LevelIndent;
975           } else {
976             Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective);
977           }
978         }
979         // If we did not reformat this unwrapped line, the column at the end of
980         // the last token is unchanged - thus, we can calculate the end of the
981         // last token.
982         PreviousLineWasTouched = false;
983       }
984       for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
985         Tok->Finalized = true;
986       }
987       PreviousLine = *I;
988     }
989     return Whitespaces.generateReplacements();
990   }
991 
992 private:
993   static bool inputUsesCRLF(StringRef Text) {
994     return Text.count('\r') * 2 > Text.count('\n');
995   }
996 
997   void
998   deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
999     unsigned CountBoundToVariable = 0;
1000     unsigned CountBoundToType = 0;
1001     bool HasCpp03IncompatibleFormat = false;
1002     bool HasBinPackedFunction = false;
1003     bool HasOnePerLineFunction = false;
1004     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1005       if (!AnnotatedLines[i]->First->Next)
1006         continue;
1007       FormatToken *Tok = AnnotatedLines[i]->First->Next;
1008       while (Tok->Next) {
1009         if (Tok->Type == TT_PointerOrReference) {
1010           bool SpacesBefore =
1011               Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1012           bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1013                              Tok->Next->WhitespaceRange.getEnd();
1014           if (SpacesBefore && !SpacesAfter)
1015             ++CountBoundToVariable;
1016           else if (!SpacesBefore && SpacesAfter)
1017             ++CountBoundToType;
1018         }
1019 
1020         if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1021           if (Tok->is(tok::coloncolon) &&
1022               Tok->Previous->Type == TT_TemplateOpener)
1023             HasCpp03IncompatibleFormat = true;
1024           if (Tok->Type == TT_TemplateCloser &&
1025               Tok->Previous->Type == TT_TemplateCloser)
1026             HasCpp03IncompatibleFormat = true;
1027         }
1028 
1029         if (Tok->PackingKind == PPK_BinPacked)
1030           HasBinPackedFunction = true;
1031         if (Tok->PackingKind == PPK_OnePerLine)
1032           HasOnePerLineFunction = true;
1033 
1034         Tok = Tok->Next;
1035       }
1036     }
1037     if (Style.DerivePointerBinding) {
1038       if (CountBoundToType > CountBoundToVariable)
1039         Style.PointerBindsToType = true;
1040       else if (CountBoundToType < CountBoundToVariable)
1041         Style.PointerBindsToType = false;
1042     }
1043     if (Style.Standard == FormatStyle::LS_Auto) {
1044       Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1045                                                   : FormatStyle::LS_Cpp03;
1046     }
1047     BinPackInconclusiveFunctions =
1048         HasBinPackedFunction || !HasOnePerLineFunction;
1049   }
1050 
1051   /// \brief Get the indent of \p Level from \p IndentForLevel.
1052   ///
1053   /// \p IndentForLevel must contain the indent for the level \c l
1054   /// at \p IndentForLevel[l], or a value < 0 if the indent for
1055   /// that level is unknown.
1056   unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
1057     if (IndentForLevel[Level] != -1)
1058       return IndentForLevel[Level];
1059     if (Level == 0)
1060       return 0;
1061     return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
1062   }
1063 
1064   /// \brief Get the offset of the line relatively to the level.
1065   ///
1066   /// For example, 'public:' labels in classes are offset by 1 or 2
1067   /// characters to the left from their level.
1068   int getIndentOffset(const FormatToken &RootToken) {
1069     if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
1070       return Style.AccessModifierOffset;
1071     return 0;
1072   }
1073 
1074   /// \brief Tries to merge lines into one.
1075   ///
1076   /// This will change \c Line and \c AnnotatedLine to contain the merged line,
1077   /// if possible; note that \c I will be incremented when lines are merged.
1078   void tryFitMultipleLinesInOne(unsigned Indent,
1079                                 SmallVectorImpl<AnnotatedLine *>::iterator &I,
1080                                 SmallVectorImpl<AnnotatedLine *>::iterator E) {
1081     // We can never merge stuff if there are trailing line comments.
1082     AnnotatedLine *TheLine = *I;
1083     if (TheLine->Last->Type == TT_LineComment)
1084       return;
1085 
1086     if (Indent > Style.ColumnLimit)
1087       return;
1088 
1089     unsigned Limit = Style.ColumnLimit - Indent;
1090     // If we already exceed the column limit, we set 'Limit' to 0. The different
1091     // tryMerge..() functions can then decide whether to still do merging.
1092     Limit = TheLine->Last->TotalLength > Limit
1093                 ? 0
1094                 : Limit - TheLine->Last->TotalLength;
1095 
1096     if (I + 1 == E || (*(I + 1))->Type == LT_Invalid)
1097       return;
1098 
1099     if (TheLine->Last->is(tok::l_brace)) {
1100       tryMergeSimpleBlock(I, E, Limit);
1101     } else if (Style.AllowShortIfStatementsOnASingleLine &&
1102                TheLine->First->is(tok::kw_if)) {
1103       tryMergeSimpleControlStatement(I, E, Limit);
1104     } else if (Style.AllowShortLoopsOnASingleLine &&
1105                TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
1106       tryMergeSimpleControlStatement(I, E, Limit);
1107     } else if (TheLine->InPPDirective && (TheLine->First->HasUnescapedNewline ||
1108                                           TheLine->First->IsFirst)) {
1109       tryMergeSimplePPDirective(I, E, Limit);
1110     }
1111   }
1112 
1113   void tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::iterator &I,
1114                                  SmallVectorImpl<AnnotatedLine *>::iterator E,
1115                                  unsigned Limit) {
1116     if (Limit == 0)
1117       return;
1118     AnnotatedLine &Line = **I;
1119     if (!(*(I + 1))->InPPDirective || (*(I + 1))->First->HasUnescapedNewline)
1120       return;
1121     if (I + 2 != E && (*(I + 2))->InPPDirective &&
1122         !(*(I + 2))->First->HasUnescapedNewline)
1123       return;
1124     if (1 + (*(I + 1))->Last->TotalLength > Limit)
1125       return;
1126     join(Line, **(++I));
1127   }
1128 
1129   void
1130   tryMergeSimpleControlStatement(SmallVectorImpl<AnnotatedLine *>::iterator &I,
1131                                  SmallVectorImpl<AnnotatedLine *>::iterator E,
1132                                  unsigned Limit) {
1133     if (Limit == 0)
1134       return;
1135     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman &&
1136         (*(I + 1))->First->is(tok::l_brace))
1137       return;
1138     if ((*(I + 1))->InPPDirective != (*I)->InPPDirective ||
1139         ((*(I + 1))->InPPDirective && (*(I + 1))->First->HasUnescapedNewline))
1140       return;
1141     AnnotatedLine &Line = **I;
1142     if (Line.Last->isNot(tok::r_paren))
1143       return;
1144     if (1 + (*(I + 1))->Last->TotalLength > Limit)
1145       return;
1146     if ((*(I + 1))->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
1147                                    tok::kw_while) ||
1148         (*(I + 1))->First->Type == TT_LineComment)
1149       return;
1150     // Only inline simple if's (no nested if or else).
1151     if (I + 2 != E && Line.First->is(tok::kw_if) &&
1152         (*(I + 2))->First->is(tok::kw_else))
1153       return;
1154     join(Line, **(++I));
1155   }
1156 
1157   void tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::iterator &I,
1158                            SmallVectorImpl<AnnotatedLine *>::iterator E,
1159                            unsigned Limit) {
1160     // No merging if the brace already is on the next line.
1161     if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
1162       return;
1163 
1164     // First, check that the current line allows merging. This is the case if
1165     // we're not in a control flow statement and the last token is an opening
1166     // brace.
1167     AnnotatedLine &Line = **I;
1168     if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
1169                             tok::kw_else, tok::kw_try, tok::kw_catch,
1170                             tok::kw_for,
1171                             // This gets rid of all ObjC @ keywords and methods.
1172                             tok::at, tok::minus, tok::plus))
1173       return;
1174 
1175     FormatToken *Tok = (*(I + 1))->First;
1176     if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
1177         (Tok->getNextNonComment() == NULL ||
1178          Tok->getNextNonComment()->is(tok::semi))) {
1179       // We merge empty blocks even if the line exceeds the column limit.
1180       Tok->SpacesRequiredBefore = 0;
1181       Tok->CanBreakBefore = true;
1182       join(Line, **(I + 1));
1183       I += 1;
1184     } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
1185       // Check that we still have three lines and they fit into the limit.
1186       if (I + 2 == E || (*(I + 2))->Type == LT_Invalid ||
1187           !nextTwoLinesFitInto(I, Limit))
1188         return;
1189 
1190       // Second, check that the next line does not contain any braces - if it
1191       // does, readability declines when putting it into a single line.
1192       if ((*(I + 1))->Last->Type == TT_LineComment || Tok->MustBreakBefore)
1193         return;
1194       do {
1195         if (Tok->isOneOf(tok::l_brace, tok::r_brace))
1196           return;
1197         Tok = Tok->Next;
1198       } while (Tok != NULL);
1199 
1200       // Last, check that the third line contains a single closing brace.
1201       Tok = (*(I + 2))->First;
1202       if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) ||
1203           Tok->MustBreakBefore)
1204         return;
1205 
1206       join(Line, **(I + 1));
1207       join(Line, **(I + 2));
1208       I += 2;
1209     }
1210   }
1211 
1212   bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::iterator I,
1213                            unsigned Limit) {
1214     return 1 + (*(I + 1))->Last->TotalLength + 1 +
1215                (*(I + 2))->Last->TotalLength <=
1216            Limit;
1217   }
1218 
1219   void join(AnnotatedLine &A, const AnnotatedLine &B) {
1220     assert(!A.Last->Next);
1221     assert(!B.First->Previous);
1222     A.Last->Next = B.First;
1223     B.First->Previous = A.Last;
1224     unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
1225     for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
1226       Tok->TotalLength += LengthA;
1227       A.Last = Tok;
1228     }
1229   }
1230 
1231   bool touchesRanges(const CharSourceRange &Range) {
1232     for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1233       if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),
1234                                                Ranges[i].getBegin()) &&
1235           !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
1236                                                Range.getBegin()))
1237         return true;
1238     }
1239     return false;
1240   }
1241 
1242   bool touchesLine(const AnnotatedLine &TheLine) {
1243     const FormatToken *First = TheLine.First;
1244     const FormatToken *Last = TheLine.Last;
1245     CharSourceRange LineRange = CharSourceRange::getCharRange(
1246         First->WhitespaceRange.getBegin().getLocWithOffset(
1247             First->LastNewlineOffset),
1248         Last->getStartOfNonWhitespace().getLocWithOffset(
1249             Last->TokenText.size() - 1));
1250     return touchesRanges(LineRange);
1251   }
1252 
1253   bool touchesPPDirective(SmallVectorImpl<AnnotatedLine *>::iterator I,
1254                           SmallVectorImpl<AnnotatedLine *>::iterator E) {
1255     for (; I != E; ++I) {
1256       if ((*I)->First->HasUnescapedNewline)
1257         return false;
1258       if (touchesLine(**I))
1259         return true;
1260     }
1261     return false;
1262   }
1263 
1264   bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) {
1265     const FormatToken *First = TheLine.First;
1266     CharSourceRange LineRange = CharSourceRange::getCharRange(
1267         First->WhitespaceRange.getBegin(),
1268         First->WhitespaceRange.getBegin().getLocWithOffset(
1269             First->LastNewlineOffset));
1270     return touchesRanges(LineRange);
1271   }
1272 
1273   virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1274     assert(!UnwrappedLines.empty());
1275     UnwrappedLines.back().push_back(TheLine);
1276   }
1277 
1278   virtual void finishRun() {
1279     UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1280   }
1281 
1282   /// \brief Add a new line and the required indent before the first Token
1283   /// of the \c UnwrappedLine if there was no structural parsing error.
1284   void formatFirstToken(FormatToken &RootToken,
1285                         const AnnotatedLine *PreviousLine, unsigned IndentLevel,
1286                         unsigned Indent, bool InPPDirective) {
1287     unsigned Newlines =
1288         std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
1289     // Remove empty lines before "}" where applicable.
1290     if (RootToken.is(tok::r_brace) &&
1291         (!RootToken.Next ||
1292          (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
1293       Newlines = std::min(Newlines, 1u);
1294     if (Newlines == 0 && !RootToken.IsFirst)
1295       Newlines = 1;
1296 
1297     // Insert extra new line before access specifiers.
1298     if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
1299         RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
1300       ++Newlines;
1301 
1302     // Remove empty lines after access specifiers.
1303     if (PreviousLine && PreviousLine->First->isAccessSpecifier())
1304       Newlines = std::min(1u, Newlines);
1305 
1306     Whitespaces.replaceWhitespace(
1307         RootToken, Newlines, IndentLevel, Indent, Indent,
1308         InPPDirective && !RootToken.HasUnescapedNewline);
1309   }
1310 
1311   unsigned getColumnLimit(bool InPPDirective) const {
1312     // In preprocessor directives reserve two chars for trailing " \"
1313     return Style.ColumnLimit - (InPPDirective ? 2 : 0);
1314   }
1315 
1316   FormatStyle Style;
1317   Lexer &Lex;
1318   SourceManager &SourceMgr;
1319   WhitespaceManager Whitespaces;
1320   std::vector<CharSourceRange> Ranges;
1321   SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
1322 
1323   encoding::Encoding Encoding;
1324   bool BinPackInconclusiveFunctions;
1325 };
1326 
1327 } // end anonymous namespace
1328 
1329 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1330                                SourceManager &SourceMgr,
1331                                std::vector<CharSourceRange> Ranges) {
1332   Formatter formatter(Style, Lex, SourceMgr, Ranges);
1333   return formatter.format();
1334 }
1335 
1336 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1337                                std::vector<tooling::Range> Ranges,
1338                                StringRef FileName) {
1339   FileManager Files((FileSystemOptions()));
1340   DiagnosticsEngine Diagnostics(
1341       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1342       new DiagnosticOptions);
1343   SourceManager SourceMgr(Diagnostics, Files);
1344   llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1345   const clang::FileEntry *Entry =
1346       Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1347   SourceMgr.overrideFileContents(Entry, Buf);
1348   FileID ID =
1349       SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1350   Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
1351             getFormattingLangOpts(Style.Standard));
1352   SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1353   std::vector<CharSourceRange> CharRanges;
1354   for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1355     SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset());
1356     SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
1357     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1358   }
1359   return reformat(Style, Lex, SourceMgr, CharRanges);
1360 }
1361 
1362 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) {
1363   LangOptions LangOpts;
1364   LangOpts.CPlusPlus = 1;
1365   LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1366   LangOpts.LineComment = 1;
1367   LangOpts.Bool = 1;
1368   LangOpts.ObjC1 = 1;
1369   LangOpts.ObjC2 = 1;
1370   return LangOpts;
1371 }
1372 
1373 const char *StyleOptionHelpDescription =
1374     "Coding style, currently supports:\n"
1375     "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
1376     "Use -style=file to load style configuration from\n"
1377     ".clang-format file located in one of the parent\n"
1378     "directories of the source file (or current\n"
1379     "directory for stdin).\n"
1380     "Use -style=\"{key: value, ...}\" to set specific\n"
1381     "parameters, e.g.:\n"
1382     "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
1383 
1384 FormatStyle getStyle(StringRef StyleName, StringRef FileName) {
1385   // Fallback style in case the rest of this function can't determine a style.
1386   StringRef FallbackStyle = "LLVM";
1387   FormatStyle Style;
1388   getPredefinedStyle(FallbackStyle, &Style);
1389 
1390   if (StyleName.startswith("{")) {
1391     // Parse YAML/JSON style from the command line.
1392     if (llvm::error_code ec = parseConfiguration(StyleName, &Style)) {
1393       llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
1394                    << FallbackStyle << " style\n";
1395     }
1396     return Style;
1397   }
1398 
1399   if (!StyleName.equals_lower("file")) {
1400     if (!getPredefinedStyle(StyleName, &Style))
1401       llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1402                    << " style\n";
1403     return Style;
1404   }
1405 
1406   SmallString<128> Path(FileName);
1407   llvm::sys::fs::make_absolute(Path);
1408   for (StringRef Directory = Path; !Directory.empty();
1409        Directory = llvm::sys::path::parent_path(Directory)) {
1410     if (!llvm::sys::fs::is_directory(Directory))
1411       continue;
1412     SmallString<128> ConfigFile(Directory);
1413 
1414     llvm::sys::path::append(ConfigFile, ".clang-format");
1415     DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1416     bool IsFile = false;
1417     // Ignore errors from is_regular_file: we only need to know if we can read
1418     // the file or not.
1419     llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1420 
1421     if (!IsFile) {
1422       // Try _clang-format too, since dotfiles are not commonly used on Windows.
1423       ConfigFile = Directory;
1424       llvm::sys::path::append(ConfigFile, "_clang-format");
1425       DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1426       llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1427     }
1428 
1429     if (IsFile) {
1430       OwningPtr<llvm::MemoryBuffer> Text;
1431       if (llvm::error_code ec =
1432               llvm::MemoryBuffer::getFile(ConfigFile.c_str(), Text)) {
1433         llvm::errs() << ec.message() << "\n";
1434         continue;
1435       }
1436       if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
1437         llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
1438                      << "\n";
1439         continue;
1440       }
1441       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
1442       return Style;
1443     }
1444   }
1445   llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
1446                << " style\n";
1447   return Style;
1448 }
1449 
1450 } // namespace format
1451 } // namespace clang
1452