1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "format-formatter"
17 
18 #include "ContinuationIndenter.h"
19 #include "TokenAnnotator.h"
20 #include "UnwrappedLineParser.h"
21 #include "WhitespaceManager.h"
22 #include "clang/Basic/Diagnostic.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Format/Format.h"
25 #include "clang/Lex/Lexer.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/YAMLTraits.h"
31 #include <queue>
32 #include <string>
33 
34 using clang::format::FormatStyle;
35 
36 namespace llvm {
37 namespace yaml {
38 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
39   static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
40     IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
41     IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
42     IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
43   }
44 };
45 
46 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
47   static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
48     IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
49     IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
50     IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
51     IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
52     IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
53   }
54 };
55 
56 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
57   static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
58     IO.enumCase(Value, "Never", FormatStyle::UT_Never);
59     IO.enumCase(Value, "false", FormatStyle::UT_Never);
60     IO.enumCase(Value, "Always", FormatStyle::UT_Always);
61     IO.enumCase(Value, "true", FormatStyle::UT_Always);
62     IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
63   }
64 };
65 
66 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
67   static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
68     IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
69     IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
70     IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
71     IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
72     IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
73   }
74 };
75 
76 template <>
77 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
78   static void enumeration(IO &IO,
79                           FormatStyle::NamespaceIndentationKind &Value) {
80     IO.enumCase(Value, "None", FormatStyle::NI_None);
81     IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
82     IO.enumCase(Value, "All", FormatStyle::NI_All);
83   }
84 };
85 
86 template <>
87 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
88   static void enumeration(IO &IO,
89                           FormatStyle::SpaceBeforeParensOptions &Value) {
90     IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
91     IO.enumCase(Value, "ControlStatements",
92                 FormatStyle::SBPO_ControlStatements);
93     IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
94 
95     // For backward compatibility.
96     IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
97     IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
98   }
99 };
100 
101 template <> struct MappingTraits<FormatStyle> {
102   static void mapping(IO &IO, FormatStyle &Style) {
103     // When reading, read the language first, we need it for getPredefinedStyle.
104     IO.mapOptional("Language", Style.Language);
105 
106     if (IO.outputting()) {
107       StringRef StylesArray[] = { "LLVM",    "Google", "Chromium",
108                                   "Mozilla", "WebKit", "GNU" };
109       ArrayRef<StringRef> Styles(StylesArray);
110       for (size_t i = 0, e = Styles.size(); i < e; ++i) {
111         StringRef StyleName(Styles[i]);
112         FormatStyle PredefinedStyle;
113         if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
114             Style == PredefinedStyle) {
115           IO.mapOptional("# BasedOnStyle", StyleName);
116           break;
117         }
118       }
119     } else {
120       StringRef BasedOnStyle;
121       IO.mapOptional("BasedOnStyle", BasedOnStyle);
122       if (!BasedOnStyle.empty()) {
123         FormatStyle::LanguageKind OldLanguage = Style.Language;
124         FormatStyle::LanguageKind Language =
125             ((FormatStyle *)IO.getContext())->Language;
126         if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
127           IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
128           return;
129         }
130         Style.Language = OldLanguage;
131       }
132     }
133 
134     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
135     IO.mapOptional("ConstructorInitializerIndentWidth",
136                    Style.ConstructorInitializerIndentWidth);
137     IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
138     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
139     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
140                    Style.AllowAllParametersOfDeclarationOnNextLine);
141     IO.mapOptional("AllowShortIfStatementsOnASingleLine",
142                    Style.AllowShortIfStatementsOnASingleLine);
143     IO.mapOptional("AllowShortLoopsOnASingleLine",
144                    Style.AllowShortLoopsOnASingleLine);
145     IO.mapOptional("AllowShortFunctionsOnASingleLine",
146                    Style.AllowShortFunctionsOnASingleLine);
147     IO.mapOptional("AlwaysBreakTemplateDeclarations",
148                    Style.AlwaysBreakTemplateDeclarations);
149     IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
150                    Style.AlwaysBreakBeforeMultilineStrings);
151     IO.mapOptional("BreakBeforeBinaryOperators",
152                    Style.BreakBeforeBinaryOperators);
153     IO.mapOptional("BreakBeforeTernaryOperators",
154                    Style.BreakBeforeTernaryOperators);
155     IO.mapOptional("BreakConstructorInitializersBeforeComma",
156                    Style.BreakConstructorInitializersBeforeComma);
157     IO.mapOptional("BinPackParameters", Style.BinPackParameters);
158     IO.mapOptional("ColumnLimit", Style.ColumnLimit);
159     IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
160                    Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
161     IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding);
162     IO.mapOptional("ExperimentalAutoDetectBinPacking",
163                    Style.ExperimentalAutoDetectBinPacking);
164     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
165     IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
166     IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
167     IO.mapOptional("ObjCSpaceBeforeProtocolList",
168                    Style.ObjCSpaceBeforeProtocolList);
169     IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
170                    Style.PenaltyBreakBeforeFirstCallParameter);
171     IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
172     IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
173     IO.mapOptional("PenaltyBreakFirstLessLess",
174                    Style.PenaltyBreakFirstLessLess);
175     IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
176     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
177                    Style.PenaltyReturnTypeOnItsOwnLine);
178     IO.mapOptional("PointerBindsToType", Style.PointerBindsToType);
179     IO.mapOptional("SpacesBeforeTrailingComments",
180                    Style.SpacesBeforeTrailingComments);
181     IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
182     IO.mapOptional("Standard", Style.Standard);
183     IO.mapOptional("IndentWidth", Style.IndentWidth);
184     IO.mapOptional("TabWidth", Style.TabWidth);
185     IO.mapOptional("UseTab", Style.UseTab);
186     IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
187     IO.mapOptional("IndentFunctionDeclarationAfterType",
188                    Style.IndentFunctionDeclarationAfterType);
189     IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
190     IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
191     IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
192     IO.mapOptional("SpacesInCStyleCastParentheses",
193                    Style.SpacesInCStyleCastParentheses);
194     IO.mapOptional("SpacesInContainerLiterals",
195                    Style.SpacesInContainerLiterals);
196     IO.mapOptional("SpaceBeforeAssignmentOperators",
197                    Style.SpaceBeforeAssignmentOperators);
198     IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
199     IO.mapOptional("CommentPragmas", Style.CommentPragmas);
200 
201     // For backward compatibility.
202     if (!IO.outputting()) {
203       IO.mapOptional("SpaceAfterControlStatementKeyword",
204                      Style.SpaceBeforeParens);
205     }
206     IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
207   }
208 };
209 
210 // Allows to read vector<FormatStyle> while keeping default values.
211 // IO.getContext() should contain a pointer to the FormatStyle structure, that
212 // will be used to get default values for missing keys.
213 // If the first element has no Language specified, it will be treated as the
214 // default one for the following elements.
215 template <> struct DocumentListTraits<std::vector<FormatStyle> > {
216   static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
217     return Seq.size();
218   }
219   static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
220                               size_t Index) {
221     if (Index >= Seq.size()) {
222       assert(Index == Seq.size());
223       FormatStyle Template;
224       if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
225         Template = Seq[0];
226       } else {
227         Template = *((const FormatStyle*)IO.getContext());
228         Template.Language = FormatStyle::LK_None;
229       }
230       Seq.resize(Index + 1, Template);
231     }
232     return Seq[Index];
233   }
234 };
235 }
236 }
237 
238 namespace clang {
239 namespace format {
240 
241 FormatStyle getLLVMStyle() {
242   FormatStyle LLVMStyle;
243   LLVMStyle.Language = FormatStyle::LK_Cpp;
244   LLVMStyle.AccessModifierOffset = -2;
245   LLVMStyle.AlignEscapedNewlinesLeft = false;
246   LLVMStyle.AlignTrailingComments = true;
247   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
248   LLVMStyle.AllowShortFunctionsOnASingleLine = true;
249   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
250   LLVMStyle.AllowShortLoopsOnASingleLine = false;
251   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
252   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
253   LLVMStyle.BinPackParameters = true;
254   LLVMStyle.BreakBeforeBinaryOperators = false;
255   LLVMStyle.BreakBeforeTernaryOperators = true;
256   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
257   LLVMStyle.BreakConstructorInitializersBeforeComma = false;
258   LLVMStyle.ColumnLimit = 80;
259   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
260   LLVMStyle.ConstructorInitializerIndentWidth = 4;
261   LLVMStyle.Cpp11BracedListStyle = false;
262   LLVMStyle.DerivePointerBinding = false;
263   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
264   LLVMStyle.IndentCaseLabels = false;
265   LLVMStyle.IndentFunctionDeclarationAfterType = false;
266   LLVMStyle.IndentWidth = 2;
267   LLVMStyle.TabWidth = 8;
268   LLVMStyle.MaxEmptyLinesToKeep = 1;
269   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
270   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
271   LLVMStyle.PointerBindsToType = false;
272   LLVMStyle.SpacesBeforeTrailingComments = 1;
273   LLVMStyle.Standard = FormatStyle::LS_Cpp03;
274   LLVMStyle.UseTab = FormatStyle::UT_Never;
275   LLVMStyle.SpacesInParentheses = false;
276   LLVMStyle.SpaceInEmptyParentheses = false;
277   LLVMStyle.SpacesInContainerLiterals = true;
278   LLVMStyle.SpacesInCStyleCastParentheses = false;
279   LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
280   LLVMStyle.SpaceBeforeAssignmentOperators = true;
281   LLVMStyle.ContinuationIndentWidth = 4;
282   LLVMStyle.SpacesInAngles = false;
283   LLVMStyle.CommentPragmas = "^ IWYU pragma:";
284 
285   LLVMStyle.PenaltyBreakComment = 300;
286   LLVMStyle.PenaltyBreakFirstLessLess = 120;
287   LLVMStyle.PenaltyBreakString = 1000;
288   LLVMStyle.PenaltyExcessCharacter = 1000000;
289   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
290   LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
291 
292   return LLVMStyle;
293 }
294 
295 FormatStyle getGoogleStyle() {
296   FormatStyle GoogleStyle = getLLVMStyle();
297   GoogleStyle.AccessModifierOffset = -1;
298   GoogleStyle.AlignEscapedNewlinesLeft = true;
299   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
300   GoogleStyle.AllowShortLoopsOnASingleLine = true;
301   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
302   GoogleStyle.AlwaysBreakTemplateDeclarations = true;
303   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
304   GoogleStyle.Cpp11BracedListStyle = true;
305   GoogleStyle.DerivePointerBinding = true;
306   GoogleStyle.IndentCaseLabels = true;
307   GoogleStyle.IndentFunctionDeclarationAfterType = true;
308   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
309   GoogleStyle.PointerBindsToType = true;
310   GoogleStyle.SpacesBeforeTrailingComments = 2;
311   GoogleStyle.Standard = FormatStyle::LS_Auto;
312 
313   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
314   GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
315 
316   return GoogleStyle;
317 }
318 
319 FormatStyle getGoogleJSStyle() {
320   FormatStyle GoogleJSStyle = getGoogleStyle();
321   GoogleJSStyle.Language = FormatStyle::LK_JavaScript;
322   GoogleJSStyle.BreakBeforeTernaryOperators = false;
323   GoogleJSStyle.SpacesInContainerLiterals = false;
324   return GoogleJSStyle;
325 }
326 
327 FormatStyle getGoogleProtoStyle() {
328   FormatStyle GoogleProtoStyle = getGoogleStyle();
329   GoogleProtoStyle.Language = FormatStyle::LK_Proto;
330   GoogleProtoStyle.AllowShortFunctionsOnASingleLine = false;
331   return GoogleProtoStyle;
332 }
333 
334 FormatStyle getChromiumStyle() {
335   FormatStyle ChromiumStyle = getGoogleStyle();
336   ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
337   ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
338   ChromiumStyle.AllowShortLoopsOnASingleLine = false;
339   ChromiumStyle.BinPackParameters = false;
340   ChromiumStyle.DerivePointerBinding = false;
341   ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
342   return ChromiumStyle;
343 }
344 
345 FormatStyle getMozillaStyle() {
346   FormatStyle MozillaStyle = getLLVMStyle();
347   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
348   MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
349   MozillaStyle.DerivePointerBinding = true;
350   MozillaStyle.IndentCaseLabels = true;
351   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
352   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
353   MozillaStyle.PointerBindsToType = true;
354   return MozillaStyle;
355 }
356 
357 FormatStyle getWebKitStyle() {
358   FormatStyle Style = getLLVMStyle();
359   Style.AccessModifierOffset = -4;
360   Style.AlignTrailingComments = false;
361   Style.BreakBeforeBinaryOperators = true;
362   Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
363   Style.BreakConstructorInitializersBeforeComma = true;
364   Style.ColumnLimit = 0;
365   Style.IndentWidth = 4;
366   Style.NamespaceIndentation = FormatStyle::NI_Inner;
367   Style.PointerBindsToType = true;
368   return Style;
369 }
370 
371 FormatStyle getGNUStyle() {
372   FormatStyle Style = getLLVMStyle();
373   Style.BreakBeforeBinaryOperators = true;
374   Style.BreakBeforeBraces = FormatStyle::BS_GNU;
375   Style.BreakBeforeTernaryOperators = true;
376   Style.ColumnLimit = 79;
377   Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
378   return Style;
379 }
380 
381 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
382                         FormatStyle *Style) {
383   if (Name.equals_lower("llvm")) {
384     *Style = getLLVMStyle();
385   } else if (Name.equals_lower("chromium")) {
386     *Style = getChromiumStyle();
387   } else if (Name.equals_lower("mozilla")) {
388     *Style = getMozillaStyle();
389   } else if (Name.equals_lower("google")) {
390     switch (Language) {
391     case FormatStyle::LK_JavaScript:
392       *Style = getGoogleJSStyle();
393       break;
394     case FormatStyle::LK_Proto:
395       *Style = getGoogleProtoStyle();
396       break;
397     default:
398       *Style = getGoogleStyle();
399     }
400   } else if (Name.equals_lower("webkit")) {
401     *Style = getWebKitStyle();
402   } else if (Name.equals_lower("gnu")) {
403     *Style = getGNUStyle();
404   } else {
405     return false;
406   }
407 
408   Style->Language = Language;
409   return true;
410 }
411 
412 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
413   assert(Style);
414   FormatStyle::LanguageKind Language = Style->Language;
415   assert(Language != FormatStyle::LK_None);
416   if (Text.trim().empty())
417     return llvm::make_error_code(llvm::errc::invalid_argument);
418 
419   std::vector<FormatStyle> Styles;
420   llvm::yaml::Input Input(Text);
421   // DocumentListTraits<vector<FormatStyle>> uses the context to get default
422   // values for the fields, keys for which are missing from the configuration.
423   // Mapping also uses the context to get the language to find the correct
424   // base style.
425   Input.setContext(Style);
426   Input >> Styles;
427   if (Input.error())
428     return Input.error();
429 
430   for (unsigned i = 0; i < Styles.size(); ++i) {
431     // Ensures that only the first configuration can skip the Language option.
432     if (Styles[i].Language == FormatStyle::LK_None && i != 0)
433       return llvm::make_error_code(llvm::errc::invalid_argument);
434     // Ensure that each language is configured at most once.
435     for (unsigned j = 0; j < i; ++j) {
436       if (Styles[i].Language == Styles[j].Language) {
437         DEBUG(llvm::dbgs()
438               << "Duplicate languages in the config file on positions " << j
439               << " and " << i << "\n");
440         return llvm::make_error_code(llvm::errc::invalid_argument);
441       }
442     }
443   }
444   // Look for a suitable configuration starting from the end, so we can
445   // find the configuration for the specific language first, and the default
446   // configuration (which can only be at slot 0) after it.
447   for (int i = Styles.size() - 1; i >= 0; --i) {
448     if (Styles[i].Language == Language ||
449         Styles[i].Language == FormatStyle::LK_None) {
450       *Style = Styles[i];
451       Style->Language = Language;
452       return llvm::make_error_code(llvm::errc::success);
453     }
454   }
455   return llvm::make_error_code(llvm::errc::not_supported);
456 }
457 
458 std::string configurationAsText(const FormatStyle &Style) {
459   std::string Text;
460   llvm::raw_string_ostream Stream(Text);
461   llvm::yaml::Output Output(Stream);
462   // We use the same mapping method for input and output, so we need a non-const
463   // reference here.
464   FormatStyle NonConstStyle = Style;
465   Output << NonConstStyle;
466   return Stream.str();
467 }
468 
469 namespace {
470 
471 class NoColumnLimitFormatter {
472 public:
473   NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {}
474 
475   /// \brief Formats the line starting at \p State, simply keeping all of the
476   /// input's line breaking decisions.
477   void format(unsigned FirstIndent, const AnnotatedLine *Line) {
478     LineState State =
479         Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false);
480     while (State.NextToken != NULL) {
481       bool Newline =
482           Indenter->mustBreak(State) ||
483           (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
484       Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
485     }
486   }
487 
488 private:
489   ContinuationIndenter *Indenter;
490 };
491 
492 class LineJoiner {
493 public:
494   LineJoiner(const FormatStyle &Style) : Style(Style) {}
495 
496   /// \brief Calculates how many lines can be merged into 1 starting at \p I.
497   unsigned
498   tryFitMultipleLinesInOne(unsigned Indent,
499                            SmallVectorImpl<AnnotatedLine *>::const_iterator I,
500                            SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
501     // We can never merge stuff if there are trailing line comments.
502     const AnnotatedLine *TheLine = *I;
503     if (TheLine->Last->Type == TT_LineComment)
504       return 0;
505 
506     if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
507       return 0;
508 
509     unsigned Limit =
510         Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
511     // If we already exceed the column limit, we set 'Limit' to 0. The different
512     // tryMerge..() functions can then decide whether to still do merging.
513     Limit = TheLine->Last->TotalLength > Limit
514                 ? 0
515                 : Limit - TheLine->Last->TotalLength;
516 
517     if (I + 1 == E || I[1]->Type == LT_Invalid)
518       return 0;
519 
520     if (TheLine->Last->Type == TT_FunctionLBrace &&
521         TheLine->First != TheLine->Last) {
522       return Style.AllowShortFunctionsOnASingleLine
523                  ? tryMergeSimpleBlock(I, E, Limit)
524                  : 0;
525     }
526     if (TheLine->Last->is(tok::l_brace)) {
527       return Style.BreakBeforeBraces == FormatStyle::BS_Attach
528                  ? tryMergeSimpleBlock(I, E, Limit)
529                  : 0;
530     }
531     if (I[1]->First->Type == TT_FunctionLBrace &&
532         Style.BreakBeforeBraces != FormatStyle::BS_Attach) {
533       // Check for Limit <= 2 to account for the " {".
534       if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine)))
535         return 0;
536       Limit -= 2;
537 
538       unsigned MergedLines = 0;
539       if (Style.AllowShortFunctionsOnASingleLine) {
540         MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
541         // If we managed to merge the block, count the function header, which is
542         // on a separate line.
543         if (MergedLines > 0)
544           ++MergedLines;
545       }
546       return MergedLines;
547     }
548     if (TheLine->First->is(tok::kw_if)) {
549       return Style.AllowShortIfStatementsOnASingleLine
550                  ? tryMergeSimpleControlStatement(I, E, Limit)
551                  : 0;
552     }
553     if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
554       return Style.AllowShortLoopsOnASingleLine
555                  ? tryMergeSimpleControlStatement(I, E, Limit)
556                  : 0;
557     }
558     if (TheLine->InPPDirective &&
559         (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
560       return tryMergeSimplePPDirective(I, E, Limit);
561     }
562     return 0;
563   }
564 
565 private:
566   unsigned
567   tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
568                             SmallVectorImpl<AnnotatedLine *>::const_iterator E,
569                             unsigned Limit) {
570     if (Limit == 0)
571       return 0;
572     if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)
573       return 0;
574     if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
575       return 0;
576     if (1 + I[1]->Last->TotalLength > Limit)
577       return 0;
578     return 1;
579   }
580 
581   unsigned tryMergeSimpleControlStatement(
582       SmallVectorImpl<AnnotatedLine *>::const_iterator I,
583       SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
584     if (Limit == 0)
585       return 0;
586     if ((Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
587          Style.BreakBeforeBraces == FormatStyle::BS_GNU) &&
588         I[1]->First->is(tok::l_brace))
589       return 0;
590     if (I[1]->InPPDirective != (*I)->InPPDirective ||
591         (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline))
592       return 0;
593     AnnotatedLine &Line = **I;
594     if (Line.Last->isNot(tok::r_paren))
595       return 0;
596     if (1 + I[1]->Last->TotalLength > Limit)
597       return 0;
598     if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
599                              tok::kw_while) ||
600         I[1]->First->Type == TT_LineComment)
601       return 0;
602     // Only inline simple if's (no nested if or else).
603     if (I + 2 != E && Line.First->is(tok::kw_if) &&
604         I[2]->First->is(tok::kw_else))
605       return 0;
606     return 1;
607   }
608 
609   unsigned
610   tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
611                       SmallVectorImpl<AnnotatedLine *>::const_iterator E,
612                       unsigned Limit) {
613     // First, check that the current line allows merging. This is the case if
614     // we're not in a control flow statement and the last token is an opening
615     // brace.
616     AnnotatedLine &Line = **I;
617     if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
618                             tok::kw_else, tok::kw_try, tok::kw_catch,
619                             tok::kw_for,
620                             // This gets rid of all ObjC @ keywords and methods.
621                             tok::at, tok::minus, tok::plus))
622       return 0;
623 
624     FormatToken *Tok = I[1]->First;
625     if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
626         (Tok->getNextNonComment() == NULL ||
627          Tok->getNextNonComment()->is(tok::semi))) {
628       // We merge empty blocks even if the line exceeds the column limit.
629       Tok->SpacesRequiredBefore = 0;
630       Tok->CanBreakBefore = true;
631       return 1;
632     } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
633       // Check that we still have three lines and they fit into the limit.
634       if (I + 2 == E || I[2]->Type == LT_Invalid)
635         return 0;
636 
637       if (!nextTwoLinesFitInto(I, Limit))
638         return 0;
639 
640       // Second, check that the next line does not contain any braces - if it
641       // does, readability declines when putting it into a single line.
642       if (I[1]->Last->Type == TT_LineComment || Tok->MustBreakBefore)
643         return 0;
644       do {
645         if (Tok->isOneOf(tok::l_brace, tok::r_brace))
646           return 0;
647         Tok = Tok->Next;
648       } while (Tok != NULL);
649 
650       // Last, check that the third line contains a single closing brace.
651       Tok = I[2]->First;
652       if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) ||
653           Tok->MustBreakBefore)
654         return 0;
655 
656       return 2;
657     }
658     return 0;
659   }
660 
661   bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
662                            unsigned Limit) {
663     return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
664   }
665 
666   bool containsMustBreak(const AnnotatedLine *Line) {
667     for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
668       if (Tok->MustBreakBefore)
669         return true;
670     }
671     return false;
672   }
673 
674   const FormatStyle &Style;
675 };
676 
677 class UnwrappedLineFormatter {
678 public:
679   UnwrappedLineFormatter(ContinuationIndenter *Indenter,
680                          WhitespaceManager *Whitespaces,
681                          const FormatStyle &Style)
682       : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
683         Joiner(Style) {}
684 
685   unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
686                   int AdditionalIndent = 0, bool FixBadIndentation = false) {
687     assert(!Lines.empty());
688     unsigned Penalty = 0;
689     std::vector<int> IndentForLevel;
690     for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i)
691       IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
692     const AnnotatedLine *PreviousLine = NULL;
693     for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(),
694                                                           E = Lines.end();
695          I != E; ++I) {
696       const AnnotatedLine &TheLine = **I;
697       const FormatToken *FirstTok = TheLine.First;
698       int Offset = getIndentOffset(*FirstTok);
699 
700       // Determine indent and try to merge multiple unwrapped lines.
701       unsigned Indent;
702       if (TheLine.InPPDirective) {
703         Indent = TheLine.Level * Style.IndentWidth;
704       } else {
705         while (IndentForLevel.size() <= TheLine.Level)
706           IndentForLevel.push_back(-1);
707         IndentForLevel.resize(TheLine.Level + 1);
708         Indent = getIndent(IndentForLevel, TheLine.Level);
709       }
710       unsigned LevelIndent = Indent;
711       if (static_cast<int>(Indent) + Offset >= 0)
712         Indent += Offset;
713 
714       // Merge multiple lines if possible.
715       unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E);
716       if (MergedLines > 0 && Style.ColumnLimit == 0) {
717         // Disallow line merging if there is a break at the start of one of the
718         // input lines.
719         for (unsigned i = 0; i < MergedLines; ++i) {
720           if (I[i + 1]->First->NewlinesBefore > 0)
721             MergedLines = 0;
722         }
723       }
724       if (!DryRun) {
725         for (unsigned i = 0; i < MergedLines; ++i) {
726           join(*I[i], *I[i + 1]);
727         }
728       }
729       I += MergedLines;
730 
731       bool FixIndentation =
732           FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn);
733       if (TheLine.First->is(tok::eof)) {
734         if (PreviousLine && PreviousLine->Affected && !DryRun) {
735           // Remove the file's trailing whitespace.
736           unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u);
737           Whitespaces->replaceWhitespace(*TheLine.First, Newlines,
738                                          /*IndentLevel=*/0, /*Spaces=*/0,
739                                          /*TargetColumn=*/0);
740         }
741       } else if (TheLine.Type != LT_Invalid &&
742                  (TheLine.Affected || FixIndentation)) {
743         if (FirstTok->WhitespaceRange.isValid()) {
744           if (!DryRun)
745             formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
746                              Indent, TheLine.InPPDirective);
747         } else {
748           Indent = LevelIndent = FirstTok->OriginalColumn;
749         }
750 
751         // If everything fits on a single line, just put it there.
752         unsigned ColumnLimit = Style.ColumnLimit;
753         if (I + 1 != E) {
754           AnnotatedLine *NextLine = I[1];
755           if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline)
756             ColumnLimit = getColumnLimit(TheLine.InPPDirective);
757         }
758 
759         if (TheLine.Last->TotalLength + Indent <= ColumnLimit) {
760           LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun);
761           while (State.NextToken != NULL)
762             Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
763         } else if (Style.ColumnLimit == 0) {
764           // FIXME: Implement nested blocks for ColumnLimit = 0.
765           NoColumnLimitFormatter Formatter(Indenter);
766           if (!DryRun)
767             Formatter.format(Indent, &TheLine);
768         } else {
769           Penalty += format(TheLine, Indent, DryRun);
770         }
771 
772         if (!TheLine.InPPDirective)
773           IndentForLevel[TheLine.Level] = LevelIndent;
774       } else if (TheLine.ChildrenAffected) {
775         format(TheLine.Children, DryRun);
776       } else {
777         // Format the first token if necessary, and notify the WhitespaceManager
778         // about the unchanged whitespace.
779         for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
780           if (Tok == TheLine.First &&
781               (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
782             unsigned LevelIndent = Tok->OriginalColumn;
783             if (!DryRun) {
784               // Remove trailing whitespace of the previous line.
785               if ((PreviousLine && PreviousLine->Affected) ||
786                   TheLine.LeadingEmptyLinesAffected) {
787                 formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent,
788                                  TheLine.InPPDirective);
789               } else {
790                 Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
791               }
792             }
793 
794             if (static_cast<int>(LevelIndent) - Offset >= 0)
795               LevelIndent -= Offset;
796             if (Tok->isNot(tok::comment) && !TheLine.InPPDirective)
797               IndentForLevel[TheLine.Level] = LevelIndent;
798           } else if (!DryRun) {
799             Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
800           }
801         }
802       }
803       if (!DryRun) {
804         for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
805           Tok->Finalized = true;
806         }
807       }
808       PreviousLine = *I;
809     }
810     return Penalty;
811   }
812 
813 private:
814   /// \brief Formats an \c AnnotatedLine and returns the penalty.
815   ///
816   /// If \p DryRun is \c false, directly applies the changes.
817   unsigned format(const AnnotatedLine &Line, unsigned FirstIndent,
818                   bool DryRun) {
819     LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
820 
821     // If the ObjC method declaration does not fit on a line, we should format
822     // it with one arg per line.
823     if (State.Line->Type == LT_ObjCMethodDecl)
824       State.Stack.back().BreakBeforeParameter = true;
825 
826     // Find best solution in solution space.
827     return analyzeSolutionSpace(State, DryRun);
828   }
829 
830   /// \brief An edge in the solution space from \c Previous->State to \c State,
831   /// inserting a newline dependent on the \c NewLine.
832   struct StateNode {
833     StateNode(const LineState &State, bool NewLine, StateNode *Previous)
834         : State(State), NewLine(NewLine), Previous(Previous) {}
835     LineState State;
836     bool NewLine;
837     StateNode *Previous;
838   };
839 
840   /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
841   ///
842   /// In case of equal penalties, we want to prefer states that were inserted
843   /// first. During state generation we make sure that we insert states first
844   /// that break the line as late as possible.
845   typedef std::pair<unsigned, unsigned> OrderedPenalty;
846 
847   /// \brief An item in the prioritized BFS search queue. The \c StateNode's
848   /// \c State has the given \c OrderedPenalty.
849   typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
850 
851   /// \brief The BFS queue type.
852   typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
853                               std::greater<QueueItem> > QueueType;
854 
855   /// \brief Get the offset of the line relatively to the level.
856   ///
857   /// For example, 'public:' labels in classes are offset by 1 or 2
858   /// characters to the left from their level.
859   int getIndentOffset(const FormatToken &RootToken) {
860     if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
861       return Style.AccessModifierOffset;
862     return 0;
863   }
864 
865   /// \brief Add a new line and the required indent before the first Token
866   /// of the \c UnwrappedLine if there was no structural parsing error.
867   void formatFirstToken(FormatToken &RootToken,
868                         const AnnotatedLine *PreviousLine, unsigned IndentLevel,
869                         unsigned Indent, bool InPPDirective) {
870     unsigned Newlines =
871         std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
872     // Remove empty lines before "}" where applicable.
873     if (RootToken.is(tok::r_brace) &&
874         (!RootToken.Next ||
875          (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
876       Newlines = std::min(Newlines, 1u);
877     if (Newlines == 0 && !RootToken.IsFirst)
878       Newlines = 1;
879 
880     // Insert extra new line before access specifiers.
881     if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
882         RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
883       ++Newlines;
884 
885     // Remove empty lines after access specifiers.
886     if (PreviousLine && PreviousLine->First->isAccessSpecifier())
887       Newlines = std::min(1u, Newlines);
888 
889     Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent,
890                                    Indent, InPPDirective &&
891                                                !RootToken.HasUnescapedNewline);
892   }
893 
894   /// \brief Get the indent of \p Level from \p IndentForLevel.
895   ///
896   /// \p IndentForLevel must contain the indent for the level \c l
897   /// at \p IndentForLevel[l], or a value < 0 if the indent for
898   /// that level is unknown.
899   unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
900     if (IndentForLevel[Level] != -1)
901       return IndentForLevel[Level];
902     if (Level == 0)
903       return 0;
904     return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
905   }
906 
907   void join(AnnotatedLine &A, const AnnotatedLine &B) {
908     assert(!A.Last->Next);
909     assert(!B.First->Previous);
910     if (B.Affected)
911       A.Affected = true;
912     A.Last->Next = B.First;
913     B.First->Previous = A.Last;
914     B.First->CanBreakBefore = true;
915     unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
916     for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
917       Tok->TotalLength += LengthA;
918       A.Last = Tok;
919     }
920   }
921 
922   unsigned getColumnLimit(bool InPPDirective) const {
923     // In preprocessor directives reserve two chars for trailing " \"
924     return Style.ColumnLimit - (InPPDirective ? 2 : 0);
925   }
926 
927   /// \brief Analyze the entire solution space starting from \p InitialState.
928   ///
929   /// This implements a variant of Dijkstra's algorithm on the graph that spans
930   /// the solution space (\c LineStates are the nodes). The algorithm tries to
931   /// find the shortest path (the one with lowest penalty) from \p InitialState
932   /// to a state where all tokens are placed. Returns the penalty.
933   ///
934   /// If \p DryRun is \c false, directly applies the changes.
935   unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) {
936     std::set<LineState> Seen;
937 
938     // Increasing count of \c StateNode items we have created. This is used to
939     // create a deterministic order independent of the container.
940     unsigned Count = 0;
941     QueueType Queue;
942 
943     // Insert start element into queue.
944     StateNode *Node =
945         new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
946     Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
947     ++Count;
948 
949     unsigned Penalty = 0;
950 
951     // While not empty, take first element and follow edges.
952     while (!Queue.empty()) {
953       Penalty = Queue.top().first.first;
954       StateNode *Node = Queue.top().second;
955       if (Node->State.NextToken == NULL) {
956         DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
957         break;
958       }
959       Queue.pop();
960 
961       // Cut off the analysis of certain solutions if the analysis gets too
962       // complex. See description of IgnoreStackForComparison.
963       if (Count > 10000)
964         Node->State.IgnoreStackForComparison = true;
965 
966       if (!Seen.insert(Node->State).second)
967         // State already examined with lower penalty.
968         continue;
969 
970       FormatDecision LastFormat = Node->State.NextToken->Decision;
971       if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
972         addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
973       if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
974         addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
975     }
976 
977     if (Queue.empty()) {
978       // We were unable to find a solution, do nothing.
979       // FIXME: Add diagnostic?
980       DEBUG(llvm::dbgs() << "Could not find a solution.\n");
981       return 0;
982     }
983 
984     // Reconstruct the solution.
985     if (!DryRun)
986       reconstructPath(InitialState, Queue.top().second);
987 
988     DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
989     DEBUG(llvm::dbgs() << "---\n");
990 
991     return Penalty;
992   }
993 
994   void reconstructPath(LineState &State, StateNode *Current) {
995     std::deque<StateNode *> Path;
996     // We do not need a break before the initial token.
997     while (Current->Previous) {
998       Path.push_front(Current);
999       Current = Current->Previous;
1000     }
1001     for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
1002          I != E; ++I) {
1003       unsigned Penalty = 0;
1004       formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
1005       Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
1006 
1007       DEBUG({
1008         if ((*I)->NewLine) {
1009           llvm::dbgs() << "Penalty for placing "
1010                        << (*I)->Previous->State.NextToken->Tok.getName() << ": "
1011                        << Penalty << "\n";
1012         }
1013       });
1014     }
1015   }
1016 
1017   /// \brief Add the following state to the analysis queue \c Queue.
1018   ///
1019   /// Assume the current state is \p PreviousNode and has been reached with a
1020   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
1021   void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
1022                            bool NewLine, unsigned *Count, QueueType *Queue) {
1023     if (NewLine && !Indenter->canBreak(PreviousNode->State))
1024       return;
1025     if (!NewLine && Indenter->mustBreak(PreviousNode->State))
1026       return;
1027 
1028     StateNode *Node = new (Allocator.Allocate())
1029         StateNode(PreviousNode->State, NewLine, PreviousNode);
1030     if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
1031       return;
1032 
1033     Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
1034 
1035     Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
1036     ++(*Count);
1037   }
1038 
1039   /// \brief If the \p State's next token is an r_brace closing a nested block,
1040   /// format the nested block before it.
1041   ///
1042   /// Returns \c true if all children could be placed successfully and adapts
1043   /// \p Penalty as well as \p State. If \p DryRun is false, also directly
1044   /// creates changes using \c Whitespaces.
1045   ///
1046   /// The crucial idea here is that children always get formatted upon
1047   /// encountering the closing brace right after the nested block. Now, if we
1048   /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
1049   /// \c false), the entire block has to be kept on the same line (which is only
1050   /// possible if it fits on the line, only contains a single statement, etc.
1051   ///
1052   /// If \p NewLine is true, we format the nested block on separate lines, i.e.
1053   /// break after the "{", format all lines with correct indentation and the put
1054   /// the closing "}" on yet another new line.
1055   ///
1056   /// This enables us to keep the simple structure of the
1057   /// \c UnwrappedLineFormatter, where we only have two options for each token:
1058   /// break or don't break.
1059   bool formatChildren(LineState &State, bool NewLine, bool DryRun,
1060                       unsigned &Penalty) {
1061     FormatToken &Previous = *State.NextToken->Previous;
1062     const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
1063     if (!LBrace || LBrace->isNot(tok::l_brace) ||
1064         LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
1065       // The previous token does not open a block. Nothing to do. We don't
1066       // assert so that we can simply call this function for all tokens.
1067       return true;
1068 
1069     if (NewLine) {
1070       int AdditionalIndent = State.Stack.back().Indent -
1071                              Previous.Children[0]->Level * Style.IndentWidth;
1072       Penalty += format(Previous.Children, DryRun, AdditionalIndent,
1073                         /*FixBadIndentation=*/true);
1074       return true;
1075     }
1076 
1077     // Cannot merge multiple statements into a single line.
1078     if (Previous.Children.size() > 1)
1079       return false;
1080 
1081     // We can't put the closing "}" on a line with a trailing comment.
1082     if (Previous.Children[0]->Last->isTrailingComment())
1083       return false;
1084 
1085     if (!DryRun) {
1086       Whitespaces->replaceWhitespace(
1087           *Previous.Children[0]->First,
1088           /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
1089           /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
1090     }
1091     Penalty += format(*Previous.Children[0], State.Column + 1, DryRun);
1092 
1093     State.Column += 1 + Previous.Children[0]->Last->TotalLength;
1094     return true;
1095   }
1096 
1097   ContinuationIndenter *Indenter;
1098   WhitespaceManager *Whitespaces;
1099   FormatStyle Style;
1100   LineJoiner Joiner;
1101 
1102   llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
1103 };
1104 
1105 class FormatTokenLexer {
1106 public:
1107   FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
1108                    encoding::Encoding Encoding)
1109       : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0),
1110         TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
1111         IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
1112     Lex.SetKeepWhitespaceMode(true);
1113   }
1114 
1115   ArrayRef<FormatToken *> lex() {
1116     assert(Tokens.empty());
1117     do {
1118       Tokens.push_back(getNextToken());
1119       tryMergePreviousTokens();
1120     } while (Tokens.back()->Tok.isNot(tok::eof));
1121     return Tokens;
1122   }
1123 
1124   IdentifierTable &getIdentTable() { return IdentTable; }
1125 
1126 private:
1127   void tryMergePreviousTokens() {
1128     if (tryMerge_TMacro())
1129       return;
1130 
1131     if (Style.Language == FormatStyle::LK_JavaScript) {
1132       static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
1133       static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
1134       static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater,
1135                                                tok::greaterequal };
1136       // FIXME: We probably need to change token type to mimic operator with the
1137       // correct priority.
1138       if (tryMergeTokens(JSIdentity))
1139         return;
1140       if (tryMergeTokens(JSNotIdentity))
1141         return;
1142       if (tryMergeTokens(JSShiftEqual))
1143         return;
1144     }
1145   }
1146 
1147   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) {
1148     if (Tokens.size() < Kinds.size())
1149       return false;
1150 
1151     SmallVectorImpl<FormatToken *>::const_iterator First =
1152         Tokens.end() - Kinds.size();
1153     if (!First[0]->is(Kinds[0]))
1154       return false;
1155     unsigned AddLength = 0;
1156     for (unsigned i = 1; i < Kinds.size(); ++i) {
1157       if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
1158                                          First[i]->WhitespaceRange.getEnd())
1159         return false;
1160       AddLength += First[i]->TokenText.size();
1161     }
1162     Tokens.resize(Tokens.size() - Kinds.size() + 1);
1163     First[0]->TokenText = StringRef(First[0]->TokenText.data(),
1164                                     First[0]->TokenText.size() + AddLength);
1165     First[0]->ColumnWidth += AddLength;
1166     return true;
1167   }
1168 
1169   bool tryMerge_TMacro() {
1170     if (Tokens.size() < 4)
1171       return false;
1172     FormatToken *Last = Tokens.back();
1173     if (!Last->is(tok::r_paren))
1174       return false;
1175 
1176     FormatToken *String = Tokens[Tokens.size() - 2];
1177     if (!String->is(tok::string_literal) || String->IsMultiline)
1178       return false;
1179 
1180     if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
1181       return false;
1182 
1183     FormatToken *Macro = Tokens[Tokens.size() - 4];
1184     if (Macro->TokenText != "_T")
1185       return false;
1186 
1187     const char *Start = Macro->TokenText.data();
1188     const char *End = Last->TokenText.data() + Last->TokenText.size();
1189     String->TokenText = StringRef(Start, End - Start);
1190     String->IsFirst = Macro->IsFirst;
1191     String->LastNewlineOffset = Macro->LastNewlineOffset;
1192     String->WhitespaceRange = Macro->WhitespaceRange;
1193     String->OriginalColumn = Macro->OriginalColumn;
1194     String->ColumnWidth = encoding::columnWidthWithTabs(
1195         String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
1196 
1197     Tokens.pop_back();
1198     Tokens.pop_back();
1199     Tokens.pop_back();
1200     Tokens.back() = String;
1201     return true;
1202   }
1203 
1204   FormatToken *getNextToken() {
1205     if (GreaterStashed) {
1206       // Create a synthesized second '>' token.
1207       // FIXME: Increment Column and set OriginalColumn.
1208       Token Greater = FormatTok->Tok;
1209       FormatTok = new (Allocator.Allocate()) FormatToken;
1210       FormatTok->Tok = Greater;
1211       SourceLocation GreaterLocation =
1212           FormatTok->Tok.getLocation().getLocWithOffset(1);
1213       FormatTok->WhitespaceRange =
1214           SourceRange(GreaterLocation, GreaterLocation);
1215       FormatTok->TokenText = ">";
1216       FormatTok->ColumnWidth = 1;
1217       GreaterStashed = false;
1218       return FormatTok;
1219     }
1220 
1221     FormatTok = new (Allocator.Allocate()) FormatToken;
1222     readRawToken(*FormatTok);
1223     SourceLocation WhitespaceStart =
1224         FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
1225     FormatTok->IsFirst = IsFirstToken;
1226     IsFirstToken = false;
1227 
1228     // Consume and record whitespace until we find a significant token.
1229     unsigned WhitespaceLength = TrailingWhitespace;
1230     while (FormatTok->Tok.is(tok::unknown)) {
1231       for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
1232         switch (FormatTok->TokenText[i]) {
1233         case '\n':
1234           ++FormatTok->NewlinesBefore;
1235           // FIXME: This is technically incorrect, as it could also
1236           // be a literal backslash at the end of the line.
1237           if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
1238                          (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
1239                           FormatTok->TokenText[i - 2] != '\\')))
1240             FormatTok->HasUnescapedNewline = true;
1241           FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1242           Column = 0;
1243           break;
1244         case '\r':
1245         case '\f':
1246         case '\v':
1247           Column = 0;
1248           break;
1249         case ' ':
1250           ++Column;
1251           break;
1252         case '\t':
1253           Column += Style.TabWidth - Column % Style.TabWidth;
1254           break;
1255         case '\\':
1256           ++Column;
1257           if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
1258                              FormatTok->TokenText[i + 1] != '\n'))
1259             FormatTok->Type = TT_ImplicitStringLiteral;
1260           break;
1261         default:
1262           FormatTok->Type = TT_ImplicitStringLiteral;
1263           ++Column;
1264           break;
1265         }
1266       }
1267 
1268       if (FormatTok->Type == TT_ImplicitStringLiteral)
1269         break;
1270       WhitespaceLength += FormatTok->Tok.getLength();
1271 
1272       readRawToken(*FormatTok);
1273     }
1274 
1275     // In case the token starts with escaped newlines, we want to
1276     // take them into account as whitespace - this pattern is quite frequent
1277     // in macro definitions.
1278     // FIXME: Add a more explicit test.
1279     while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
1280            FormatTok->TokenText[1] == '\n') {
1281       // FIXME: ++FormatTok->NewlinesBefore is missing...
1282       WhitespaceLength += 2;
1283       Column = 0;
1284       FormatTok->TokenText = FormatTok->TokenText.substr(2);
1285     }
1286 
1287     FormatTok->WhitespaceRange = SourceRange(
1288         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1289 
1290     FormatTok->OriginalColumn = Column;
1291 
1292     TrailingWhitespace = 0;
1293     if (FormatTok->Tok.is(tok::comment)) {
1294       // FIXME: Add the trimmed whitespace to Column.
1295       StringRef UntrimmedText = FormatTok->TokenText;
1296       FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
1297       TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
1298     } else if (FormatTok->Tok.is(tok::raw_identifier)) {
1299       IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
1300       FormatTok->Tok.setIdentifierInfo(&Info);
1301       FormatTok->Tok.setKind(Info.getTokenID());
1302     } else if (FormatTok->Tok.is(tok::greatergreater)) {
1303       FormatTok->Tok.setKind(tok::greater);
1304       FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1305       GreaterStashed = true;
1306     }
1307 
1308     // Now FormatTok is the next non-whitespace token.
1309 
1310     StringRef Text = FormatTok->TokenText;
1311     size_t FirstNewlinePos = Text.find('\n');
1312     if (FirstNewlinePos == StringRef::npos) {
1313       // FIXME: ColumnWidth actually depends on the start column, we need to
1314       // take this into account when the token is moved.
1315       FormatTok->ColumnWidth =
1316           encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
1317       Column += FormatTok->ColumnWidth;
1318     } else {
1319       FormatTok->IsMultiline = true;
1320       // FIXME: ColumnWidth actually depends on the start column, we need to
1321       // take this into account when the token is moved.
1322       FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
1323           Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
1324 
1325       // The last line of the token always starts in column 0.
1326       // Thus, the length can be precomputed even in the presence of tabs.
1327       FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
1328           Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
1329           Encoding);
1330       Column = FormatTok->LastLineColumnWidth;
1331     }
1332 
1333     return FormatTok;
1334   }
1335 
1336   FormatToken *FormatTok;
1337   bool IsFirstToken;
1338   bool GreaterStashed;
1339   unsigned Column;
1340   unsigned TrailingWhitespace;
1341   Lexer &Lex;
1342   SourceManager &SourceMgr;
1343   FormatStyle &Style;
1344   IdentifierTable IdentTable;
1345   encoding::Encoding Encoding;
1346   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
1347   SmallVector<FormatToken *, 16> Tokens;
1348 
1349   void readRawToken(FormatToken &Tok) {
1350     Lex.LexFromRawLexer(Tok.Tok);
1351     Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1352                               Tok.Tok.getLength());
1353     // For formatting, treat unterminated string literals like normal string
1354     // literals.
1355     if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
1356         Tok.TokenText[0] == '"') {
1357       Tok.Tok.setKind(tok::string_literal);
1358       Tok.IsUnterminatedLiteral = true;
1359     }
1360   }
1361 };
1362 
1363 static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1364   switch (Language) {
1365   case FormatStyle::LK_Cpp:
1366     return "C++";
1367   case FormatStyle::LK_JavaScript:
1368     return "JavaScript";
1369   case FormatStyle::LK_Proto:
1370     return "Proto";
1371   default:
1372     return "Unknown";
1373   }
1374 }
1375 
1376 class Formatter : public UnwrappedLineConsumer {
1377 public:
1378   Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
1379             const std::vector<CharSourceRange> &Ranges)
1380       : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1381         Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
1382         Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
1383         Encoding(encoding::detectEncoding(Lex.getBuffer())) {
1384     DEBUG(llvm::dbgs() << "File encoding: "
1385                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1386                                                                : "unknown")
1387                        << "\n");
1388     DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
1389                        << "\n");
1390   }
1391 
1392   tooling::Replacements format() {
1393     tooling::Replacements Result;
1394     FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
1395 
1396     UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
1397     bool StructuralError = Parser.parse();
1398     assert(UnwrappedLines.rbegin()->empty());
1399     for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1400          ++Run) {
1401       DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1402       SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1403       for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1404         AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1405       }
1406       tooling::Replacements RunResult =
1407           format(AnnotatedLines, StructuralError, Tokens);
1408       DEBUG({
1409         llvm::dbgs() << "Replacements for run " << Run << ":\n";
1410         for (tooling::Replacements::iterator I = RunResult.begin(),
1411                                              E = RunResult.end();
1412              I != E; ++I) {
1413           llvm::dbgs() << I->toString() << "\n";
1414         }
1415       });
1416       for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1417         delete AnnotatedLines[i];
1418       }
1419       Result.insert(RunResult.begin(), RunResult.end());
1420       Whitespaces.reset();
1421     }
1422     return Result;
1423   }
1424 
1425   tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1426                                bool StructuralError, FormatTokenLexer &Tokens) {
1427     TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
1428     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1429       Annotator.annotate(*AnnotatedLines[i]);
1430     }
1431     deriveLocalStyle(AnnotatedLines);
1432     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1433       Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1434     }
1435     computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
1436 
1437     Annotator.setCommentLineLevels(AnnotatedLines);
1438     ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding,
1439                                   BinPackInconclusiveFunctions);
1440     UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style);
1441     Formatter.format(AnnotatedLines, /*DryRun=*/false);
1442     return Whitespaces.generateReplacements();
1443   }
1444 
1445 private:
1446   // Determines which lines are affected by the SourceRanges given as input.
1447   // Returns \c true if at least one line between I and E or one of their
1448   // children is affected.
1449   bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
1450                             SmallVectorImpl<AnnotatedLine *>::iterator E) {
1451     bool SomeLineAffected = false;
1452     const AnnotatedLine *PreviousLine = NULL;
1453     while (I != E) {
1454       AnnotatedLine *Line = *I;
1455       Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
1456 
1457       // If a line is part of a preprocessor directive, it needs to be formatted
1458       // if any token within the directive is affected.
1459       if (Line->InPPDirective) {
1460         FormatToken *Last = Line->Last;
1461         SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
1462         while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
1463           Last = (*PPEnd)->Last;
1464           ++PPEnd;
1465         }
1466 
1467         if (affectsTokenRange(*Line->First, *Last,
1468                               /*IncludeLeadingNewlines=*/false)) {
1469           SomeLineAffected = true;
1470           markAllAsAffected(I, PPEnd);
1471         }
1472         I = PPEnd;
1473         continue;
1474       }
1475 
1476       if (nonPPLineAffected(Line, PreviousLine))
1477         SomeLineAffected = true;
1478 
1479       PreviousLine = Line;
1480       ++I;
1481     }
1482     return SomeLineAffected;
1483   }
1484 
1485   // Determines whether 'Line' is affected by the SourceRanges given as input.
1486   // Returns \c true if line or one if its children is affected.
1487   bool nonPPLineAffected(AnnotatedLine *Line,
1488                          const AnnotatedLine *PreviousLine) {
1489     bool SomeLineAffected = false;
1490     Line->ChildrenAffected =
1491         computeAffectedLines(Line->Children.begin(), Line->Children.end());
1492     if (Line->ChildrenAffected)
1493       SomeLineAffected = true;
1494 
1495     // Stores whether one of the line's tokens is directly affected.
1496     bool SomeTokenAffected = false;
1497     // Stores whether we need to look at the leading newlines of the next token
1498     // in order to determine whether it was affected.
1499     bool IncludeLeadingNewlines = false;
1500 
1501     // Stores whether the first child line of any of this line's tokens is
1502     // affected.
1503     bool SomeFirstChildAffected = false;
1504 
1505     for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
1506       // Determine whether 'Tok' was affected.
1507       if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
1508         SomeTokenAffected = true;
1509 
1510       // Determine whether the first child of 'Tok' was affected.
1511       if (!Tok->Children.empty() && Tok->Children.front()->Affected)
1512         SomeFirstChildAffected = true;
1513 
1514       IncludeLeadingNewlines = Tok->Children.empty();
1515     }
1516 
1517     // Was this line moved, i.e. has it previously been on the same line as an
1518     // affected line?
1519     bool LineMoved = PreviousLine && PreviousLine->Affected &&
1520                      Line->First->NewlinesBefore == 0;
1521 
1522     bool IsContinuedComment = Line->First->is(tok::comment) &&
1523                               Line->First->Next == NULL &&
1524                               Line->First->NewlinesBefore < 2 && PreviousLine &&
1525                               PreviousLine->Affected &&
1526                               PreviousLine->Last->is(tok::comment);
1527 
1528     if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
1529         IsContinuedComment) {
1530       Line->Affected = true;
1531       SomeLineAffected = true;
1532     }
1533     return SomeLineAffected;
1534   }
1535 
1536   // Marks all lines between I and E as well as all their children as affected.
1537   void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
1538                          SmallVectorImpl<AnnotatedLine *>::iterator E) {
1539     while (I != E) {
1540       (*I)->Affected = true;
1541       markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
1542       ++I;
1543     }
1544   }
1545 
1546   // Returns true if the range from 'First' to 'Last' intersects with one of the
1547   // input ranges.
1548   bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
1549                          bool IncludeLeadingNewlines) {
1550     SourceLocation Start = First.WhitespaceRange.getBegin();
1551     if (!IncludeLeadingNewlines)
1552       Start = Start.getLocWithOffset(First.LastNewlineOffset);
1553     SourceLocation End = Last.getStartOfNonWhitespace();
1554     if (Last.TokenText.size() > 0)
1555       End = End.getLocWithOffset(Last.TokenText.size() - 1);
1556     CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
1557     return affectsCharSourceRange(Range);
1558   }
1559 
1560   // Returns true if one of the input ranges intersect the leading empty lines
1561   // before 'Tok'.
1562   bool affectsLeadingEmptyLines(const FormatToken &Tok) {
1563     CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
1564         Tok.WhitespaceRange.getBegin(),
1565         Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
1566     return affectsCharSourceRange(EmptyLineRange);
1567   }
1568 
1569   // Returns true if 'Range' intersects with one of the input ranges.
1570   bool affectsCharSourceRange(const CharSourceRange &Range) {
1571     for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
1572                                                           E = Ranges.end();
1573          I != E; ++I) {
1574       if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
1575           !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
1576         return true;
1577     }
1578     return false;
1579   }
1580 
1581   static bool inputUsesCRLF(StringRef Text) {
1582     return Text.count('\r') * 2 > Text.count('\n');
1583   }
1584 
1585   void
1586   deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1587     unsigned CountBoundToVariable = 0;
1588     unsigned CountBoundToType = 0;
1589     bool HasCpp03IncompatibleFormat = false;
1590     bool HasBinPackedFunction = false;
1591     bool HasOnePerLineFunction = false;
1592     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1593       if (!AnnotatedLines[i]->First->Next)
1594         continue;
1595       FormatToken *Tok = AnnotatedLines[i]->First->Next;
1596       while (Tok->Next) {
1597         if (Tok->Type == TT_PointerOrReference) {
1598           bool SpacesBefore =
1599               Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1600           bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1601                              Tok->Next->WhitespaceRange.getEnd();
1602           if (SpacesBefore && !SpacesAfter)
1603             ++CountBoundToVariable;
1604           else if (!SpacesBefore && SpacesAfter)
1605             ++CountBoundToType;
1606         }
1607 
1608         if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1609           if (Tok->is(tok::coloncolon) &&
1610               Tok->Previous->Type == TT_TemplateOpener)
1611             HasCpp03IncompatibleFormat = true;
1612           if (Tok->Type == TT_TemplateCloser &&
1613               Tok->Previous->Type == TT_TemplateCloser)
1614             HasCpp03IncompatibleFormat = true;
1615         }
1616 
1617         if (Tok->PackingKind == PPK_BinPacked)
1618           HasBinPackedFunction = true;
1619         if (Tok->PackingKind == PPK_OnePerLine)
1620           HasOnePerLineFunction = true;
1621 
1622         Tok = Tok->Next;
1623       }
1624     }
1625     if (Style.DerivePointerBinding) {
1626       if (CountBoundToType > CountBoundToVariable)
1627         Style.PointerBindsToType = true;
1628       else if (CountBoundToType < CountBoundToVariable)
1629         Style.PointerBindsToType = false;
1630     }
1631     if (Style.Standard == FormatStyle::LS_Auto) {
1632       Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1633                                                   : FormatStyle::LS_Cpp03;
1634     }
1635     BinPackInconclusiveFunctions =
1636         HasBinPackedFunction || !HasOnePerLineFunction;
1637   }
1638 
1639   virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1640     assert(!UnwrappedLines.empty());
1641     UnwrappedLines.back().push_back(TheLine);
1642   }
1643 
1644   virtual void finishRun() {
1645     UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1646   }
1647 
1648   FormatStyle Style;
1649   Lexer &Lex;
1650   SourceManager &SourceMgr;
1651   WhitespaceManager Whitespaces;
1652   SmallVector<CharSourceRange, 8> Ranges;
1653   SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
1654 
1655   encoding::Encoding Encoding;
1656   bool BinPackInconclusiveFunctions;
1657 };
1658 
1659 } // end anonymous namespace
1660 
1661 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1662                                SourceManager &SourceMgr,
1663                                std::vector<CharSourceRange> Ranges) {
1664   Formatter formatter(Style, Lex, SourceMgr, Ranges);
1665   return formatter.format();
1666 }
1667 
1668 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1669                                std::vector<tooling::Range> Ranges,
1670                                StringRef FileName) {
1671   FileManager Files((FileSystemOptions()));
1672   DiagnosticsEngine Diagnostics(
1673       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1674       new DiagnosticOptions);
1675   SourceManager SourceMgr(Diagnostics, Files);
1676   llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1677   const clang::FileEntry *Entry =
1678       Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1679   SourceMgr.overrideFileContents(Entry, Buf);
1680   FileID ID =
1681       SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1682   Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
1683             getFormattingLangOpts(Style.Standard));
1684   SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1685   std::vector<CharSourceRange> CharRanges;
1686   for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1687     SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset());
1688     SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
1689     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1690   }
1691   return reformat(Style, Lex, SourceMgr, CharRanges);
1692 }
1693 
1694 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) {
1695   LangOptions LangOpts;
1696   LangOpts.CPlusPlus = 1;
1697   LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1698   LangOpts.LineComment = 1;
1699   LangOpts.Bool = 1;
1700   LangOpts.ObjC1 = 1;
1701   LangOpts.ObjC2 = 1;
1702   return LangOpts;
1703 }
1704 
1705 const char *StyleOptionHelpDescription =
1706     "Coding style, currently supports:\n"
1707     "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
1708     "Use -style=file to load style configuration from\n"
1709     ".clang-format file located in one of the parent\n"
1710     "directories of the source file (or current\n"
1711     "directory for stdin).\n"
1712     "Use -style=\"{key: value, ...}\" to set specific\n"
1713     "parameters, e.g.:\n"
1714     "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
1715 
1716 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
1717   if (FileName.endswith_lower(".js")) {
1718     return FormatStyle::LK_JavaScript;
1719   } else if (FileName.endswith_lower(".proto") ||
1720              FileName.endswith_lower(".protodevel")) {
1721     return FormatStyle::LK_Proto;
1722   }
1723   return FormatStyle::LK_Cpp;
1724 }
1725 
1726 FormatStyle getStyle(StringRef StyleName, StringRef FileName,
1727                      StringRef FallbackStyle) {
1728   FormatStyle Style = getLLVMStyle();
1729   Style.Language = getLanguageByFileName(FileName);
1730   if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
1731     llvm::errs() << "Invalid fallback style \"" << FallbackStyle
1732                  << "\" using LLVM style\n";
1733     return Style;
1734   }
1735 
1736   if (StyleName.startswith("{")) {
1737     // Parse YAML/JSON style from the command line.
1738     if (llvm::error_code ec = parseConfiguration(StyleName, &Style)) {
1739       llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
1740                    << FallbackStyle << " style\n";
1741     }
1742     return Style;
1743   }
1744 
1745   if (!StyleName.equals_lower("file")) {
1746     if (!getPredefinedStyle(StyleName, Style.Language, &Style))
1747       llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1748                    << " style\n";
1749     return Style;
1750   }
1751 
1752   // Look for .clang-format/_clang-format file in the file's parent directories.
1753   SmallString<128> UnsuitableConfigFiles;
1754   SmallString<128> Path(FileName);
1755   llvm::sys::fs::make_absolute(Path);
1756   for (StringRef Directory = Path; !Directory.empty();
1757        Directory = llvm::sys::path::parent_path(Directory)) {
1758     if (!llvm::sys::fs::is_directory(Directory))
1759       continue;
1760     SmallString<128> ConfigFile(Directory);
1761 
1762     llvm::sys::path::append(ConfigFile, ".clang-format");
1763     DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1764     bool IsFile = false;
1765     // Ignore errors from is_regular_file: we only need to know if we can read
1766     // the file or not.
1767     llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1768 
1769     if (!IsFile) {
1770       // Try _clang-format too, since dotfiles are not commonly used on Windows.
1771       ConfigFile = Directory;
1772       llvm::sys::path::append(ConfigFile, "_clang-format");
1773       DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1774       llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1775     }
1776 
1777     if (IsFile) {
1778       OwningPtr<llvm::MemoryBuffer> Text;
1779       if (llvm::error_code ec =
1780               llvm::MemoryBuffer::getFile(ConfigFile.c_str(), Text)) {
1781         llvm::errs() << ec.message() << "\n";
1782         break;
1783       }
1784       if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
1785         if (ec == llvm::errc::not_supported) {
1786           if (!UnsuitableConfigFiles.empty())
1787             UnsuitableConfigFiles.append(", ");
1788           UnsuitableConfigFiles.append(ConfigFile);
1789           continue;
1790         }
1791         llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
1792                      << "\n";
1793         break;
1794       }
1795       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
1796       return Style;
1797     }
1798   }
1799   llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
1800                << " style\n";
1801   if (!UnsuitableConfigFiles.empty()) {
1802     llvm::errs() << "Configuration file(s) do(es) not support "
1803                  << getLanguageName(Style.Language) << ": "
1804                  << UnsuitableConfigFiles << "\n";
1805   }
1806   return Style;
1807 }
1808 
1809 } // namespace format
1810 } // namespace clang
1811