1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "format-formatter"
17 
18 #include "ContinuationIndenter.h"
19 #include "TokenAnnotator.h"
20 #include "UnwrappedLineParser.h"
21 #include "WhitespaceManager.h"
22 #include "clang/Basic/Diagnostic.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Format/Format.h"
25 #include "clang/Lex/Lexer.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/YAMLTraits.h"
31 #include <queue>
32 #include <string>
33 
34 using clang::format::FormatStyle;
35 
36 namespace llvm {
37 namespace yaml {
38 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
39   static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
40     IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
41     IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
42     IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
43   }
44 };
45 
46 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
47   static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
48     IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
49     IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
50     IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
51     IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
52     IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
53   }
54 };
55 
56 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
57   static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
58     IO.enumCase(Value, "Never", FormatStyle::UT_Never);
59     IO.enumCase(Value, "false", FormatStyle::UT_Never);
60     IO.enumCase(Value, "Always", FormatStyle::UT_Always);
61     IO.enumCase(Value, "true", FormatStyle::UT_Always);
62     IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
63   }
64 };
65 
66 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
67   static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
68     IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
69     IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
70     IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
71     IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
72     IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
73   }
74 };
75 
76 template <>
77 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
78   static void enumeration(IO &IO,
79                           FormatStyle::NamespaceIndentationKind &Value) {
80     IO.enumCase(Value, "None", FormatStyle::NI_None);
81     IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
82     IO.enumCase(Value, "All", FormatStyle::NI_All);
83   }
84 };
85 
86 template <>
87 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
88   static void enumeration(IO &IO,
89                           FormatStyle::SpaceBeforeParensOptions &Value) {
90     IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
91     IO.enumCase(Value, "ControlStatements",
92                 FormatStyle::SBPO_ControlStatements);
93     IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
94 
95     // For backward compatibility.
96     IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
97     IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
98   }
99 };
100 
101 template <> struct MappingTraits<FormatStyle> {
102   static void mapping(IO &IO, FormatStyle &Style) {
103     // When reading, read the language first, we need it for getPredefinedStyle.
104     IO.mapOptional("Language", Style.Language);
105 
106     if (IO.outputting()) {
107       StringRef StylesArray[] = { "LLVM",    "Google", "Chromium",
108                                   "Mozilla", "WebKit", "GNU" };
109       ArrayRef<StringRef> Styles(StylesArray);
110       for (size_t i = 0, e = Styles.size(); i < e; ++i) {
111         StringRef StyleName(Styles[i]);
112         FormatStyle PredefinedStyle;
113         if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
114             Style == PredefinedStyle) {
115           IO.mapOptional("# BasedOnStyle", StyleName);
116           break;
117         }
118       }
119     } else {
120       StringRef BasedOnStyle;
121       IO.mapOptional("BasedOnStyle", BasedOnStyle);
122       if (!BasedOnStyle.empty()) {
123         FormatStyle::LanguageKind OldLanguage = Style.Language;
124         FormatStyle::LanguageKind Language =
125             ((FormatStyle *)IO.getContext())->Language;
126         if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
127           IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
128           return;
129         }
130         Style.Language = OldLanguage;
131       }
132     }
133 
134     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
135     IO.mapOptional("ConstructorInitializerIndentWidth",
136                    Style.ConstructorInitializerIndentWidth);
137     IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
138     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
139     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
140                    Style.AllowAllParametersOfDeclarationOnNextLine);
141     IO.mapOptional("AllowShortIfStatementsOnASingleLine",
142                    Style.AllowShortIfStatementsOnASingleLine);
143     IO.mapOptional("AllowShortLoopsOnASingleLine",
144                    Style.AllowShortLoopsOnASingleLine);
145     IO.mapOptional("AllowShortFunctionsOnASingleLine",
146                    Style.AllowShortFunctionsOnASingleLine);
147     IO.mapOptional("AlwaysBreakTemplateDeclarations",
148                    Style.AlwaysBreakTemplateDeclarations);
149     IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
150                    Style.AlwaysBreakBeforeMultilineStrings);
151     IO.mapOptional("BreakBeforeBinaryOperators",
152                    Style.BreakBeforeBinaryOperators);
153     IO.mapOptional("BreakBeforeTernaryOperators",
154                    Style.BreakBeforeTernaryOperators);
155     IO.mapOptional("BreakConstructorInitializersBeforeComma",
156                    Style.BreakConstructorInitializersBeforeComma);
157     IO.mapOptional("BinPackParameters", Style.BinPackParameters);
158     IO.mapOptional("ColumnLimit", Style.ColumnLimit);
159     IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
160                    Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
161     IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding);
162     IO.mapOptional("ExperimentalAutoDetectBinPacking",
163                    Style.ExperimentalAutoDetectBinPacking);
164     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
165     IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
166     IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
167     IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
168     IO.mapOptional("ObjCSpaceBeforeProtocolList",
169                    Style.ObjCSpaceBeforeProtocolList);
170     IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
171                    Style.PenaltyBreakBeforeFirstCallParameter);
172     IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
173     IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
174     IO.mapOptional("PenaltyBreakFirstLessLess",
175                    Style.PenaltyBreakFirstLessLess);
176     IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
177     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
178                    Style.PenaltyReturnTypeOnItsOwnLine);
179     IO.mapOptional("PointerBindsToType", Style.PointerBindsToType);
180     IO.mapOptional("SpacesBeforeTrailingComments",
181                    Style.SpacesBeforeTrailingComments);
182     IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
183     IO.mapOptional("Standard", Style.Standard);
184     IO.mapOptional("IndentWidth", Style.IndentWidth);
185     IO.mapOptional("TabWidth", Style.TabWidth);
186     IO.mapOptional("UseTab", Style.UseTab);
187     IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
188     IO.mapOptional("IndentFunctionDeclarationAfterType",
189                    Style.IndentFunctionDeclarationAfterType);
190     IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
191     IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
192     IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
193     IO.mapOptional("SpacesInCStyleCastParentheses",
194                    Style.SpacesInCStyleCastParentheses);
195     IO.mapOptional("SpacesInContainerLiterals",
196                    Style.SpacesInContainerLiterals);
197     IO.mapOptional("SpaceBeforeAssignmentOperators",
198                    Style.SpaceBeforeAssignmentOperators);
199     IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
200     IO.mapOptional("CommentPragmas", Style.CommentPragmas);
201 
202     // For backward compatibility.
203     if (!IO.outputting()) {
204       IO.mapOptional("SpaceAfterControlStatementKeyword",
205                      Style.SpaceBeforeParens);
206     }
207     IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
208   }
209 };
210 
211 // Allows to read vector<FormatStyle> while keeping default values.
212 // IO.getContext() should contain a pointer to the FormatStyle structure, that
213 // will be used to get default values for missing keys.
214 // If the first element has no Language specified, it will be treated as the
215 // default one for the following elements.
216 template <> struct DocumentListTraits<std::vector<FormatStyle> > {
217   static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
218     return Seq.size();
219   }
220   static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
221                               size_t Index) {
222     if (Index >= Seq.size()) {
223       assert(Index == Seq.size());
224       FormatStyle Template;
225       if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
226         Template = Seq[0];
227       } else {
228         Template = *((const FormatStyle*)IO.getContext());
229         Template.Language = FormatStyle::LK_None;
230       }
231       Seq.resize(Index + 1, Template);
232     }
233     return Seq[Index];
234   }
235 };
236 }
237 }
238 
239 namespace clang {
240 namespace format {
241 
242 FormatStyle getLLVMStyle() {
243   FormatStyle LLVMStyle;
244   LLVMStyle.Language = FormatStyle::LK_Cpp;
245   LLVMStyle.AccessModifierOffset = -2;
246   LLVMStyle.AlignEscapedNewlinesLeft = false;
247   LLVMStyle.AlignTrailingComments = true;
248   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
249   LLVMStyle.AllowShortFunctionsOnASingleLine = true;
250   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
251   LLVMStyle.AllowShortLoopsOnASingleLine = false;
252   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
253   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
254   LLVMStyle.BinPackParameters = true;
255   LLVMStyle.BreakBeforeBinaryOperators = false;
256   LLVMStyle.BreakBeforeTernaryOperators = true;
257   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
258   LLVMStyle.BreakConstructorInitializersBeforeComma = false;
259   LLVMStyle.ColumnLimit = 80;
260   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
261   LLVMStyle.ConstructorInitializerIndentWidth = 4;
262   LLVMStyle.Cpp11BracedListStyle = false;
263   LLVMStyle.DerivePointerBinding = false;
264   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
265   LLVMStyle.IndentCaseLabels = false;
266   LLVMStyle.IndentFunctionDeclarationAfterType = false;
267   LLVMStyle.IndentWidth = 2;
268   LLVMStyle.TabWidth = 8;
269   LLVMStyle.MaxEmptyLinesToKeep = 1;
270   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
271   LLVMStyle.ObjCSpaceAfterProperty = false;
272   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
273   LLVMStyle.PointerBindsToType = false;
274   LLVMStyle.SpacesBeforeTrailingComments = 1;
275   LLVMStyle.Standard = FormatStyle::LS_Cpp03;
276   LLVMStyle.UseTab = FormatStyle::UT_Never;
277   LLVMStyle.SpacesInParentheses = false;
278   LLVMStyle.SpaceInEmptyParentheses = false;
279   LLVMStyle.SpacesInContainerLiterals = true;
280   LLVMStyle.SpacesInCStyleCastParentheses = false;
281   LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
282   LLVMStyle.SpaceBeforeAssignmentOperators = true;
283   LLVMStyle.ContinuationIndentWidth = 4;
284   LLVMStyle.SpacesInAngles = false;
285   LLVMStyle.CommentPragmas = "^ IWYU pragma:";
286 
287   LLVMStyle.PenaltyBreakComment = 300;
288   LLVMStyle.PenaltyBreakFirstLessLess = 120;
289   LLVMStyle.PenaltyBreakString = 1000;
290   LLVMStyle.PenaltyExcessCharacter = 1000000;
291   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
292   LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
293 
294   return LLVMStyle;
295 }
296 
297 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
298   FormatStyle GoogleStyle = getLLVMStyle();
299   GoogleStyle.Language = Language;
300 
301   GoogleStyle.AccessModifierOffset = -1;
302   GoogleStyle.AlignEscapedNewlinesLeft = true;
303   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
304   GoogleStyle.AllowShortLoopsOnASingleLine = true;
305   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
306   GoogleStyle.AlwaysBreakTemplateDeclarations = true;
307   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
308   GoogleStyle.Cpp11BracedListStyle = true;
309   GoogleStyle.DerivePointerBinding = true;
310   GoogleStyle.IndentCaseLabels = true;
311   GoogleStyle.IndentFunctionDeclarationAfterType = true;
312   GoogleStyle.ObjCSpaceAfterProperty = false;
313   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
314   GoogleStyle.PointerBindsToType = true;
315   GoogleStyle.SpacesBeforeTrailingComments = 2;
316   GoogleStyle.Standard = FormatStyle::LS_Auto;
317 
318   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
319   GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
320 
321   if (Language == FormatStyle::LK_JavaScript) {
322     GoogleStyle.BreakBeforeTernaryOperators = false;
323     GoogleStyle.MaxEmptyLinesToKeep = 2;
324     GoogleStyle.SpacesInContainerLiterals = false;
325   } else if (Language == FormatStyle::LK_Proto) {
326     GoogleStyle.AllowShortFunctionsOnASingleLine = false;
327   }
328 
329   return GoogleStyle;
330 }
331 
332 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
333   FormatStyle ChromiumStyle = getGoogleStyle(Language);
334   ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
335   ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
336   ChromiumStyle.AllowShortLoopsOnASingleLine = false;
337   ChromiumStyle.BinPackParameters = false;
338   ChromiumStyle.DerivePointerBinding = false;
339   ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
340   return ChromiumStyle;
341 }
342 
343 FormatStyle getMozillaStyle() {
344   FormatStyle MozillaStyle = getLLVMStyle();
345   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
346   MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
347   MozillaStyle.DerivePointerBinding = true;
348   MozillaStyle.IndentCaseLabels = true;
349   MozillaStyle.ObjCSpaceAfterProperty = true;
350   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
351   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
352   MozillaStyle.PointerBindsToType = true;
353   return MozillaStyle;
354 }
355 
356 FormatStyle getWebKitStyle() {
357   FormatStyle Style = getLLVMStyle();
358   Style.AccessModifierOffset = -4;
359   Style.AlignTrailingComments = false;
360   Style.BreakBeforeBinaryOperators = true;
361   Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
362   Style.BreakConstructorInitializersBeforeComma = true;
363   Style.ColumnLimit = 0;
364   Style.IndentWidth = 4;
365   Style.NamespaceIndentation = FormatStyle::NI_Inner;
366   Style.ObjCSpaceAfterProperty = true;
367   Style.PointerBindsToType = true;
368   return Style;
369 }
370 
371 FormatStyle getGNUStyle() {
372   FormatStyle Style = getLLVMStyle();
373   Style.BreakBeforeBinaryOperators = true;
374   Style.BreakBeforeBraces = FormatStyle::BS_GNU;
375   Style.BreakBeforeTernaryOperators = true;
376   Style.ColumnLimit = 79;
377   Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
378   return Style;
379 }
380 
381 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
382                         FormatStyle *Style) {
383   if (Name.equals_lower("llvm")) {
384     *Style = getLLVMStyle();
385   } else if (Name.equals_lower("chromium")) {
386     *Style = getChromiumStyle(Language);
387   } else if (Name.equals_lower("mozilla")) {
388     *Style = getMozillaStyle();
389   } else if (Name.equals_lower("google")) {
390     *Style = getGoogleStyle(Language);
391   } else if (Name.equals_lower("webkit")) {
392     *Style = getWebKitStyle();
393   } else if (Name.equals_lower("gnu")) {
394     *Style = getGNUStyle();
395   } else {
396     return false;
397   }
398 
399   Style->Language = Language;
400   return true;
401 }
402 
403 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
404   assert(Style);
405   FormatStyle::LanguageKind Language = Style->Language;
406   assert(Language != FormatStyle::LK_None);
407   if (Text.trim().empty())
408     return llvm::make_error_code(llvm::errc::invalid_argument);
409 
410   std::vector<FormatStyle> Styles;
411   llvm::yaml::Input Input(Text);
412   // DocumentListTraits<vector<FormatStyle>> uses the context to get default
413   // values for the fields, keys for which are missing from the configuration.
414   // Mapping also uses the context to get the language to find the correct
415   // base style.
416   Input.setContext(Style);
417   Input >> Styles;
418   if (Input.error())
419     return Input.error();
420 
421   for (unsigned i = 0; i < Styles.size(); ++i) {
422     // Ensures that only the first configuration can skip the Language option.
423     if (Styles[i].Language == FormatStyle::LK_None && i != 0)
424       return llvm::make_error_code(llvm::errc::invalid_argument);
425     // Ensure that each language is configured at most once.
426     for (unsigned j = 0; j < i; ++j) {
427       if (Styles[i].Language == Styles[j].Language) {
428         DEBUG(llvm::dbgs()
429               << "Duplicate languages in the config file on positions " << j
430               << " and " << i << "\n");
431         return llvm::make_error_code(llvm::errc::invalid_argument);
432       }
433     }
434   }
435   // Look for a suitable configuration starting from the end, so we can
436   // find the configuration for the specific language first, and the default
437   // configuration (which can only be at slot 0) after it.
438   for (int i = Styles.size() - 1; i >= 0; --i) {
439     if (Styles[i].Language == Language ||
440         Styles[i].Language == FormatStyle::LK_None) {
441       *Style = Styles[i];
442       Style->Language = Language;
443       return llvm::make_error_code(llvm::errc::success);
444     }
445   }
446   return llvm::make_error_code(llvm::errc::not_supported);
447 }
448 
449 std::string configurationAsText(const FormatStyle &Style) {
450   std::string Text;
451   llvm::raw_string_ostream Stream(Text);
452   llvm::yaml::Output Output(Stream);
453   // We use the same mapping method for input and output, so we need a non-const
454   // reference here.
455   FormatStyle NonConstStyle = Style;
456   Output << NonConstStyle;
457   return Stream.str();
458 }
459 
460 namespace {
461 
462 class NoColumnLimitFormatter {
463 public:
464   NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {}
465 
466   /// \brief Formats the line starting at \p State, simply keeping all of the
467   /// input's line breaking decisions.
468   void format(unsigned FirstIndent, const AnnotatedLine *Line) {
469     LineState State =
470         Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false);
471     while (State.NextToken != NULL) {
472       bool Newline =
473           Indenter->mustBreak(State) ||
474           (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
475       Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
476     }
477   }
478 
479 private:
480   ContinuationIndenter *Indenter;
481 };
482 
483 class LineJoiner {
484 public:
485   LineJoiner(const FormatStyle &Style) : Style(Style) {}
486 
487   /// \brief Calculates how many lines can be merged into 1 starting at \p I.
488   unsigned
489   tryFitMultipleLinesInOne(unsigned Indent,
490                            SmallVectorImpl<AnnotatedLine *>::const_iterator I,
491                            SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
492     // We can never merge stuff if there are trailing line comments.
493     const AnnotatedLine *TheLine = *I;
494     if (TheLine->Last->Type == TT_LineComment)
495       return 0;
496 
497     if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
498       return 0;
499 
500     unsigned Limit =
501         Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
502     // If we already exceed the column limit, we set 'Limit' to 0. The different
503     // tryMerge..() functions can then decide whether to still do merging.
504     Limit = TheLine->Last->TotalLength > Limit
505                 ? 0
506                 : Limit - TheLine->Last->TotalLength;
507 
508     if (I + 1 == E || I[1]->Type == LT_Invalid)
509       return 0;
510 
511     if (TheLine->Last->Type == TT_FunctionLBrace &&
512         TheLine->First != TheLine->Last) {
513       return Style.AllowShortFunctionsOnASingleLine
514                  ? tryMergeSimpleBlock(I, E, Limit)
515                  : 0;
516     }
517     if (TheLine->Last->is(tok::l_brace)) {
518       return Style.BreakBeforeBraces == FormatStyle::BS_Attach
519                  ? tryMergeSimpleBlock(I, E, Limit)
520                  : 0;
521     }
522     if (I[1]->First->Type == TT_FunctionLBrace &&
523         Style.BreakBeforeBraces != FormatStyle::BS_Attach) {
524       // Check for Limit <= 2 to account for the " {".
525       if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine)))
526         return 0;
527       Limit -= 2;
528 
529       unsigned MergedLines = 0;
530       if (Style.AllowShortFunctionsOnASingleLine) {
531         MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
532         // If we managed to merge the block, count the function header, which is
533         // on a separate line.
534         if (MergedLines > 0)
535           ++MergedLines;
536       }
537       return MergedLines;
538     }
539     if (TheLine->First->is(tok::kw_if)) {
540       return Style.AllowShortIfStatementsOnASingleLine
541                  ? tryMergeSimpleControlStatement(I, E, Limit)
542                  : 0;
543     }
544     if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
545       return Style.AllowShortLoopsOnASingleLine
546                  ? tryMergeSimpleControlStatement(I, E, Limit)
547                  : 0;
548     }
549     if (TheLine->InPPDirective &&
550         (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
551       return tryMergeSimplePPDirective(I, E, Limit);
552     }
553     return 0;
554   }
555 
556 private:
557   unsigned
558   tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
559                             SmallVectorImpl<AnnotatedLine *>::const_iterator E,
560                             unsigned Limit) {
561     if (Limit == 0)
562       return 0;
563     if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)
564       return 0;
565     if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
566       return 0;
567     if (1 + I[1]->Last->TotalLength > Limit)
568       return 0;
569     return 1;
570   }
571 
572   unsigned tryMergeSimpleControlStatement(
573       SmallVectorImpl<AnnotatedLine *>::const_iterator I,
574       SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
575     if (Limit == 0)
576       return 0;
577     if ((Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
578          Style.BreakBeforeBraces == FormatStyle::BS_GNU) &&
579         I[1]->First->is(tok::l_brace))
580       return 0;
581     if (I[1]->InPPDirective != (*I)->InPPDirective ||
582         (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline))
583       return 0;
584     AnnotatedLine &Line = **I;
585     if (Line.Last->isNot(tok::r_paren))
586       return 0;
587     if (1 + I[1]->Last->TotalLength > Limit)
588       return 0;
589     if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
590                              tok::kw_while) ||
591         I[1]->First->Type == TT_LineComment)
592       return 0;
593     // Only inline simple if's (no nested if or else).
594     if (I + 2 != E && Line.First->is(tok::kw_if) &&
595         I[2]->First->is(tok::kw_else))
596       return 0;
597     return 1;
598   }
599 
600   unsigned
601   tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
602                       SmallVectorImpl<AnnotatedLine *>::const_iterator E,
603                       unsigned Limit) {
604     // First, check that the current line allows merging. This is the case if
605     // we're not in a control flow statement and the last token is an opening
606     // brace.
607     AnnotatedLine &Line = **I;
608     if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
609                             tok::kw_else, tok::kw_try, tok::kw_catch,
610                             tok::kw_for,
611                             // This gets rid of all ObjC @ keywords and methods.
612                             tok::at, tok::minus, tok::plus))
613       return 0;
614 
615     FormatToken *Tok = I[1]->First;
616     if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
617         (Tok->getNextNonComment() == NULL ||
618          Tok->getNextNonComment()->is(tok::semi))) {
619       // We merge empty blocks even if the line exceeds the column limit.
620       Tok->SpacesRequiredBefore = 0;
621       Tok->CanBreakBefore = true;
622       return 1;
623     } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
624       // Check that we still have three lines and they fit into the limit.
625       if (I + 2 == E || I[2]->Type == LT_Invalid)
626         return 0;
627 
628       if (!nextTwoLinesFitInto(I, Limit))
629         return 0;
630 
631       // Second, check that the next line does not contain any braces - if it
632       // does, readability declines when putting it into a single line.
633       if (I[1]->Last->Type == TT_LineComment || Tok->MustBreakBefore)
634         return 0;
635       do {
636         if (Tok->isOneOf(tok::l_brace, tok::r_brace))
637           return 0;
638         Tok = Tok->Next;
639       } while (Tok != NULL);
640 
641       // Last, check that the third line contains a single closing brace.
642       Tok = I[2]->First;
643       if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) ||
644           Tok->MustBreakBefore)
645         return 0;
646 
647       return 2;
648     }
649     return 0;
650   }
651 
652   bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
653                            unsigned Limit) {
654     return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
655   }
656 
657   bool containsMustBreak(const AnnotatedLine *Line) {
658     for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
659       if (Tok->MustBreakBefore)
660         return true;
661     }
662     return false;
663   }
664 
665   const FormatStyle &Style;
666 };
667 
668 class UnwrappedLineFormatter {
669 public:
670   UnwrappedLineFormatter(ContinuationIndenter *Indenter,
671                          WhitespaceManager *Whitespaces,
672                          const FormatStyle &Style)
673       : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
674         Joiner(Style) {}
675 
676   unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
677                   int AdditionalIndent = 0, bool FixBadIndentation = false) {
678     assert(!Lines.empty());
679     unsigned Penalty = 0;
680     std::vector<int> IndentForLevel;
681     for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i)
682       IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
683     const AnnotatedLine *PreviousLine = NULL;
684     for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(),
685                                                           E = Lines.end();
686          I != E; ++I) {
687       const AnnotatedLine &TheLine = **I;
688       const FormatToken *FirstTok = TheLine.First;
689       int Offset = getIndentOffset(*FirstTok);
690 
691       // Determine indent and try to merge multiple unwrapped lines.
692       unsigned Indent;
693       if (TheLine.InPPDirective) {
694         Indent = TheLine.Level * Style.IndentWidth;
695       } else {
696         while (IndentForLevel.size() <= TheLine.Level)
697           IndentForLevel.push_back(-1);
698         IndentForLevel.resize(TheLine.Level + 1);
699         Indent = getIndent(IndentForLevel, TheLine.Level);
700       }
701       unsigned LevelIndent = Indent;
702       if (static_cast<int>(Indent) + Offset >= 0)
703         Indent += Offset;
704 
705       // Merge multiple lines if possible.
706       unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E);
707       if (MergedLines > 0 && Style.ColumnLimit == 0) {
708         // Disallow line merging if there is a break at the start of one of the
709         // input lines.
710         for (unsigned i = 0; i < MergedLines; ++i) {
711           if (I[i + 1]->First->NewlinesBefore > 0)
712             MergedLines = 0;
713         }
714       }
715       if (!DryRun) {
716         for (unsigned i = 0; i < MergedLines; ++i) {
717           join(*I[i], *I[i + 1]);
718         }
719       }
720       I += MergedLines;
721 
722       bool FixIndentation =
723           FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn);
724       if (TheLine.First->is(tok::eof)) {
725         if (PreviousLine && PreviousLine->Affected && !DryRun) {
726           // Remove the file's trailing whitespace.
727           unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u);
728           Whitespaces->replaceWhitespace(*TheLine.First, Newlines,
729                                          /*IndentLevel=*/0, /*Spaces=*/0,
730                                          /*TargetColumn=*/0);
731         }
732       } else if (TheLine.Type != LT_Invalid &&
733                  (TheLine.Affected || FixIndentation)) {
734         if (FirstTok->WhitespaceRange.isValid()) {
735           if (!DryRun)
736             formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
737                              Indent, TheLine.InPPDirective);
738         } else {
739           Indent = LevelIndent = FirstTok->OriginalColumn;
740         }
741 
742         // If everything fits on a single line, just put it there.
743         unsigned ColumnLimit = Style.ColumnLimit;
744         if (I + 1 != E) {
745           AnnotatedLine *NextLine = I[1];
746           if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline)
747             ColumnLimit = getColumnLimit(TheLine.InPPDirective);
748         }
749 
750         if (TheLine.Last->TotalLength + Indent <= ColumnLimit) {
751           LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun);
752           while (State.NextToken != NULL)
753             Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
754         } else if (Style.ColumnLimit == 0) {
755           // FIXME: Implement nested blocks for ColumnLimit = 0.
756           NoColumnLimitFormatter Formatter(Indenter);
757           if (!DryRun)
758             Formatter.format(Indent, &TheLine);
759         } else {
760           Penalty += format(TheLine, Indent, DryRun);
761         }
762 
763         if (!TheLine.InPPDirective)
764           IndentForLevel[TheLine.Level] = LevelIndent;
765       } else if (TheLine.ChildrenAffected) {
766         format(TheLine.Children, DryRun);
767       } else {
768         // Format the first token if necessary, and notify the WhitespaceManager
769         // about the unchanged whitespace.
770         for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
771           if (Tok == TheLine.First &&
772               (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
773             unsigned LevelIndent = Tok->OriginalColumn;
774             if (!DryRun) {
775               // Remove trailing whitespace of the previous line.
776               if ((PreviousLine && PreviousLine->Affected) ||
777                   TheLine.LeadingEmptyLinesAffected) {
778                 formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent,
779                                  TheLine.InPPDirective);
780               } else {
781                 Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
782               }
783             }
784 
785             if (static_cast<int>(LevelIndent) - Offset >= 0)
786               LevelIndent -= Offset;
787             if (Tok->isNot(tok::comment) && !TheLine.InPPDirective)
788               IndentForLevel[TheLine.Level] = LevelIndent;
789           } else if (!DryRun) {
790             Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
791           }
792         }
793       }
794       if (!DryRun) {
795         for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
796           Tok->Finalized = true;
797         }
798       }
799       PreviousLine = *I;
800     }
801     return Penalty;
802   }
803 
804 private:
805   /// \brief Formats an \c AnnotatedLine and returns the penalty.
806   ///
807   /// If \p DryRun is \c false, directly applies the changes.
808   unsigned format(const AnnotatedLine &Line, unsigned FirstIndent,
809                   bool DryRun) {
810     LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
811 
812     // If the ObjC method declaration does not fit on a line, we should format
813     // it with one arg per line.
814     if (State.Line->Type == LT_ObjCMethodDecl)
815       State.Stack.back().BreakBeforeParameter = true;
816 
817     // Find best solution in solution space.
818     return analyzeSolutionSpace(State, DryRun);
819   }
820 
821   /// \brief An edge in the solution space from \c Previous->State to \c State,
822   /// inserting a newline dependent on the \c NewLine.
823   struct StateNode {
824     StateNode(const LineState &State, bool NewLine, StateNode *Previous)
825         : State(State), NewLine(NewLine), Previous(Previous) {}
826     LineState State;
827     bool NewLine;
828     StateNode *Previous;
829   };
830 
  /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
  ///
  /// The first element is the accumulated penalty, the second the running
  /// creation count of the \c StateNode. Pairs compare lexicographically, so
  /// in case of equal penalties, states that were inserted first are
  /// preferred. During state generation we make sure that we insert states
  /// first that break the line as late as possible.
  typedef std::pair<unsigned, unsigned> OrderedPenalty;

  /// \brief An item in the prioritized BFS search queue. The \c StateNode's
  /// \c State has the given \c OrderedPenalty.
  typedef std::pair<OrderedPenalty, StateNode *> QueueItem;

  /// \brief The BFS queue type.
  ///
  /// Ordered with \c std::greater, so \c top() is the item with the smallest
  /// \c OrderedPenalty.
  typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
                              std::greater<QueueItem> > QueueType;
845 
846   /// \brief Get the offset of the line relatively to the level.
847   ///
848   /// For example, 'public:' labels in classes are offset by 1 or 2
849   /// characters to the left from their level.
850   int getIndentOffset(const FormatToken &RootToken) {
851     if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
852       return Style.AccessModifierOffset;
853     return 0;
854   }
855 
856   /// \brief Add a new line and the required indent before the first Token
857   /// of the \c UnwrappedLine if there was no structural parsing error.
858   void formatFirstToken(FormatToken &RootToken,
859                         const AnnotatedLine *PreviousLine, unsigned IndentLevel,
860                         unsigned Indent, bool InPPDirective) {
861     unsigned Newlines =
862         std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
863     // Remove empty lines before "}" where applicable.
864     if (RootToken.is(tok::r_brace) &&
865         (!RootToken.Next ||
866          (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
867       Newlines = std::min(Newlines, 1u);
868     if (Newlines == 0 && !RootToken.IsFirst)
869       Newlines = 1;
870 
871     // Insert extra new line before access specifiers.
872     if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
873         RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
874       ++Newlines;
875 
876     // Remove empty lines after access specifiers.
877     if (PreviousLine && PreviousLine->First->isAccessSpecifier())
878       Newlines = std::min(1u, Newlines);
879 
880     Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent,
881                                    Indent, InPPDirective &&
882                                                !RootToken.HasUnescapedNewline);
883   }
884 
885   /// \brief Get the indent of \p Level from \p IndentForLevel.
886   ///
887   /// \p IndentForLevel must contain the indent for the level \c l
888   /// at \p IndentForLevel[l], or a value < 0 if the indent for
889   /// that level is unknown.
890   unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
891     if (IndentForLevel[Level] != -1)
892       return IndentForLevel[Level];
893     if (Level == 0)
894       return 0;
895     return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
896   }
897 
898   void join(AnnotatedLine &A, const AnnotatedLine &B) {
899     assert(!A.Last->Next);
900     assert(!B.First->Previous);
901     if (B.Affected)
902       A.Affected = true;
903     A.Last->Next = B.First;
904     B.First->Previous = A.Last;
905     B.First->CanBreakBefore = true;
906     unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
907     for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
908       Tok->TotalLength += LengthA;
909       A.Last = Tok;
910     }
911   }
912 
913   unsigned getColumnLimit(bool InPPDirective) const {
914     // In preprocessor directives reserve two chars for trailing " \"
915     return Style.ColumnLimit - (InPPDirective ? 2 : 0);
916   }
917 
918   /// \brief Analyze the entire solution space starting from \p InitialState.
919   ///
920   /// This implements a variant of Dijkstra's algorithm on the graph that spans
921   /// the solution space (\c LineStates are the nodes). The algorithm tries to
922   /// find the shortest path (the one with lowest penalty) from \p InitialState
923   /// to a state where all tokens are placed. Returns the penalty.
924   ///
925   /// If \p DryRun is \c false, directly applies the changes.
926   unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) {
927     std::set<LineState> Seen;
928 
929     // Increasing count of \c StateNode items we have created. This is used to
930     // create a deterministic order independent of the container.
931     unsigned Count = 0;
932     QueueType Queue;
933 
934     // Insert start element into queue.
935     StateNode *Node =
936         new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
937     Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
938     ++Count;
939 
940     unsigned Penalty = 0;
941 
942     // While not empty, take first element and follow edges.
943     while (!Queue.empty()) {
944       Penalty = Queue.top().first.first;
945       StateNode *Node = Queue.top().second;
946       if (Node->State.NextToken == NULL) {
947         DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
948         break;
949       }
950       Queue.pop();
951 
952       // Cut off the analysis of certain solutions if the analysis gets too
953       // complex. See description of IgnoreStackForComparison.
954       if (Count > 10000)
955         Node->State.IgnoreStackForComparison = true;
956 
957       if (!Seen.insert(Node->State).second)
958         // State already examined with lower penalty.
959         continue;
960 
961       FormatDecision LastFormat = Node->State.NextToken->Decision;
962       if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
963         addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
964       if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
965         addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
966     }
967 
968     if (Queue.empty()) {
969       // We were unable to find a solution, do nothing.
970       // FIXME: Add diagnostic?
971       DEBUG(llvm::dbgs() << "Could not find a solution.\n");
972       return 0;
973     }
974 
975     // Reconstruct the solution.
976     if (!DryRun)
977       reconstructPath(InitialState, Queue.top().second);
978 
979     DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
980     DEBUG(llvm::dbgs() << "---\n");
981 
982     return Penalty;
983   }
984 
985   void reconstructPath(LineState &State, StateNode *Current) {
986     std::deque<StateNode *> Path;
987     // We do not need a break before the initial token.
988     while (Current->Previous) {
989       Path.push_front(Current);
990       Current = Current->Previous;
991     }
992     for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
993          I != E; ++I) {
994       unsigned Penalty = 0;
995       formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
996       Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
997 
998       DEBUG({
999         if ((*I)->NewLine) {
1000           llvm::dbgs() << "Penalty for placing "
1001                        << (*I)->Previous->State.NextToken->Tok.getName() << ": "
1002                        << Penalty << "\n";
1003         }
1004       });
1005     }
1006   }
1007 
1008   /// \brief Add the following state to the analysis queue \c Queue.
1009   ///
1010   /// Assume the current state is \p PreviousNode and has been reached with a
1011   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
1012   void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
1013                            bool NewLine, unsigned *Count, QueueType *Queue) {
1014     if (NewLine && !Indenter->canBreak(PreviousNode->State))
1015       return;
1016     if (!NewLine && Indenter->mustBreak(PreviousNode->State))
1017       return;
1018 
1019     StateNode *Node = new (Allocator.Allocate())
1020         StateNode(PreviousNode->State, NewLine, PreviousNode);
1021     if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
1022       return;
1023 
1024     Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
1025 
1026     Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
1027     ++(*Count);
1028   }
1029 
1030   /// \brief If the \p State's next token is an r_brace closing a nested block,
1031   /// format the nested block before it.
1032   ///
1033   /// Returns \c true if all children could be placed successfully and adapts
1034   /// \p Penalty as well as \p State. If \p DryRun is false, also directly
1035   /// creates changes using \c Whitespaces.
1036   ///
1037   /// The crucial idea here is that children always get formatted upon
1038   /// encountering the closing brace right after the nested block. Now, if we
1039   /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
1040   /// \c false), the entire block has to be kept on the same line (which is only
1041   /// possible if it fits on the line, only contains a single statement, etc.
1042   ///
1043   /// If \p NewLine is true, we format the nested block on separate lines, i.e.
1044   /// break after the "{", format all lines with correct indentation and the put
1045   /// the closing "}" on yet another new line.
1046   ///
1047   /// This enables us to keep the simple structure of the
1048   /// \c UnwrappedLineFormatter, where we only have two options for each token:
1049   /// break or don't break.
1050   bool formatChildren(LineState &State, bool NewLine, bool DryRun,
1051                       unsigned &Penalty) {
1052     FormatToken &Previous = *State.NextToken->Previous;
1053     const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
1054     if (!LBrace || LBrace->isNot(tok::l_brace) ||
1055         LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
1056       // The previous token does not open a block. Nothing to do. We don't
1057       // assert so that we can simply call this function for all tokens.
1058       return true;
1059 
1060     if (NewLine) {
1061       int AdditionalIndent = State.Stack.back().Indent -
1062                              Previous.Children[0]->Level * Style.IndentWidth;
1063       Penalty += format(Previous.Children, DryRun, AdditionalIndent,
1064                         /*FixBadIndentation=*/true);
1065       return true;
1066     }
1067 
1068     // Cannot merge multiple statements into a single line.
1069     if (Previous.Children.size() > 1)
1070       return false;
1071 
1072     // We can't put the closing "}" on a line with a trailing comment.
1073     if (Previous.Children[0]->Last->isTrailingComment())
1074       return false;
1075 
1076     if (!DryRun) {
1077       Whitespaces->replaceWhitespace(
1078           *Previous.Children[0]->First,
1079           /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
1080           /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
1081     }
1082     Penalty += format(*Previous.Children[0], State.Column + 1, DryRun);
1083 
1084     State.Column += 1 + Previous.Children[0]->Last->TotalLength;
1085     return true;
1086   }
1087 
  /// Provides the initial \c LineState of a line, answers whether a break
  /// can or must be inserted and places tokens into a \c LineState.
  ContinuationIndenter *Indenter;
  /// Receives all whitespace decisions (\c replaceWhitespace,
  /// \c addUntouchableToken).
  WhitespaceManager *Whitespaces;
  /// The style options consulted while formatting (e.g. \c IndentWidth,
  /// \c ColumnLimit, \c MaxEmptyLinesToKeep, \c AccessModifierOffset).
  FormatStyle Style;
  /// Used to find out how many following lines can be merged into one.
  LineJoiner Joiner;

  /// Backing storage for the \c StateNode objects created during the search.
  llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
1094 };
1095 
1096 class FormatTokenLexer {
1097 public:
1098   FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
1099                    encoding::Encoding Encoding)
1100       : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0),
1101         TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
1102         IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
1103     Lex.SetKeepWhitespaceMode(true);
1104   }
1105 
1106   ArrayRef<FormatToken *> lex() {
1107     assert(Tokens.empty());
1108     do {
1109       Tokens.push_back(getNextToken());
1110       tryMergePreviousTokens();
1111     } while (Tokens.back()->Tok.isNot(tok::eof));
1112     return Tokens;
1113   }
1114 
1115   IdentifierTable &getIdentTable() { return IdentTable; }
1116 
1117 private:
1118   void tryMergePreviousTokens() {
1119     if (tryMerge_TMacro())
1120       return;
1121 
1122     if (Style.Language == FormatStyle::LK_JavaScript) {
1123       static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
1124       static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
1125       static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater,
1126                                                tok::greaterequal };
1127       // FIXME: We probably need to change token type to mimic operator with the
1128       // correct priority.
1129       if (tryMergeTokens(JSIdentity))
1130         return;
1131       if (tryMergeTokens(JSNotIdentity))
1132         return;
1133       if (tryMergeTokens(JSShiftEqual))
1134         return;
1135     }
1136   }
1137 
1138   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) {
1139     if (Tokens.size() < Kinds.size())
1140       return false;
1141 
1142     SmallVectorImpl<FormatToken *>::const_iterator First =
1143         Tokens.end() - Kinds.size();
1144     if (!First[0]->is(Kinds[0]))
1145       return false;
1146     unsigned AddLength = 0;
1147     for (unsigned i = 1; i < Kinds.size(); ++i) {
1148       if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
1149                                          First[i]->WhitespaceRange.getEnd())
1150         return false;
1151       AddLength += First[i]->TokenText.size();
1152     }
1153     Tokens.resize(Tokens.size() - Kinds.size() + 1);
1154     First[0]->TokenText = StringRef(First[0]->TokenText.data(),
1155                                     First[0]->TokenText.size() + AddLength);
1156     First[0]->ColumnWidth += AddLength;
1157     return true;
1158   }
1159 
1160   bool tryMerge_TMacro() {
1161     if (Tokens.size() < 4)
1162       return false;
1163     FormatToken *Last = Tokens.back();
1164     if (!Last->is(tok::r_paren))
1165       return false;
1166 
1167     FormatToken *String = Tokens[Tokens.size() - 2];
1168     if (!String->is(tok::string_literal) || String->IsMultiline)
1169       return false;
1170 
1171     if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
1172       return false;
1173 
1174     FormatToken *Macro = Tokens[Tokens.size() - 4];
1175     if (Macro->TokenText != "_T")
1176       return false;
1177 
1178     const char *Start = Macro->TokenText.data();
1179     const char *End = Last->TokenText.data() + Last->TokenText.size();
1180     String->TokenText = StringRef(Start, End - Start);
1181     String->IsFirst = Macro->IsFirst;
1182     String->LastNewlineOffset = Macro->LastNewlineOffset;
1183     String->WhitespaceRange = Macro->WhitespaceRange;
1184     String->OriginalColumn = Macro->OriginalColumn;
1185     String->ColumnWidth = encoding::columnWidthWithTabs(
1186         String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
1187 
1188     Tokens.pop_back();
1189     Tokens.pop_back();
1190     Tokens.pop_back();
1191     Tokens.back() = String;
1192     return true;
1193   }
1194 
1195   FormatToken *getNextToken() {
1196     if (GreaterStashed) {
1197       // Create a synthesized second '>' token.
1198       // FIXME: Increment Column and set OriginalColumn.
1199       Token Greater = FormatTok->Tok;
1200       FormatTok = new (Allocator.Allocate()) FormatToken;
1201       FormatTok->Tok = Greater;
1202       SourceLocation GreaterLocation =
1203           FormatTok->Tok.getLocation().getLocWithOffset(1);
1204       FormatTok->WhitespaceRange =
1205           SourceRange(GreaterLocation, GreaterLocation);
1206       FormatTok->TokenText = ">";
1207       FormatTok->ColumnWidth = 1;
1208       GreaterStashed = false;
1209       return FormatTok;
1210     }
1211 
1212     FormatTok = new (Allocator.Allocate()) FormatToken;
1213     readRawToken(*FormatTok);
1214     SourceLocation WhitespaceStart =
1215         FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
1216     FormatTok->IsFirst = IsFirstToken;
1217     IsFirstToken = false;
1218 
1219     // Consume and record whitespace until we find a significant token.
1220     unsigned WhitespaceLength = TrailingWhitespace;
1221     while (FormatTok->Tok.is(tok::unknown)) {
1222       for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
1223         switch (FormatTok->TokenText[i]) {
1224         case '\n':
1225           ++FormatTok->NewlinesBefore;
1226           // FIXME: This is technically incorrect, as it could also
1227           // be a literal backslash at the end of the line.
1228           if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
1229                          (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
1230                           FormatTok->TokenText[i - 2] != '\\')))
1231             FormatTok->HasUnescapedNewline = true;
1232           FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1233           Column = 0;
1234           break;
1235         case '\r':
1236         case '\f':
1237         case '\v':
1238           Column = 0;
1239           break;
1240         case ' ':
1241           ++Column;
1242           break;
1243         case '\t':
1244           Column += Style.TabWidth - Column % Style.TabWidth;
1245           break;
1246         case '\\':
1247           ++Column;
1248           if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
1249                              FormatTok->TokenText[i + 1] != '\n'))
1250             FormatTok->Type = TT_ImplicitStringLiteral;
1251           break;
1252         default:
1253           FormatTok->Type = TT_ImplicitStringLiteral;
1254           ++Column;
1255           break;
1256         }
1257       }
1258 
1259       if (FormatTok->Type == TT_ImplicitStringLiteral)
1260         break;
1261       WhitespaceLength += FormatTok->Tok.getLength();
1262 
1263       readRawToken(*FormatTok);
1264     }
1265 
1266     // In case the token starts with escaped newlines, we want to
1267     // take them into account as whitespace - this pattern is quite frequent
1268     // in macro definitions.
1269     // FIXME: Add a more explicit test.
1270     while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
1271            FormatTok->TokenText[1] == '\n') {
1272       // FIXME: ++FormatTok->NewlinesBefore is missing...
1273       WhitespaceLength += 2;
1274       Column = 0;
1275       FormatTok->TokenText = FormatTok->TokenText.substr(2);
1276     }
1277 
1278     FormatTok->WhitespaceRange = SourceRange(
1279         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1280 
1281     FormatTok->OriginalColumn = Column;
1282 
1283     TrailingWhitespace = 0;
1284     if (FormatTok->Tok.is(tok::comment)) {
1285       // FIXME: Add the trimmed whitespace to Column.
1286       StringRef UntrimmedText = FormatTok->TokenText;
1287       FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
1288       TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
1289     } else if (FormatTok->Tok.is(tok::raw_identifier)) {
1290       IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
1291       FormatTok->Tok.setIdentifierInfo(&Info);
1292       FormatTok->Tok.setKind(Info.getTokenID());
1293     } else if (FormatTok->Tok.is(tok::greatergreater)) {
1294       FormatTok->Tok.setKind(tok::greater);
1295       FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1296       GreaterStashed = true;
1297     }
1298 
1299     // Now FormatTok is the next non-whitespace token.
1300 
1301     StringRef Text = FormatTok->TokenText;
1302     size_t FirstNewlinePos = Text.find('\n');
1303     if (FirstNewlinePos == StringRef::npos) {
1304       // FIXME: ColumnWidth actually depends on the start column, we need to
1305       // take this into account when the token is moved.
1306       FormatTok->ColumnWidth =
1307           encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
1308       Column += FormatTok->ColumnWidth;
1309     } else {
1310       FormatTok->IsMultiline = true;
1311       // FIXME: ColumnWidth actually depends on the start column, we need to
1312       // take this into account when the token is moved.
1313       FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
1314           Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
1315 
1316       // The last line of the token always starts in column 0.
1317       // Thus, the length can be precomputed even in the presence of tabs.
1318       FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
1319           Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
1320           Encoding);
1321       Column = FormatTok->LastLineColumnWidth;
1322     }
1323 
1324     return FormatTok;
1325   }
1326 
1327   FormatToken *FormatTok;
1328   bool IsFirstToken;
1329   bool GreaterStashed;
1330   unsigned Column;
1331   unsigned TrailingWhitespace;
1332   Lexer &Lex;
1333   SourceManager &SourceMgr;
1334   FormatStyle &Style;
1335   IdentifierTable IdentTable;
1336   encoding::Encoding Encoding;
1337   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
1338   SmallVector<FormatToken *, 16> Tokens;
1339 
1340   void readRawToken(FormatToken &Tok) {
1341     Lex.LexFromRawLexer(Tok.Tok);
1342     Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1343                               Tok.Tok.getLength());
1344     // For formatting, treat unterminated string literals like normal string
1345     // literals.
1346     if (Tok.is(tok::unknown)) {
1347       if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
1348         Tok.Tok.setKind(tok::string_literal);
1349         Tok.IsUnterminatedLiteral = true;
1350       } else if (Style.Language == FormatStyle::LK_JavaScript &&
1351                  Tok.TokenText == "''") {
1352         Tok.Tok.setKind(tok::char_constant);
1353       }
1354     }
1355   }
1356 };
1357 
1358 static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1359   switch (Language) {
1360   case FormatStyle::LK_Cpp:
1361     return "C++";
1362   case FormatStyle::LK_JavaScript:
1363     return "JavaScript";
1364   case FormatStyle::LK_Proto:
1365     return "Proto";
1366   default:
1367     return "Unknown";
1368   }
1369 }
1370 
1371 class Formatter : public UnwrappedLineConsumer {
1372 public:
  /// \brief Sets up a formatter for the buffer read by \p Lex.
  ///
  /// The whitespace manager is told whether the input uses CRLF line
  /// endings, \p Ranges is copied, \c UnwrappedLines starts out with a single
  /// empty entry and the encoding is detected from the buffer contents. The
  /// detected encoding and the language of \p Style are written to the debug
  /// output.
  Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
            const std::vector<CharSourceRange> &Ranges)
      : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
        Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
        Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
        Encoding(encoding::detectEncoding(Lex.getBuffer())) {
    DEBUG(llvm::dbgs() << "File encoding: "
                       << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
                                                               : "unknown")
                       << "\n");
    DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
                       << "\n");
  }
1386 
1387   tooling::Replacements format() {
1388     tooling::Replacements Result;
1389     FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
1390 
1391     UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
1392     bool StructuralError = Parser.parse();
1393     assert(UnwrappedLines.rbegin()->empty());
1394     for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1395          ++Run) {
1396       DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1397       SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1398       for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1399         AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1400       }
1401       tooling::Replacements RunResult =
1402           format(AnnotatedLines, StructuralError, Tokens);
1403       DEBUG({
1404         llvm::dbgs() << "Replacements for run " << Run << ":\n";
1405         for (tooling::Replacements::iterator I = RunResult.begin(),
1406                                              E = RunResult.end();
1407              I != E; ++I) {
1408           llvm::dbgs() << I->toString() << "\n";
1409         }
1410       });
1411       for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1412         delete AnnotatedLines[i];
1413       }
1414       Result.insert(RunResult.begin(), RunResult.end());
1415       Whitespaces.reset();
1416     }
1417     return Result;
1418   }
1419 
1420   tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1421                                bool StructuralError, FormatTokenLexer &Tokens) {
1422     TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
1423     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1424       Annotator.annotate(*AnnotatedLines[i]);
1425     }
1426     deriveLocalStyle(AnnotatedLines);
1427     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1428       Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1429     }
1430     computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
1431 
1432     Annotator.setCommentLineLevels(AnnotatedLines);
1433     ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding,
1434                                   BinPackInconclusiveFunctions);
1435     UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style);
1436     Formatter.format(AnnotatedLines, /*DryRun=*/false);
1437     return Whitespaces.generateReplacements();
1438   }
1439 
1440 private:
1441   // Determines which lines are affected by the SourceRanges given as input.
1442   // Returns \c true if at least one line between I and E or one of their
1443   // children is affected.
1444   bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
1445                             SmallVectorImpl<AnnotatedLine *>::iterator E) {
1446     bool SomeLineAffected = false;
1447     const AnnotatedLine *PreviousLine = NULL;
1448     while (I != E) {
1449       AnnotatedLine *Line = *I;
1450       Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
1451 
1452       // If a line is part of a preprocessor directive, it needs to be formatted
1453       // if any token within the directive is affected.
1454       if (Line->InPPDirective) {
1455         FormatToken *Last = Line->Last;
1456         SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
1457         while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
1458           Last = (*PPEnd)->Last;
1459           ++PPEnd;
1460         }
1461 
1462         if (affectsTokenRange(*Line->First, *Last,
1463                               /*IncludeLeadingNewlines=*/false)) {
1464           SomeLineAffected = true;
1465           markAllAsAffected(I, PPEnd);
1466         }
1467         I = PPEnd;
1468         continue;
1469       }
1470 
1471       if (nonPPLineAffected(Line, PreviousLine))
1472         SomeLineAffected = true;
1473 
1474       PreviousLine = Line;
1475       ++I;
1476     }
1477     return SomeLineAffected;
1478   }
1479 
1480   // Determines whether 'Line' is affected by the SourceRanges given as input.
1481   // Returns \c true if line or one if its children is affected.
1482   bool nonPPLineAffected(AnnotatedLine *Line,
1483                          const AnnotatedLine *PreviousLine) {
1484     bool SomeLineAffected = false;
1485     Line->ChildrenAffected =
1486         computeAffectedLines(Line->Children.begin(), Line->Children.end());
1487     if (Line->ChildrenAffected)
1488       SomeLineAffected = true;
1489 
1490     // Stores whether one of the line's tokens is directly affected.
1491     bool SomeTokenAffected = false;
1492     // Stores whether we need to look at the leading newlines of the next token
1493     // in order to determine whether it was affected.
1494     bool IncludeLeadingNewlines = false;
1495 
1496     // Stores whether the first child line of any of this line's tokens is
1497     // affected.
1498     bool SomeFirstChildAffected = false;
1499 
1500     for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
1501       // Determine whether 'Tok' was affected.
1502       if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
1503         SomeTokenAffected = true;
1504 
1505       // Determine whether the first child of 'Tok' was affected.
1506       if (!Tok->Children.empty() && Tok->Children.front()->Affected)
1507         SomeFirstChildAffected = true;
1508 
1509       IncludeLeadingNewlines = Tok->Children.empty();
1510     }
1511 
1512     // Was this line moved, i.e. has it previously been on the same line as an
1513     // affected line?
1514     bool LineMoved = PreviousLine && PreviousLine->Affected &&
1515                      Line->First->NewlinesBefore == 0;
1516 
1517     bool IsContinuedComment = Line->First->is(tok::comment) &&
1518                               Line->First->Next == NULL &&
1519                               Line->First->NewlinesBefore < 2 && PreviousLine &&
1520                               PreviousLine->Affected &&
1521                               PreviousLine->Last->is(tok::comment);
1522 
1523     if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
1524         IsContinuedComment) {
1525       Line->Affected = true;
1526       SomeLineAffected = true;
1527     }
1528     return SomeLineAffected;
1529   }
1530 
1531   // Marks all lines between I and E as well as all their children as affected.
1532   void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
1533                          SmallVectorImpl<AnnotatedLine *>::iterator E) {
1534     while (I != E) {
1535       (*I)->Affected = true;
1536       markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
1537       ++I;
1538     }
1539   }
1540 
1541   // Returns true if the range from 'First' to 'Last' intersects with one of the
1542   // input ranges.
1543   bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
1544                          bool IncludeLeadingNewlines) {
1545     SourceLocation Start = First.WhitespaceRange.getBegin();
1546     if (!IncludeLeadingNewlines)
1547       Start = Start.getLocWithOffset(First.LastNewlineOffset);
1548     SourceLocation End = Last.getStartOfNonWhitespace();
1549     if (Last.TokenText.size() > 0)
1550       End = End.getLocWithOffset(Last.TokenText.size() - 1);
1551     CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
1552     return affectsCharSourceRange(Range);
1553   }
1554 
1555   // Returns true if one of the input ranges intersect the leading empty lines
1556   // before 'Tok'.
1557   bool affectsLeadingEmptyLines(const FormatToken &Tok) {
1558     CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
1559         Tok.WhitespaceRange.getBegin(),
1560         Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
1561     return affectsCharSourceRange(EmptyLineRange);
1562   }
1563 
1564   // Returns true if 'Range' intersects with one of the input ranges.
1565   bool affectsCharSourceRange(const CharSourceRange &Range) {
1566     for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
1567                                                           E = Ranges.end();
1568          I != E; ++I) {
1569       if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
1570           !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
1571         return true;
1572     }
1573     return false;
1574   }
1575 
1576   static bool inputUsesCRLF(StringRef Text) {
1577     return Text.count('\r') * 2 > Text.count('\n');
1578   }
1579 
1580   void
1581   deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1582     unsigned CountBoundToVariable = 0;
1583     unsigned CountBoundToType = 0;
1584     bool HasCpp03IncompatibleFormat = false;
1585     bool HasBinPackedFunction = false;
1586     bool HasOnePerLineFunction = false;
1587     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1588       if (!AnnotatedLines[i]->First->Next)
1589         continue;
1590       FormatToken *Tok = AnnotatedLines[i]->First->Next;
1591       while (Tok->Next) {
1592         if (Tok->Type == TT_PointerOrReference) {
1593           bool SpacesBefore =
1594               Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1595           bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1596                              Tok->Next->WhitespaceRange.getEnd();
1597           if (SpacesBefore && !SpacesAfter)
1598             ++CountBoundToVariable;
1599           else if (!SpacesBefore && SpacesAfter)
1600             ++CountBoundToType;
1601         }
1602 
1603         if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1604           if (Tok->is(tok::coloncolon) &&
1605               Tok->Previous->Type == TT_TemplateOpener)
1606             HasCpp03IncompatibleFormat = true;
1607           if (Tok->Type == TT_TemplateCloser &&
1608               Tok->Previous->Type == TT_TemplateCloser)
1609             HasCpp03IncompatibleFormat = true;
1610         }
1611 
1612         if (Tok->PackingKind == PPK_BinPacked)
1613           HasBinPackedFunction = true;
1614         if (Tok->PackingKind == PPK_OnePerLine)
1615           HasOnePerLineFunction = true;
1616 
1617         Tok = Tok->Next;
1618       }
1619     }
1620     if (Style.DerivePointerBinding) {
1621       if (CountBoundToType > CountBoundToVariable)
1622         Style.PointerBindsToType = true;
1623       else if (CountBoundToType < CountBoundToVariable)
1624         Style.PointerBindsToType = false;
1625     }
1626     if (Style.Standard == FormatStyle::LS_Auto) {
1627       Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1628                                                   : FormatStyle::LS_Cpp03;
1629     }
1630     BinPackInconclusiveFunctions =
1631         HasBinPackedFunction || !HasOnePerLineFunction;
1632   }
1633 
1634   virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1635     assert(!UnwrappedLines.empty());
1636     UnwrappedLines.back().push_back(TheLine);
1637   }
1638 
1639   virtual void finishRun() {
1640     UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1641   }
1642 
1643   FormatStyle Style;
1644   Lexer &Lex;
1645   SourceManager &SourceMgr;
1646   WhitespaceManager Whitespaces;
1647   SmallVector<CharSourceRange, 8> Ranges;
1648   SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
1649 
1650   encoding::Encoding Encoding;
1651   bool BinPackInconclusiveFunctions;
1652 };
1653 
1654 } // end anonymous namespace
1655 
1656 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1657                                SourceManager &SourceMgr,
1658                                std::vector<CharSourceRange> Ranges) {
1659   Formatter formatter(Style, Lex, SourceMgr, Ranges);
1660   return formatter.format();
1661 }
1662 
1663 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1664                                std::vector<tooling::Range> Ranges,
1665                                StringRef FileName) {
1666   FileManager Files((FileSystemOptions()));
1667   DiagnosticsEngine Diagnostics(
1668       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1669       new DiagnosticOptions);
1670   SourceManager SourceMgr(Diagnostics, Files);
1671   llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1672   const clang::FileEntry *Entry =
1673       Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1674   SourceMgr.overrideFileContents(Entry, Buf);
1675   FileID ID =
1676       SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1677   Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
1678             getFormattingLangOpts(Style.Standard));
1679   SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1680   std::vector<CharSourceRange> CharRanges;
1681   for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1682     SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset());
1683     SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
1684     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1685   }
1686   return reformat(Style, Lex, SourceMgr, CharRanges);
1687 }
1688 
1689 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) {
1690   LangOptions LangOpts;
1691   LangOpts.CPlusPlus = 1;
1692   LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1693   LangOpts.LineComment = 1;
1694   LangOpts.Bool = 1;
1695   LangOpts.ObjC1 = 1;
1696   LangOpts.ObjC2 = 1;
1697   return LangOpts;
1698 }
1699 
// Help text describing the values accepted by the -style option.
const char *StyleOptionHelpDescription =
    // Predefined style names.
    "Coding style, currently supports:\n"
    "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
    // Configuration file lookup.
    "Use -style=file to load style configuration from\n"
    ".clang-format file located in one of the parent\n"
    "directories of the source file (or current\n"
    "directory for stdin).\n"
    // Inline configuration.
    "Use -style=\"{key: value, ...}\" to set specific\n"
    "parameters, e.g.:\n"
    "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
1710 
1711 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
1712   if (FileName.endswith_lower(".js")) {
1713     return FormatStyle::LK_JavaScript;
1714   } else if (FileName.endswith_lower(".proto") ||
1715              FileName.endswith_lower(".protodevel")) {
1716     return FormatStyle::LK_Proto;
1717   }
1718   return FormatStyle::LK_Cpp;
1719 }
1720 
1721 FormatStyle getStyle(StringRef StyleName, StringRef FileName,
1722                      StringRef FallbackStyle) {
1723   FormatStyle Style = getLLVMStyle();
1724   Style.Language = getLanguageByFileName(FileName);
1725   if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
1726     llvm::errs() << "Invalid fallback style \"" << FallbackStyle
1727                  << "\" using LLVM style\n";
1728     return Style;
1729   }
1730 
1731   if (StyleName.startswith("{")) {
1732     // Parse YAML/JSON style from the command line.
1733     if (llvm::error_code ec = parseConfiguration(StyleName, &Style)) {
1734       llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
1735                    << FallbackStyle << " style\n";
1736     }
1737     return Style;
1738   }
1739 
1740   if (!StyleName.equals_lower("file")) {
1741     if (!getPredefinedStyle(StyleName, Style.Language, &Style))
1742       llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1743                    << " style\n";
1744     return Style;
1745   }
1746 
1747   // Look for .clang-format/_clang-format file in the file's parent directories.
1748   SmallString<128> UnsuitableConfigFiles;
1749   SmallString<128> Path(FileName);
1750   llvm::sys::fs::make_absolute(Path);
1751   for (StringRef Directory = Path; !Directory.empty();
1752        Directory = llvm::sys::path::parent_path(Directory)) {
1753     if (!llvm::sys::fs::is_directory(Directory))
1754       continue;
1755     SmallString<128> ConfigFile(Directory);
1756 
1757     llvm::sys::path::append(ConfigFile, ".clang-format");
1758     DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1759     bool IsFile = false;
1760     // Ignore errors from is_regular_file: we only need to know if we can read
1761     // the file or not.
1762     llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1763 
1764     if (!IsFile) {
1765       // Try _clang-format too, since dotfiles are not commonly used on Windows.
1766       ConfigFile = Directory;
1767       llvm::sys::path::append(ConfigFile, "_clang-format");
1768       DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1769       llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1770     }
1771 
1772     if (IsFile) {
1773       OwningPtr<llvm::MemoryBuffer> Text;
1774       if (llvm::error_code ec =
1775               llvm::MemoryBuffer::getFile(ConfigFile.c_str(), Text)) {
1776         llvm::errs() << ec.message() << "\n";
1777         break;
1778       }
1779       if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
1780         if (ec == llvm::errc::not_supported) {
1781           if (!UnsuitableConfigFiles.empty())
1782             UnsuitableConfigFiles.append(", ");
1783           UnsuitableConfigFiles.append(ConfigFile);
1784           continue;
1785         }
1786         llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
1787                      << "\n";
1788         break;
1789       }
1790       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
1791       return Style;
1792     }
1793   }
1794   llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
1795                << " style\n";
1796   if (!UnsuitableConfigFiles.empty()) {
1797     llvm::errs() << "Configuration file(s) do(es) not support "
1798                  << getLanguageName(Style.Language) << ": "
1799                  << UnsuitableConfigFiles << "\n";
1800   }
1801   return Style;
1802 }
1803 
1804 } // namespace format
1805 } // namespace clang
1806