1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "format-formatter"
17 
18 #include "ContinuationIndenter.h"
19 #include "TokenAnnotator.h"
20 #include "UnwrappedLineParser.h"
21 #include "WhitespaceManager.h"
22 #include "clang/Basic/Diagnostic.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Format/Format.h"
25 #include "clang/Lex/Lexer.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/YAMLTraits.h"
31 #include <queue>
32 #include <string>
33 
34 using clang::format::FormatStyle;
35 
36 namespace llvm {
37 namespace yaml {
38 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
39   static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
40     IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
41     IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
42   }
43 };
44 
45 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
46   static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
47     IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
48     IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
49     IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
50     IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
51     IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
52   }
53 };
54 
55 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
56   static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
57     IO.enumCase(Value, "Never", FormatStyle::UT_Never);
58     IO.enumCase(Value, "false", FormatStyle::UT_Never);
59     IO.enumCase(Value, "Always", FormatStyle::UT_Always);
60     IO.enumCase(Value, "true", FormatStyle::UT_Always);
61     IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
62   }
63 };
64 
65 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
66   static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
67     IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
68     IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
69     IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
70     IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
71     IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
72   }
73 };
74 
75 template <>
76 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
77   static void enumeration(IO &IO,
78                           FormatStyle::NamespaceIndentationKind &Value) {
79     IO.enumCase(Value, "None", FormatStyle::NI_None);
80     IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
81     IO.enumCase(Value, "All", FormatStyle::NI_All);
82   }
83 };
84 
85 template <>
86 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
87   static void enumeration(IO &IO,
88                           FormatStyle::SpaceBeforeParensOptions &Value) {
89     IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
90     IO.enumCase(Value, "ControlStatements",
91                 FormatStyle::SBPO_ControlStatements);
92     IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
93 
94     // For backward compatibility.
95     IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
96     IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
97   }
98 };
99 
100 template <> struct MappingTraits<FormatStyle> {
101   static void mapping(IO &IO, FormatStyle &Style) {
102     // When reading, read the language first, we need it for getPredefinedStyle.
103     IO.mapOptional("Language", Style.Language);
104 
105     if (IO.outputting()) {
106       StringRef StylesArray[] = { "LLVM",    "Google", "Chromium",
107                                   "Mozilla", "WebKit", "GNU" };
108       ArrayRef<StringRef> Styles(StylesArray);
109       for (size_t i = 0, e = Styles.size(); i < e; ++i) {
110         StringRef StyleName(Styles[i]);
111         FormatStyle PredefinedStyle;
112         if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
113             Style == PredefinedStyle) {
114           IO.mapOptional("# BasedOnStyle", StyleName);
115           break;
116         }
117       }
118     } else {
119       StringRef BasedOnStyle;
120       IO.mapOptional("BasedOnStyle", BasedOnStyle);
121       if (!BasedOnStyle.empty()) {
122         FormatStyle::LanguageKind OldLanguage = Style.Language;
123         FormatStyle::LanguageKind Language =
124             ((FormatStyle *)IO.getContext())->Language;
125         if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
126           IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
127           return;
128         }
129         Style.Language = OldLanguage;
130       }
131     }
132 
133     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
134     IO.mapOptional("ConstructorInitializerIndentWidth",
135                    Style.ConstructorInitializerIndentWidth);
136     IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
137     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
138     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
139                    Style.AllowAllParametersOfDeclarationOnNextLine);
140     IO.mapOptional("AllowShortIfStatementsOnASingleLine",
141                    Style.AllowShortIfStatementsOnASingleLine);
142     IO.mapOptional("AllowShortLoopsOnASingleLine",
143                    Style.AllowShortLoopsOnASingleLine);
144     IO.mapOptional("AllowShortFunctionsOnASingleLine",
145                    Style.AllowShortFunctionsOnASingleLine);
146     IO.mapOptional("AlwaysBreakTemplateDeclarations",
147                    Style.AlwaysBreakTemplateDeclarations);
148     IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
149                    Style.AlwaysBreakBeforeMultilineStrings);
150     IO.mapOptional("BreakBeforeBinaryOperators",
151                    Style.BreakBeforeBinaryOperators);
152     IO.mapOptional("BreakBeforeTernaryOperators",
153                    Style.BreakBeforeTernaryOperators);
154     IO.mapOptional("BreakConstructorInitializersBeforeComma",
155                    Style.BreakConstructorInitializersBeforeComma);
156     IO.mapOptional("BinPackParameters", Style.BinPackParameters);
157     IO.mapOptional("ColumnLimit", Style.ColumnLimit);
158     IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
159                    Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
160     IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding);
161     IO.mapOptional("ExperimentalAutoDetectBinPacking",
162                    Style.ExperimentalAutoDetectBinPacking);
163     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
164     IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
165     IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
166     IO.mapOptional("ObjCSpaceBeforeProtocolList",
167                    Style.ObjCSpaceBeforeProtocolList);
168     IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
169                    Style.PenaltyBreakBeforeFirstCallParameter);
170     IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
171     IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
172     IO.mapOptional("PenaltyBreakFirstLessLess",
173                    Style.PenaltyBreakFirstLessLess);
174     IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
175     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
176                    Style.PenaltyReturnTypeOnItsOwnLine);
177     IO.mapOptional("PointerBindsToType", Style.PointerBindsToType);
178     IO.mapOptional("SpacesBeforeTrailingComments",
179                    Style.SpacesBeforeTrailingComments);
180     IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
181     IO.mapOptional("Standard", Style.Standard);
182     IO.mapOptional("IndentWidth", Style.IndentWidth);
183     IO.mapOptional("TabWidth", Style.TabWidth);
184     IO.mapOptional("UseTab", Style.UseTab);
185     IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
186     IO.mapOptional("IndentFunctionDeclarationAfterType",
187                    Style.IndentFunctionDeclarationAfterType);
188     IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
189     IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
190     IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
191     IO.mapOptional("SpacesInCStyleCastParentheses",
192                    Style.SpacesInCStyleCastParentheses);
193     IO.mapOptional("SpacesInContainerLiterals",
194                    Style.SpacesInContainerLiterals);
195     IO.mapOptional("SpaceBeforeAssignmentOperators",
196                    Style.SpaceBeforeAssignmentOperators);
197     IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
198     IO.mapOptional("CommentPragmas", Style.CommentPragmas);
199 
200     // For backward compatibility.
201     if (!IO.outputting()) {
202       IO.mapOptional("SpaceAfterControlStatementKeyword",
203                      Style.SpaceBeforeParens);
204     }
205     IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
206   }
207 };
208 
209 // Allows to read vector<FormatStyle> while keeping default values.
210 // IO.getContext() should contain a pointer to the FormatStyle structure, that
211 // will be used to get default values for missing keys.
212 // If the first element has no Language specified, it will be treated as the
213 // default one for the following elements.
214 template <> struct DocumentListTraits<std::vector<FormatStyle> > {
215   static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
216     return Seq.size();
217   }
218   static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
219                               size_t Index) {
220     if (Index >= Seq.size()) {
221       assert(Index == Seq.size());
222       FormatStyle Template;
223       if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
224         Template = Seq[0];
225       } else {
226         Template = *((const FormatStyle*)IO.getContext());
227         Template.Language = FormatStyle::LK_None;
228       }
229       Seq.resize(Index + 1, Template);
230     }
231     return Seq[Index];
232   }
233 };
234 }
235 }
236 
237 namespace clang {
238 namespace format {
239 
240 FormatStyle getLLVMStyle() {
241   FormatStyle LLVMStyle;
242   LLVMStyle.Language = FormatStyle::LK_Cpp;
243   LLVMStyle.AccessModifierOffset = -2;
244   LLVMStyle.AlignEscapedNewlinesLeft = false;
245   LLVMStyle.AlignTrailingComments = true;
246   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
247   LLVMStyle.AllowShortFunctionsOnASingleLine = true;
248   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
249   LLVMStyle.AllowShortLoopsOnASingleLine = false;
250   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
251   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
252   LLVMStyle.BinPackParameters = true;
253   LLVMStyle.BreakBeforeBinaryOperators = false;
254   LLVMStyle.BreakBeforeTernaryOperators = true;
255   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
256   LLVMStyle.BreakConstructorInitializersBeforeComma = false;
257   LLVMStyle.ColumnLimit = 80;
258   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
259   LLVMStyle.ConstructorInitializerIndentWidth = 4;
260   LLVMStyle.Cpp11BracedListStyle = false;
261   LLVMStyle.DerivePointerBinding = false;
262   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
263   LLVMStyle.IndentCaseLabels = false;
264   LLVMStyle.IndentFunctionDeclarationAfterType = false;
265   LLVMStyle.IndentWidth = 2;
266   LLVMStyle.TabWidth = 8;
267   LLVMStyle.MaxEmptyLinesToKeep = 1;
268   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
269   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
270   LLVMStyle.PointerBindsToType = false;
271   LLVMStyle.SpacesBeforeTrailingComments = 1;
272   LLVMStyle.Standard = FormatStyle::LS_Cpp03;
273   LLVMStyle.UseTab = FormatStyle::UT_Never;
274   LLVMStyle.SpacesInParentheses = false;
275   LLVMStyle.SpaceInEmptyParentheses = false;
276   LLVMStyle.SpacesInContainerLiterals = true;
277   LLVMStyle.SpacesInCStyleCastParentheses = false;
278   LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
279   LLVMStyle.SpaceBeforeAssignmentOperators = true;
280   LLVMStyle.ContinuationIndentWidth = 4;
281   LLVMStyle.SpacesInAngles = false;
282   LLVMStyle.CommentPragmas = "^ IWYU pragma:";
283 
284   LLVMStyle.PenaltyBreakComment = 300;
285   LLVMStyle.PenaltyBreakFirstLessLess = 120;
286   LLVMStyle.PenaltyBreakString = 1000;
287   LLVMStyle.PenaltyExcessCharacter = 1000000;
288   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
289   LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
290 
291   return LLVMStyle;
292 }
293 
294 FormatStyle getGoogleStyle() {
295   FormatStyle GoogleStyle = getLLVMStyle();
296   GoogleStyle.AccessModifierOffset = -1;
297   GoogleStyle.AlignEscapedNewlinesLeft = true;
298   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
299   GoogleStyle.AllowShortLoopsOnASingleLine = true;
300   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
301   GoogleStyle.AlwaysBreakTemplateDeclarations = true;
302   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
303   GoogleStyle.Cpp11BracedListStyle = true;
304   GoogleStyle.DerivePointerBinding = true;
305   GoogleStyle.IndentCaseLabels = true;
306   GoogleStyle.IndentFunctionDeclarationAfterType = true;
307   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
308   GoogleStyle.PointerBindsToType = true;
309   GoogleStyle.SpacesBeforeTrailingComments = 2;
310   GoogleStyle.Standard = FormatStyle::LS_Auto;
311 
312   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
313   GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
314 
315   return GoogleStyle;
316 }
317 
318 FormatStyle getGoogleJSStyle() {
319   FormatStyle GoogleJSStyle = getGoogleStyle();
320   GoogleJSStyle.Language = FormatStyle::LK_JavaScript;
321   GoogleJSStyle.BreakBeforeTernaryOperators = false;
322   GoogleJSStyle.SpacesInContainerLiterals = false;
323   return GoogleJSStyle;
324 }
325 
326 FormatStyle getChromiumStyle() {
327   FormatStyle ChromiumStyle = getGoogleStyle();
328   ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
329   ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
330   ChromiumStyle.AllowShortLoopsOnASingleLine = false;
331   ChromiumStyle.BinPackParameters = false;
332   ChromiumStyle.DerivePointerBinding = false;
333   ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
334   return ChromiumStyle;
335 }
336 
337 FormatStyle getMozillaStyle() {
338   FormatStyle MozillaStyle = getLLVMStyle();
339   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
340   MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
341   MozillaStyle.DerivePointerBinding = true;
342   MozillaStyle.IndentCaseLabels = true;
343   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
344   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
345   MozillaStyle.PointerBindsToType = true;
346   return MozillaStyle;
347 }
348 
349 FormatStyle getWebKitStyle() {
350   FormatStyle Style = getLLVMStyle();
351   Style.AccessModifierOffset = -4;
352   Style.AlignTrailingComments = false;
353   Style.BreakBeforeBinaryOperators = true;
354   Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
355   Style.BreakConstructorInitializersBeforeComma = true;
356   Style.ColumnLimit = 0;
357   Style.IndentWidth = 4;
358   Style.NamespaceIndentation = FormatStyle::NI_Inner;
359   Style.PointerBindsToType = true;
360   return Style;
361 }
362 
363 FormatStyle getGNUStyle() {
364   FormatStyle Style = getLLVMStyle();
365   Style.BreakBeforeBinaryOperators = true;
366   Style.BreakBeforeBraces = FormatStyle::BS_GNU;
367   Style.BreakBeforeTernaryOperators = true;
368   Style.ColumnLimit = 79;
369   Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
370   return Style;
371 }
372 
373 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
374                         FormatStyle *Style) {
375   if (Name.equals_lower("llvm")) {
376     *Style = getLLVMStyle();
377   } else if (Name.equals_lower("chromium")) {
378     *Style = getChromiumStyle();
379   } else if (Name.equals_lower("mozilla")) {
380     *Style = getMozillaStyle();
381   } else if (Name.equals_lower("google")) {
382     *Style = Language == FormatStyle::LK_JavaScript ? getGoogleJSStyle()
383                                                     : getGoogleStyle();
384   } else if (Name.equals_lower("webkit")) {
385     *Style = getWebKitStyle();
386   } else if (Name.equals_lower("gnu")) {
387     *Style = getGNUStyle();
388   } else {
389     return false;
390   }
391 
392   Style->Language = Language;
393   return true;
394 }
395 
396 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
397   assert(Style);
398   FormatStyle::LanguageKind Language = Style->Language;
399   assert(Language != FormatStyle::LK_None);
400   if (Text.trim().empty())
401     return llvm::make_error_code(llvm::errc::invalid_argument);
402 
403   std::vector<FormatStyle> Styles;
404   llvm::yaml::Input Input(Text);
405   // DocumentListTraits<vector<FormatStyle>> uses the context to get default
406   // values for the fields, keys for which are missing from the configuration.
407   // Mapping also uses the context to get the language to find the correct
408   // base style.
409   Input.setContext(Style);
410   Input >> Styles;
411   if (Input.error())
412     return Input.error();
413 
414   for (unsigned i = 0; i < Styles.size(); ++i) {
415     // Ensures that only the first configuration can skip the Language option.
416     if (Styles[i].Language == FormatStyle::LK_None && i != 0)
417       return llvm::make_error_code(llvm::errc::invalid_argument);
418     // Ensure that each language is configured at most once.
419     for (unsigned j = 0; j < i; ++j) {
420       if (Styles[i].Language == Styles[j].Language) {
421         DEBUG(llvm::dbgs()
422               << "Duplicate languages in the config file on positions " << j
423               << " and " << i << "\n");
424         return llvm::make_error_code(llvm::errc::invalid_argument);
425       }
426     }
427   }
428   // Look for a suitable configuration starting from the end, so we can
429   // find the configuration for the specific language first, and the default
430   // configuration (which can only be at slot 0) after it.
431   for (int i = Styles.size() - 1; i >= 0; --i) {
432     if (Styles[i].Language == Language ||
433         Styles[i].Language == FormatStyle::LK_None) {
434       *Style = Styles[i];
435       Style->Language = Language;
436       return llvm::make_error_code(llvm::errc::success);
437     }
438   }
439   return llvm::make_error_code(llvm::errc::not_supported);
440 }
441 
442 std::string configurationAsText(const FormatStyle &Style) {
443   std::string Text;
444   llvm::raw_string_ostream Stream(Text);
445   llvm::yaml::Output Output(Stream);
446   // We use the same mapping method for input and output, so we need a non-const
447   // reference here.
448   FormatStyle NonConstStyle = Style;
449   Output << NonConstStyle;
450   return Stream.str();
451 }
452 
453 namespace {
454 
455 class NoColumnLimitFormatter {
456 public:
457   NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {}
458 
459   /// \brief Formats the line starting at \p State, simply keeping all of the
460   /// input's line breaking decisions.
461   void format(unsigned FirstIndent, const AnnotatedLine *Line) {
462     LineState State =
463         Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false);
464     while (State.NextToken != NULL) {
465       bool Newline =
466           Indenter->mustBreak(State) ||
467           (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
468       Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
469     }
470   }
471 
472 private:
473   ContinuationIndenter *Indenter;
474 };
475 
476 class LineJoiner {
477 public:
478   LineJoiner(const FormatStyle &Style) : Style(Style) {}
479 
480   /// \brief Calculates how many lines can be merged into 1 starting at \p I.
481   unsigned
482   tryFitMultipleLinesInOne(unsigned Indent,
483                            SmallVectorImpl<AnnotatedLine *>::const_iterator I,
484                            SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
485     // We can never merge stuff if there are trailing line comments.
486     const AnnotatedLine *TheLine = *I;
487     if (TheLine->Last->Type == TT_LineComment)
488       return 0;
489 
490     if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
491       return 0;
492 
493     unsigned Limit =
494         Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
495     // If we already exceed the column limit, we set 'Limit' to 0. The different
496     // tryMerge..() functions can then decide whether to still do merging.
497     Limit = TheLine->Last->TotalLength > Limit
498                 ? 0
499                 : Limit - TheLine->Last->TotalLength;
500 
501     if (I + 1 == E || I[1]->Type == LT_Invalid)
502       return 0;
503 
504     if (TheLine->Last->Type == TT_FunctionLBrace &&
505         TheLine->First != TheLine->Last) {
506       return Style.AllowShortFunctionsOnASingleLine
507                  ? tryMergeSimpleBlock(I, E, Limit)
508                  : 0;
509     }
510     if (TheLine->Last->is(tok::l_brace)) {
511       return Style.BreakBeforeBraces == FormatStyle::BS_Attach
512                  ? tryMergeSimpleBlock(I, E, Limit)
513                  : 0;
514     }
515     if (I[1]->First->Type == TT_FunctionLBrace &&
516         Style.BreakBeforeBraces != FormatStyle::BS_Attach) {
517       // Check for Limit <= 2 to account for the " {".
518       if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine)))
519         return 0;
520       Limit -= 2;
521 
522       unsigned MergedLines = 0;
523       if (Style.AllowShortFunctionsOnASingleLine) {
524         MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
525         // If we managed to merge the block, count the function header, which is
526         // on a separate line.
527         if (MergedLines > 0)
528           ++MergedLines;
529       }
530       return MergedLines;
531     }
532     if (TheLine->First->is(tok::kw_if)) {
533       return Style.AllowShortIfStatementsOnASingleLine
534                  ? tryMergeSimpleControlStatement(I, E, Limit)
535                  : 0;
536     }
537     if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
538       return Style.AllowShortLoopsOnASingleLine
539                  ? tryMergeSimpleControlStatement(I, E, Limit)
540                  : 0;
541     }
542     if (TheLine->InPPDirective &&
543         (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
544       return tryMergeSimplePPDirective(I, E, Limit);
545     }
546     return 0;
547   }
548 
549 private:
550   unsigned
551   tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
552                             SmallVectorImpl<AnnotatedLine *>::const_iterator E,
553                             unsigned Limit) {
554     if (Limit == 0)
555       return 0;
556     if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)
557       return 0;
558     if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
559       return 0;
560     if (1 + I[1]->Last->TotalLength > Limit)
561       return 0;
562     return 1;
563   }
564 
565   unsigned tryMergeSimpleControlStatement(
566       SmallVectorImpl<AnnotatedLine *>::const_iterator I,
567       SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
568     if (Limit == 0)
569       return 0;
570     if ((Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
571          Style.BreakBeforeBraces == FormatStyle::BS_GNU) &&
572         I[1]->First->is(tok::l_brace))
573       return 0;
574     if (I[1]->InPPDirective != (*I)->InPPDirective ||
575         (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline))
576       return 0;
577     AnnotatedLine &Line = **I;
578     if (Line.Last->isNot(tok::r_paren))
579       return 0;
580     if (1 + I[1]->Last->TotalLength > Limit)
581       return 0;
582     if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
583                              tok::kw_while) ||
584         I[1]->First->Type == TT_LineComment)
585       return 0;
586     // Only inline simple if's (no nested if or else).
587     if (I + 2 != E && Line.First->is(tok::kw_if) &&
588         I[2]->First->is(tok::kw_else))
589       return 0;
590     return 1;
591   }
592 
593   unsigned
594   tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
595                       SmallVectorImpl<AnnotatedLine *>::const_iterator E,
596                       unsigned Limit) {
597     // First, check that the current line allows merging. This is the case if
598     // we're not in a control flow statement and the last token is an opening
599     // brace.
600     AnnotatedLine &Line = **I;
601     if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
602                             tok::kw_else, tok::kw_try, tok::kw_catch,
603                             tok::kw_for,
604                             // This gets rid of all ObjC @ keywords and methods.
605                             tok::at, tok::minus, tok::plus))
606       return 0;
607 
608     FormatToken *Tok = I[1]->First;
609     if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
610         (Tok->getNextNonComment() == NULL ||
611          Tok->getNextNonComment()->is(tok::semi))) {
612       // We merge empty blocks even if the line exceeds the column limit.
613       Tok->SpacesRequiredBefore = 0;
614       Tok->CanBreakBefore = true;
615       return 1;
616     } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
617       // Check that we still have three lines and they fit into the limit.
618       if (I + 2 == E || I[2]->Type == LT_Invalid)
619         return 0;
620 
621       if (!nextTwoLinesFitInto(I, Limit))
622         return 0;
623 
624       // Second, check that the next line does not contain any braces - if it
625       // does, readability declines when putting it into a single line.
626       if (I[1]->Last->Type == TT_LineComment || Tok->MustBreakBefore)
627         return 0;
628       do {
629         if (Tok->isOneOf(tok::l_brace, tok::r_brace))
630           return 0;
631         Tok = Tok->Next;
632       } while (Tok != NULL);
633 
634       // Last, check that the third line contains a single closing brace.
635       Tok = I[2]->First;
636       if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) ||
637           Tok->MustBreakBefore)
638         return 0;
639 
640       return 2;
641     }
642     return 0;
643   }
644 
645   bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
646                            unsigned Limit) {
647     return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
648   }
649 
650   bool containsMustBreak(const AnnotatedLine *Line) {
651     for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
652       if (Tok->MustBreakBefore)
653         return true;
654     }
655     return false;
656   }
657 
658   const FormatStyle &Style;
659 };
660 
661 class UnwrappedLineFormatter {
662 public:
663   UnwrappedLineFormatter(ContinuationIndenter *Indenter,
664                          WhitespaceManager *Whitespaces,
665                          const FormatStyle &Style)
666       : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
667         Joiner(Style) {}
668 
669   unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
670                   int AdditionalIndent = 0, bool FixBadIndentation = false) {
671     assert(!Lines.empty());
672     unsigned Penalty = 0;
673     std::vector<int> IndentForLevel;
674     for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i)
675       IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
676     const AnnotatedLine *PreviousLine = NULL;
677     for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(),
678                                                           E = Lines.end();
679          I != E; ++I) {
680       const AnnotatedLine &TheLine = **I;
681       const FormatToken *FirstTok = TheLine.First;
682       int Offset = getIndentOffset(*FirstTok);
683 
684       // Determine indent and try to merge multiple unwrapped lines.
685       unsigned Indent;
686       if (TheLine.InPPDirective) {
687         Indent = TheLine.Level * Style.IndentWidth;
688       } else {
689         while (IndentForLevel.size() <= TheLine.Level)
690           IndentForLevel.push_back(-1);
691         IndentForLevel.resize(TheLine.Level + 1);
692         Indent = getIndent(IndentForLevel, TheLine.Level);
693       }
694       unsigned LevelIndent = Indent;
695       if (static_cast<int>(Indent) + Offset >= 0)
696         Indent += Offset;
697 
698       // Merge multiple lines if possible.
699       unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E);
700       if (MergedLines > 0 && Style.ColumnLimit == 0) {
701         // Disallow line merging if there is a break at the start of one of the
702         // input lines.
703         for (unsigned i = 0; i < MergedLines; ++i) {
704           if (I[i + 1]->First->NewlinesBefore > 0)
705             MergedLines = 0;
706         }
707       }
708       if (!DryRun) {
709         for (unsigned i = 0; i < MergedLines; ++i) {
710           join(*I[i], *I[i + 1]);
711         }
712       }
713       I += MergedLines;
714 
715       bool FixIndentation =
716           FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn);
717       if (TheLine.First->is(tok::eof)) {
718         if (PreviousLine && PreviousLine->Affected && !DryRun) {
719           // Remove the file's trailing whitespace.
720           unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u);
721           Whitespaces->replaceWhitespace(*TheLine.First, Newlines,
722                                          /*IndentLevel=*/0, /*Spaces=*/0,
723                                          /*TargetColumn=*/0);
724         }
725       } else if (TheLine.Type != LT_Invalid &&
726                  (TheLine.Affected || FixIndentation)) {
727         if (FirstTok->WhitespaceRange.isValid()) {
728           if (!DryRun)
729             formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
730                              Indent, TheLine.InPPDirective);
731         } else {
732           Indent = LevelIndent = FirstTok->OriginalColumn;
733         }
734 
735         // If everything fits on a single line, just put it there.
736         unsigned ColumnLimit = Style.ColumnLimit;
737         if (I + 1 != E) {
738           AnnotatedLine *NextLine = I[1];
739           if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline)
740             ColumnLimit = getColumnLimit(TheLine.InPPDirective);
741         }
742 
743         if (TheLine.Last->TotalLength + Indent <= ColumnLimit) {
744           LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun);
745           while (State.NextToken != NULL)
746             Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
747         } else if (Style.ColumnLimit == 0) {
748           // FIXME: Implement nested blocks for ColumnLimit = 0.
749           NoColumnLimitFormatter Formatter(Indenter);
750           if (!DryRun)
751             Formatter.format(Indent, &TheLine);
752         } else {
753           Penalty += format(TheLine, Indent, DryRun);
754         }
755 
756         if (!TheLine.InPPDirective)
757           IndentForLevel[TheLine.Level] = LevelIndent;
758       } else if (TheLine.ChildrenAffected) {
759         format(TheLine.Children, DryRun);
760       } else {
761         // Format the first token if necessary, and notify the WhitespaceManager
762         // about the unchanged whitespace.
763         for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
764           if (Tok == TheLine.First &&
765               (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
766             unsigned LevelIndent = Tok->OriginalColumn;
767             if (!DryRun) {
768               // Remove trailing whitespace of the previous line.
769               if ((PreviousLine && PreviousLine->Affected) ||
770                   TheLine.LeadingEmptyLinesAffected) {
771                 formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent,
772                                  TheLine.InPPDirective);
773               } else {
774                 Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
775               }
776             }
777 
778             if (static_cast<int>(LevelIndent) - Offset >= 0)
779               LevelIndent -= Offset;
780             if (Tok->isNot(tok::comment) && !TheLine.InPPDirective)
781               IndentForLevel[TheLine.Level] = LevelIndent;
782           } else if (!DryRun) {
783             Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
784           }
785         }
786       }
787       if (!DryRun) {
788         for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
789           Tok->Finalized = true;
790         }
791       }
792       PreviousLine = *I;
793     }
794     return Penalty;
795   }
796 
797 private:
798   /// \brief Formats an \c AnnotatedLine and returns the penalty.
799   ///
800   /// If \p DryRun is \c false, directly applies the changes.
801   unsigned format(const AnnotatedLine &Line, unsigned FirstIndent,
802                   bool DryRun) {
803     LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
804 
805     // If the ObjC method declaration does not fit on a line, we should format
806     // it with one arg per line.
807     if (State.Line->Type == LT_ObjCMethodDecl)
808       State.Stack.back().BreakBeforeParameter = true;
809 
810     // Find best solution in solution space.
811     return analyzeSolutionSpace(State, DryRun);
812   }
813 
814   /// \brief An edge in the solution space from \c Previous->State to \c State,
815   /// inserting a newline dependent on the \c NewLine.
816   struct StateNode {
817     StateNode(const LineState &State, bool NewLine, StateNode *Previous)
818         : State(State), NewLine(NewLine), Previous(Previous) {}
819     LineState State;
820     bool NewLine;
821     StateNode *Previous;
822   };
823 
824   /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
825   ///
826   /// In case of equal penalties, we want to prefer states that were inserted
827   /// first. During state generation we make sure that we insert states first
828   /// that break the line as late as possible.
829   typedef std::pair<unsigned, unsigned> OrderedPenalty;
830 
831   /// \brief An item in the prioritized BFS search queue. The \c StateNode's
832   /// \c State has the given \c OrderedPenalty.
833   typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
834 
835   /// \brief The BFS queue type.
836   typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
837                               std::greater<QueueItem> > QueueType;
838 
/// \brief Get the offset of the line relative to its level.
840   ///
841   /// For example, 'public:' labels in classes are offset by 1 or 2
842   /// characters to the left from their level.
843   int getIndentOffset(const FormatToken &RootToken) {
844     if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
845       return Style.AccessModifierOffset;
846     return 0;
847   }
848 
849   /// \brief Add a new line and the required indent before the first Token
850   /// of the \c UnwrappedLine if there was no structural parsing error.
851   void formatFirstToken(FormatToken &RootToken,
852                         const AnnotatedLine *PreviousLine, unsigned IndentLevel,
853                         unsigned Indent, bool InPPDirective) {
854     unsigned Newlines =
855         std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
856     // Remove empty lines before "}" where applicable.
857     if (RootToken.is(tok::r_brace) &&
858         (!RootToken.Next ||
859          (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
860       Newlines = std::min(Newlines, 1u);
861     if (Newlines == 0 && !RootToken.IsFirst)
862       Newlines = 1;
863 
864     // Insert extra new line before access specifiers.
865     if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
866         RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
867       ++Newlines;
868 
869     // Remove empty lines after access specifiers.
870     if (PreviousLine && PreviousLine->First->isAccessSpecifier())
871       Newlines = std::min(1u, Newlines);
872 
873     Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent,
874                                    Indent, InPPDirective &&
875                                                !RootToken.HasUnescapedNewline);
876   }
877 
878   /// \brief Get the indent of \p Level from \p IndentForLevel.
879   ///
880   /// \p IndentForLevel must contain the indent for the level \c l
881   /// at \p IndentForLevel[l], or a value < 0 if the indent for
882   /// that level is unknown.
883   unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
884     if (IndentForLevel[Level] != -1)
885       return IndentForLevel[Level];
886     if (Level == 0)
887       return 0;
888     return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
889   }
890 
891   void join(AnnotatedLine &A, const AnnotatedLine &B) {
892     assert(!A.Last->Next);
893     assert(!B.First->Previous);
894     if (B.Affected)
895       A.Affected = true;
896     A.Last->Next = B.First;
897     B.First->Previous = A.Last;
898     B.First->CanBreakBefore = true;
899     unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
900     for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
901       Tok->TotalLength += LengthA;
902       A.Last = Tok;
903     }
904   }
905 
906   unsigned getColumnLimit(bool InPPDirective) const {
907     // In preprocessor directives reserve two chars for trailing " \"
908     return Style.ColumnLimit - (InPPDirective ? 2 : 0);
909   }
910 
911   /// \brief Analyze the entire solution space starting from \p InitialState.
912   ///
913   /// This implements a variant of Dijkstra's algorithm on the graph that spans
914   /// the solution space (\c LineStates are the nodes). The algorithm tries to
915   /// find the shortest path (the one with lowest penalty) from \p InitialState
916   /// to a state where all tokens are placed. Returns the penalty.
917   ///
918   /// If \p DryRun is \c false, directly applies the changes.
919   unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) {
920     std::set<LineState> Seen;
921 
922     // Increasing count of \c StateNode items we have created. This is used to
923     // create a deterministic order independent of the container.
924     unsigned Count = 0;
925     QueueType Queue;
926 
927     // Insert start element into queue.
928     StateNode *Node =
929         new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
930     Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
931     ++Count;
932 
933     unsigned Penalty = 0;
934 
935     // While not empty, take first element and follow edges.
936     while (!Queue.empty()) {
937       Penalty = Queue.top().first.first;
938       StateNode *Node = Queue.top().second;
939       if (Node->State.NextToken == NULL) {
940         DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
941         break;
942       }
943       Queue.pop();
944 
945       // Cut off the analysis of certain solutions if the analysis gets too
946       // complex. See description of IgnoreStackForComparison.
947       if (Count > 10000)
948         Node->State.IgnoreStackForComparison = true;
949 
950       if (!Seen.insert(Node->State).second)
951         // State already examined with lower penalty.
952         continue;
953 
954       FormatDecision LastFormat = Node->State.NextToken->Decision;
955       if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
956         addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
957       if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
958         addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
959     }
960 
961     if (Queue.empty()) {
962       // We were unable to find a solution, do nothing.
963       // FIXME: Add diagnostic?
964       DEBUG(llvm::dbgs() << "Could not find a solution.\n");
965       return 0;
966     }
967 
968     // Reconstruct the solution.
969     if (!DryRun)
970       reconstructPath(InitialState, Queue.top().second);
971 
972     DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
973     DEBUG(llvm::dbgs() << "---\n");
974 
975     return Penalty;
976   }
977 
978   void reconstructPath(LineState &State, StateNode *Current) {
979     std::deque<StateNode *> Path;
980     // We do not need a break before the initial token.
981     while (Current->Previous) {
982       Path.push_front(Current);
983       Current = Current->Previous;
984     }
985     for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
986          I != E; ++I) {
987       unsigned Penalty = 0;
988       formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
989       Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
990 
991       DEBUG({
992         if ((*I)->NewLine) {
993           llvm::dbgs() << "Penalty for placing "
994                        << (*I)->Previous->State.NextToken->Tok.getName() << ": "
995                        << Penalty << "\n";
996         }
997       });
998     }
999   }
1000 
1001   /// \brief Add the following state to the analysis queue \c Queue.
1002   ///
1003   /// Assume the current state is \p PreviousNode and has been reached with a
1004   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
1005   void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
1006                            bool NewLine, unsigned *Count, QueueType *Queue) {
1007     if (NewLine && !Indenter->canBreak(PreviousNode->State))
1008       return;
1009     if (!NewLine && Indenter->mustBreak(PreviousNode->State))
1010       return;
1011 
1012     StateNode *Node = new (Allocator.Allocate())
1013         StateNode(PreviousNode->State, NewLine, PreviousNode);
1014     if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
1015       return;
1016 
1017     Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
1018 
1019     Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
1020     ++(*Count);
1021   }
1022 
1023   /// \brief If the \p State's next token is an r_brace closing a nested block,
1024   /// format the nested block before it.
1025   ///
1026   /// Returns \c true if all children could be placed successfully and adapts
1027   /// \p Penalty as well as \p State. If \p DryRun is false, also directly
1028   /// creates changes using \c Whitespaces.
1029   ///
/// The crucial idea here is that children always get formatted upon
/// encountering the closing brace right after the nested block. Now, if we
/// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
/// \c false), the entire block has to be kept on the same line (which is only
/// possible if it fits on the line, only contains a single statement, etc.).
1035   ///
/// If \p NewLine is true, we format the nested block on separate lines, i.e.
/// break after the "{", format all lines with correct indentation and then
/// put the closing "}" on yet another new line.
1039   ///
1040   /// This enables us to keep the simple structure of the
1041   /// \c UnwrappedLineFormatter, where we only have two options for each token:
1042   /// break or don't break.
1043   bool formatChildren(LineState &State, bool NewLine, bool DryRun,
1044                       unsigned &Penalty) {
1045     FormatToken &Previous = *State.NextToken->Previous;
1046     const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
1047     if (!LBrace || LBrace->isNot(tok::l_brace) ||
1048         LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
1049       // The previous token does not open a block. Nothing to do. We don't
1050       // assert so that we can simply call this function for all tokens.
1051       return true;
1052 
1053     if (NewLine) {
1054       int AdditionalIndent = State.Stack.back().Indent -
1055                              Previous.Children[0]->Level * Style.IndentWidth;
1056       Penalty += format(Previous.Children, DryRun, AdditionalIndent,
1057                         /*FixBadIndentation=*/true);
1058       return true;
1059     }
1060 
1061     // Cannot merge multiple statements into a single line.
1062     if (Previous.Children.size() > 1)
1063       return false;
1064 
1065     // We can't put the closing "}" on a line with a trailing comment.
1066     if (Previous.Children[0]->Last->isTrailingComment())
1067       return false;
1068 
1069     if (!DryRun) {
1070       Whitespaces->replaceWhitespace(
1071           *Previous.Children[0]->First,
1072           /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
1073           /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
1074     }
1075     Penalty += format(*Previous.Children[0], State.Column + 1, DryRun);
1076 
1077     State.Column += 1 + Previous.Children[0]->Last->TotalLength;
1078     return true;
1079   }
1080 
1081   ContinuationIndenter *Indenter;
1082   WhitespaceManager *Whitespaces;
1083   FormatStyle Style;
1084   LineJoiner Joiner;
1085 
1086   llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
1087 };
1088 
1089 class FormatTokenLexer {
1090 public:
1091   FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
1092                    encoding::Encoding Encoding)
1093       : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0),
1094         TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
1095         IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
1096     Lex.SetKeepWhitespaceMode(true);
1097   }
1098 
1099   ArrayRef<FormatToken *> lex() {
1100     assert(Tokens.empty());
1101     do {
1102       Tokens.push_back(getNextToken());
1103       tryMergePreviousTokens();
1104     } while (Tokens.back()->Tok.isNot(tok::eof));
1105     return Tokens;
1106   }
1107 
1108   IdentifierTable &getIdentTable() { return IdentTable; }
1109 
1110 private:
1111   void tryMergePreviousTokens() {
1112     if (tryMerge_TMacro())
1113       return;
1114 
1115     if (Style.Language == FormatStyle::LK_JavaScript) {
1116       static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
1117       static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
1118       static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater,
1119                                                tok::greaterequal };
1120       // FIXME: We probably need to change token type to mimic operator with the
1121       // correct priority.
1122       if (tryMergeTokens(JSIdentity))
1123         return;
1124       if (tryMergeTokens(JSNotIdentity))
1125         return;
1126       if (tryMergeTokens(JSShiftEqual))
1127         return;
1128     }
1129   }
1130 
1131   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) {
1132     if (Tokens.size() < Kinds.size())
1133       return false;
1134 
1135     SmallVectorImpl<FormatToken *>::const_iterator First =
1136         Tokens.end() - Kinds.size();
1137     if (!First[0]->is(Kinds[0]))
1138       return false;
1139     unsigned AddLength = 0;
1140     for (unsigned i = 1; i < Kinds.size(); ++i) {
1141       if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
1142                                          First[i]->WhitespaceRange.getEnd())
1143         return false;
1144       AddLength += First[i]->TokenText.size();
1145     }
1146     Tokens.resize(Tokens.size() - Kinds.size() + 1);
1147     First[0]->TokenText = StringRef(First[0]->TokenText.data(),
1148                                     First[0]->TokenText.size() + AddLength);
1149     First[0]->ColumnWidth += AddLength;
1150     return true;
1151   }
1152 
1153   bool tryMerge_TMacro() {
1154     if (Tokens.size() < 4)
1155       return false;
1156     FormatToken *Last = Tokens.back();
1157     if (!Last->is(tok::r_paren))
1158       return false;
1159 
1160     FormatToken *String = Tokens[Tokens.size() - 2];
1161     if (!String->is(tok::string_literal) || String->IsMultiline)
1162       return false;
1163 
1164     if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
1165       return false;
1166 
1167     FormatToken *Macro = Tokens[Tokens.size() - 4];
1168     if (Macro->TokenText != "_T")
1169       return false;
1170 
1171     const char *Start = Macro->TokenText.data();
1172     const char *End = Last->TokenText.data() + Last->TokenText.size();
1173     String->TokenText = StringRef(Start, End - Start);
1174     String->IsFirst = Macro->IsFirst;
1175     String->LastNewlineOffset = Macro->LastNewlineOffset;
1176     String->WhitespaceRange = Macro->WhitespaceRange;
1177     String->OriginalColumn = Macro->OriginalColumn;
1178     String->ColumnWidth = encoding::columnWidthWithTabs(
1179         String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
1180 
1181     Tokens.pop_back();
1182     Tokens.pop_back();
1183     Tokens.pop_back();
1184     Tokens.back() = String;
1185     return true;
1186   }
1187 
1188   FormatToken *getNextToken() {
1189     if (GreaterStashed) {
1190       // Create a synthesized second '>' token.
1191       // FIXME: Increment Column and set OriginalColumn.
1192       Token Greater = FormatTok->Tok;
1193       FormatTok = new (Allocator.Allocate()) FormatToken;
1194       FormatTok->Tok = Greater;
1195       SourceLocation GreaterLocation =
1196           FormatTok->Tok.getLocation().getLocWithOffset(1);
1197       FormatTok->WhitespaceRange =
1198           SourceRange(GreaterLocation, GreaterLocation);
1199       FormatTok->TokenText = ">";
1200       FormatTok->ColumnWidth = 1;
1201       GreaterStashed = false;
1202       return FormatTok;
1203     }
1204 
1205     FormatTok = new (Allocator.Allocate()) FormatToken;
1206     readRawToken(*FormatTok);
1207     SourceLocation WhitespaceStart =
1208         FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
1209     FormatTok->IsFirst = IsFirstToken;
1210     IsFirstToken = false;
1211 
1212     // Consume and record whitespace until we find a significant token.
1213     unsigned WhitespaceLength = TrailingWhitespace;
1214     while (FormatTok->Tok.is(tok::unknown)) {
1215       for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
1216         switch (FormatTok->TokenText[i]) {
1217         case '\n':
1218           ++FormatTok->NewlinesBefore;
1219           // FIXME: This is technically incorrect, as it could also
1220           // be a literal backslash at the end of the line.
1221           if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
1222                          (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
1223                           FormatTok->TokenText[i - 2] != '\\')))
1224             FormatTok->HasUnescapedNewline = true;
1225           FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1226           Column = 0;
1227           break;
1228         case '\r':
1229         case '\f':
1230         case '\v':
1231           Column = 0;
1232           break;
1233         case ' ':
1234           ++Column;
1235           break;
1236         case '\t':
1237           Column += Style.TabWidth - Column % Style.TabWidth;
1238           break;
1239         case '\\':
1240           ++Column;
1241           if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
1242                              FormatTok->TokenText[i + 1] != '\n'))
1243             FormatTok->Type = TT_ImplicitStringLiteral;
1244           break;
1245         default:
1246           FormatTok->Type = TT_ImplicitStringLiteral;
1247           ++Column;
1248           break;
1249         }
1250       }
1251 
1252       if (FormatTok->Type == TT_ImplicitStringLiteral)
1253         break;
1254       WhitespaceLength += FormatTok->Tok.getLength();
1255 
1256       readRawToken(*FormatTok);
1257     }
1258 
1259     // In case the token starts with escaped newlines, we want to
1260     // take them into account as whitespace - this pattern is quite frequent
1261     // in macro definitions.
1262     // FIXME: Add a more explicit test.
1263     while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
1264            FormatTok->TokenText[1] == '\n') {
1265       // FIXME: ++FormatTok->NewlinesBefore is missing...
1266       WhitespaceLength += 2;
1267       Column = 0;
1268       FormatTok->TokenText = FormatTok->TokenText.substr(2);
1269     }
1270 
1271     FormatTok->WhitespaceRange = SourceRange(
1272         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1273 
1274     FormatTok->OriginalColumn = Column;
1275 
1276     TrailingWhitespace = 0;
1277     if (FormatTok->Tok.is(tok::comment)) {
1278       // FIXME: Add the trimmed whitespace to Column.
1279       StringRef UntrimmedText = FormatTok->TokenText;
1280       FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
1281       TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
1282     } else if (FormatTok->Tok.is(tok::raw_identifier)) {
1283       IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
1284       FormatTok->Tok.setIdentifierInfo(&Info);
1285       FormatTok->Tok.setKind(Info.getTokenID());
1286     } else if (FormatTok->Tok.is(tok::greatergreater)) {
1287       FormatTok->Tok.setKind(tok::greater);
1288       FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1289       GreaterStashed = true;
1290     }
1291 
1292     // Now FormatTok is the next non-whitespace token.
1293 
1294     StringRef Text = FormatTok->TokenText;
1295     size_t FirstNewlinePos = Text.find('\n');
1296     if (FirstNewlinePos == StringRef::npos) {
1297       // FIXME: ColumnWidth actually depends on the start column, we need to
1298       // take this into account when the token is moved.
1299       FormatTok->ColumnWidth =
1300           encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
1301       Column += FormatTok->ColumnWidth;
1302     } else {
1303       FormatTok->IsMultiline = true;
1304       // FIXME: ColumnWidth actually depends on the start column, we need to
1305       // take this into account when the token is moved.
1306       FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
1307           Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
1308 
1309       // The last line of the token always starts in column 0.
1310       // Thus, the length can be precomputed even in the presence of tabs.
1311       FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
1312           Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
1313           Encoding);
1314       Column = FormatTok->LastLineColumnWidth;
1315     }
1316 
1317     return FormatTok;
1318   }
1319 
1320   FormatToken *FormatTok;
1321   bool IsFirstToken;
1322   bool GreaterStashed;
1323   unsigned Column;
1324   unsigned TrailingWhitespace;
1325   Lexer &Lex;
1326   SourceManager &SourceMgr;
1327   FormatStyle &Style;
1328   IdentifierTable IdentTable;
1329   encoding::Encoding Encoding;
1330   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
1331   SmallVector<FormatToken *, 16> Tokens;
1332 
1333   void readRawToken(FormatToken &Tok) {
1334     Lex.LexFromRawLexer(Tok.Tok);
1335     Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1336                               Tok.Tok.getLength());
1337     // For formatting, treat unterminated string literals like normal string
1338     // literals.
1339     if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
1340         Tok.TokenText[0] == '"') {
1341       Tok.Tok.setKind(tok::string_literal);
1342       Tok.IsUnterminatedLiteral = true;
1343     }
1344   }
1345 };
1346 
1347 static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1348   switch (Language) {
1349   case FormatStyle::LK_Cpp:
1350     return "C++";
1351   case FormatStyle::LK_JavaScript:
1352     return "JavaScript";
1353   default:
1354     return "Unknown";
1355   }
1356 }
1357 
1358 class Formatter : public UnwrappedLineConsumer {
1359 public:
1360   Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
1361             const std::vector<CharSourceRange> &Ranges)
1362       : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1363         Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
1364         Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
1365         Encoding(encoding::detectEncoding(Lex.getBuffer())) {
1366     DEBUG(llvm::dbgs() << "File encoding: "
1367                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1368                                                                : "unknown")
1369                        << "\n");
1370     DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
1371                        << "\n");
1372   }
1373 
1374   tooling::Replacements format() {
1375     tooling::Replacements Result;
1376     FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
1377 
1378     UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
1379     bool StructuralError = Parser.parse();
1380     assert(UnwrappedLines.rbegin()->empty());
1381     for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1382          ++Run) {
1383       DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1384       SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1385       for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1386         AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1387       }
1388       tooling::Replacements RunResult =
1389           format(AnnotatedLines, StructuralError, Tokens);
1390       DEBUG({
1391         llvm::dbgs() << "Replacements for run " << Run << ":\n";
1392         for (tooling::Replacements::iterator I = RunResult.begin(),
1393                                              E = RunResult.end();
1394              I != E; ++I) {
1395           llvm::dbgs() << I->toString() << "\n";
1396         }
1397       });
1398       for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1399         delete AnnotatedLines[i];
1400       }
1401       Result.insert(RunResult.begin(), RunResult.end());
1402       Whitespaces.reset();
1403     }
1404     return Result;
1405   }
1406 
1407   tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1408                                bool StructuralError, FormatTokenLexer &Tokens) {
1409     TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
1410     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1411       Annotator.annotate(*AnnotatedLines[i]);
1412     }
1413     deriveLocalStyle(AnnotatedLines);
1414     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1415       Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1416     }
1417     computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
1418 
1419     Annotator.setCommentLineLevels(AnnotatedLines);
1420     ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding,
1421                                   BinPackInconclusiveFunctions);
1422     UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style);
1423     Formatter.format(AnnotatedLines, /*DryRun=*/false);
1424     return Whitespaces.generateReplacements();
1425   }
1426 
1427 private:
1428   // Determines which lines are affected by the SourceRanges given as input.
1429   // Returns \c true if at least one line between I and E or one of their
1430   // children is affected.
1431   bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
1432                             SmallVectorImpl<AnnotatedLine *>::iterator E) {
1433     bool SomeLineAffected = false;
1434     const AnnotatedLine *PreviousLine = NULL;
1435     while (I != E) {
1436       AnnotatedLine *Line = *I;
1437       Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
1438 
1439       // If a line is part of a preprocessor directive, it needs to be formatted
1440       // if any token within the directive is affected.
1441       if (Line->InPPDirective) {
1442         FormatToken *Last = Line->Last;
1443         SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
1444         while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
1445           Last = (*PPEnd)->Last;
1446           ++PPEnd;
1447         }
1448 
1449         if (affectsTokenRange(*Line->First, *Last,
1450                               /*IncludeLeadingNewlines=*/false)) {
1451           SomeLineAffected = true;
1452           markAllAsAffected(I, PPEnd);
1453         }
1454         I = PPEnd;
1455         continue;
1456       }
1457 
1458       if (nonPPLineAffected(Line, PreviousLine))
1459         SomeLineAffected = true;
1460 
1461       PreviousLine = Line;
1462       ++I;
1463     }
1464     return SomeLineAffected;
1465   }
1466 
// Determines whether 'Line' is affected by the SourceRanges given as input.
// Returns \c true if the line or one of its children is affected.
1469   bool nonPPLineAffected(AnnotatedLine *Line,
1470                          const AnnotatedLine *PreviousLine) {
1471     bool SomeLineAffected = false;
1472     Line->ChildrenAffected =
1473         computeAffectedLines(Line->Children.begin(), Line->Children.end());
1474     if (Line->ChildrenAffected)
1475       SomeLineAffected = true;
1476 
1477     // Stores whether one of the line's tokens is directly affected.
1478     bool SomeTokenAffected = false;
1479     // Stores whether we need to look at the leading newlines of the next token
1480     // in order to determine whether it was affected.
1481     bool IncludeLeadingNewlines = false;
1482 
1483     // Stores whether the first child line of any of this line's tokens is
1484     // affected.
1485     bool SomeFirstChildAffected = false;
1486 
1487     for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
1488       // Determine whether 'Tok' was affected.
1489       if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
1490         SomeTokenAffected = true;
1491 
1492       // Determine whether the first child of 'Tok' was affected.
1493       if (!Tok->Children.empty() && Tok->Children.front()->Affected)
1494         SomeFirstChildAffected = true;
1495 
1496       IncludeLeadingNewlines = Tok->Children.empty();
1497     }
1498 
1499     // Was this line moved, i.e. has it previously been on the same line as an
1500     // affected line?
1501     bool LineMoved = PreviousLine && PreviousLine->Affected &&
1502                      Line->First->NewlinesBefore == 0;
1503 
1504     bool IsContinuedComment = Line->First->is(tok::comment) &&
1505                               Line->First->Next == NULL &&
1506                               Line->First->NewlinesBefore < 2 && PreviousLine &&
1507                               PreviousLine->Affected &&
1508                               PreviousLine->Last->is(tok::comment);
1509 
1510     if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
1511         IsContinuedComment) {
1512       Line->Affected = true;
1513       SomeLineAffected = true;
1514     }
1515     return SomeLineAffected;
1516   }
1517 
1518   // Marks all lines between I and E as well as all their children as affected.
1519   void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
1520                          SmallVectorImpl<AnnotatedLine *>::iterator E) {
1521     while (I != E) {
1522       (*I)->Affected = true;
1523       markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
1524       ++I;
1525     }
1526   }
1527 
1528   // Returns true if the range from 'First' to 'Last' intersects with one of the
1529   // input ranges.
1530   bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
1531                          bool IncludeLeadingNewlines) {
1532     SourceLocation Start = First.WhitespaceRange.getBegin();
1533     if (!IncludeLeadingNewlines)
1534       Start = Start.getLocWithOffset(First.LastNewlineOffset);
1535     SourceLocation End = Last.getStartOfNonWhitespace();
1536     if (Last.TokenText.size() > 0)
1537       End = End.getLocWithOffset(Last.TokenText.size() - 1);
1538     CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
1539     return affectsCharSourceRange(Range);
1540   }
1541 
1542   // Returns true if one of the input ranges intersect the leading empty lines
1543   // before 'Tok'.
1544   bool affectsLeadingEmptyLines(const FormatToken &Tok) {
1545     CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
1546         Tok.WhitespaceRange.getBegin(),
1547         Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
1548     return affectsCharSourceRange(EmptyLineRange);
1549   }
1550 
1551   // Returns true if 'Range' intersects with one of the input ranges.
1552   bool affectsCharSourceRange(const CharSourceRange &Range) {
1553     for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
1554                                                           E = Ranges.end();
1555          I != E; ++I) {
1556       if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
1557           !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
1558         return true;
1559     }
1560     return false;
1561   }
1562 
1563   static bool inputUsesCRLF(StringRef Text) {
1564     return Text.count('\r') * 2 > Text.count('\n');
1565   }
1566 
1567   void
1568   deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1569     unsigned CountBoundToVariable = 0;
1570     unsigned CountBoundToType = 0;
1571     bool HasCpp03IncompatibleFormat = false;
1572     bool HasBinPackedFunction = false;
1573     bool HasOnePerLineFunction = false;
1574     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1575       if (!AnnotatedLines[i]->First->Next)
1576         continue;
1577       FormatToken *Tok = AnnotatedLines[i]->First->Next;
1578       while (Tok->Next) {
1579         if (Tok->Type == TT_PointerOrReference) {
1580           bool SpacesBefore =
1581               Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1582           bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1583                              Tok->Next->WhitespaceRange.getEnd();
1584           if (SpacesBefore && !SpacesAfter)
1585             ++CountBoundToVariable;
1586           else if (!SpacesBefore && SpacesAfter)
1587             ++CountBoundToType;
1588         }
1589 
1590         if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1591           if (Tok->is(tok::coloncolon) &&
1592               Tok->Previous->Type == TT_TemplateOpener)
1593             HasCpp03IncompatibleFormat = true;
1594           if (Tok->Type == TT_TemplateCloser &&
1595               Tok->Previous->Type == TT_TemplateCloser)
1596             HasCpp03IncompatibleFormat = true;
1597         }
1598 
1599         if (Tok->PackingKind == PPK_BinPacked)
1600           HasBinPackedFunction = true;
1601         if (Tok->PackingKind == PPK_OnePerLine)
1602           HasOnePerLineFunction = true;
1603 
1604         Tok = Tok->Next;
1605       }
1606     }
1607     if (Style.DerivePointerBinding) {
1608       if (CountBoundToType > CountBoundToVariable)
1609         Style.PointerBindsToType = true;
1610       else if (CountBoundToType < CountBoundToVariable)
1611         Style.PointerBindsToType = false;
1612     }
1613     if (Style.Standard == FormatStyle::LS_Auto) {
1614       Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1615                                                   : FormatStyle::LS_Cpp03;
1616     }
1617     BinPackInconclusiveFunctions =
1618         HasBinPackedFunction || !HasOnePerLineFunction;
1619   }
1620 
1621   virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1622     assert(!UnwrappedLines.empty());
1623     UnwrappedLines.back().push_back(TheLine);
1624   }
1625 
1626   virtual void finishRun() {
1627     UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1628   }
1629 
1630   FormatStyle Style;
1631   Lexer &Lex;
1632   SourceManager &SourceMgr;
1633   WhitespaceManager Whitespaces;
1634   SmallVector<CharSourceRange, 8> Ranges;
1635   SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
1636 
1637   encoding::Encoding Encoding;
1638   bool BinPackInconclusiveFunctions;
1639 };
1640 
1641 } // end anonymous namespace
1642 
1643 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1644                                SourceManager &SourceMgr,
1645                                std::vector<CharSourceRange> Ranges) {
1646   Formatter formatter(Style, Lex, SourceMgr, Ranges);
1647   return formatter.format();
1648 }
1649 
1650 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1651                                std::vector<tooling::Range> Ranges,
1652                                StringRef FileName) {
1653   FileManager Files((FileSystemOptions()));
1654   DiagnosticsEngine Diagnostics(
1655       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1656       new DiagnosticOptions);
1657   SourceManager SourceMgr(Diagnostics, Files);
1658   llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1659   const clang::FileEntry *Entry =
1660       Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1661   SourceMgr.overrideFileContents(Entry, Buf);
1662   FileID ID =
1663       SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1664   Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
1665             getFormattingLangOpts(Style.Standard));
1666   SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1667   std::vector<CharSourceRange> CharRanges;
1668   for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1669     SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset());
1670     SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
1671     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1672   }
1673   return reformat(Style, Lex, SourceMgr, CharRanges);
1674 }
1675 
1676 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) {
1677   LangOptions LangOpts;
1678   LangOpts.CPlusPlus = 1;
1679   LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1680   LangOpts.LineComment = 1;
1681   LangOpts.Bool = 1;
1682   LangOpts.ObjC1 = 1;
1683   LangOpts.ObjC2 = 1;
1684   return LangOpts;
1685 }
1686 
// Help text describing the accepted values of the -style option: the names of
// the predefined styles, the "file" form and the inline "{key: value, ...}"
// form.
const char *StyleOptionHelpDescription =
    // Predefined style names.
    "Coding style, currently supports:\n"
    "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
    // Configuration loaded from a file.
    "Use -style=file to load style configuration from\n"
    ".clang-format file located in one of the parent\n"
    "directories of the source file (or current\ndirectory for stdin).\n"
    // Configuration given inline.
    "Use -style=\"{key: value, ...}\" to set specific\nparameters, e.g.:\n"
    "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
1697 
1698 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
1699   if (FileName.endswith_lower(".js")) {
1700     return FormatStyle::LK_JavaScript;
1701   }
1702   return FormatStyle::LK_Cpp;
1703 }
1704 
1705 FormatStyle getStyle(StringRef StyleName, StringRef FileName,
1706                      StringRef FallbackStyle) {
1707   FormatStyle Style = getLLVMStyle();
1708   Style.Language = getLanguageByFileName(FileName);
1709   if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
1710     llvm::errs() << "Invalid fallback style \"" << FallbackStyle
1711                  << "\" using LLVM style\n";
1712     return Style;
1713   }
1714 
1715   if (StyleName.startswith("{")) {
1716     // Parse YAML/JSON style from the command line.
1717     if (llvm::error_code ec = parseConfiguration(StyleName, &Style)) {
1718       llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
1719                    << FallbackStyle << " style\n";
1720     }
1721     return Style;
1722   }
1723 
1724   if (!StyleName.equals_lower("file")) {
1725     if (!getPredefinedStyle(StyleName, Style.Language, &Style))
1726       llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1727                    << " style\n";
1728     return Style;
1729   }
1730 
1731   // Look for .clang-format/_clang-format file in the file's parent directories.
1732   SmallString<128> UnsuitableConfigFiles;
1733   SmallString<128> Path(FileName);
1734   llvm::sys::fs::make_absolute(Path);
1735   for (StringRef Directory = Path; !Directory.empty();
1736        Directory = llvm::sys::path::parent_path(Directory)) {
1737     if (!llvm::sys::fs::is_directory(Directory))
1738       continue;
1739     SmallString<128> ConfigFile(Directory);
1740 
1741     llvm::sys::path::append(ConfigFile, ".clang-format");
1742     DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1743     bool IsFile = false;
1744     // Ignore errors from is_regular_file: we only need to know if we can read
1745     // the file or not.
1746     llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1747 
1748     if (!IsFile) {
1749       // Try _clang-format too, since dotfiles are not commonly used on Windows.
1750       ConfigFile = Directory;
1751       llvm::sys::path::append(ConfigFile, "_clang-format");
1752       DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1753       llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1754     }
1755 
1756     if (IsFile) {
1757       OwningPtr<llvm::MemoryBuffer> Text;
1758       if (llvm::error_code ec =
1759               llvm::MemoryBuffer::getFile(ConfigFile.c_str(), Text)) {
1760         llvm::errs() << ec.message() << "\n";
1761         break;
1762       }
1763       if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
1764         if (ec == llvm::errc::not_supported) {
1765           if (!UnsuitableConfigFiles.empty())
1766             UnsuitableConfigFiles.append(", ");
1767           UnsuitableConfigFiles.append(ConfigFile);
1768           continue;
1769         }
1770         llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
1771                      << "\n";
1772         break;
1773       }
1774       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
1775       return Style;
1776     }
1777   }
1778   llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
1779                << " style\n";
1780   if (!UnsuitableConfigFiles.empty()) {
1781     llvm::errs() << "Configuration file(s) do(es) not support "
1782                  << getLanguageName(Style.Language) << ": "
1783                  << UnsuitableConfigFiles << "\n";
1784   }
1785   return Style;
1786 }
1787 
1788 } // namespace format
1789 } // namespace clang
1790