1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "format-formatter"
17 
18 #include "ContinuationIndenter.h"
19 #include "TokenAnnotator.h"
20 #include "UnwrappedLineParser.h"
21 #include "WhitespaceManager.h"
22 #include "clang/Basic/Diagnostic.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Format/Format.h"
25 #include "clang/Lex/Lexer.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/YAMLTraits.h"
31 #include <queue>
32 #include <string>
33 
34 using clang::format::FormatStyle;
35 
36 namespace llvm {
37 namespace yaml {
38 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
39   static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
40     IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
41     IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
42     IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
43   }
44 };
45 
46 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
47   static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
48     IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
49     IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
50     IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
51     IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
52     IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
53   }
54 };
55 
56 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
57   static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
58     IO.enumCase(Value, "Never", FormatStyle::UT_Never);
59     IO.enumCase(Value, "false", FormatStyle::UT_Never);
60     IO.enumCase(Value, "Always", FormatStyle::UT_Always);
61     IO.enumCase(Value, "true", FormatStyle::UT_Always);
62     IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
63   }
64 };
65 
66 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
67   static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
68     IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
69     IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
70     IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
71     IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
72     IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
73   }
74 };
75 
76 template <>
77 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
78   static void enumeration(IO &IO,
79                           FormatStyle::NamespaceIndentationKind &Value) {
80     IO.enumCase(Value, "None", FormatStyle::NI_None);
81     IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
82     IO.enumCase(Value, "All", FormatStyle::NI_All);
83   }
84 };
85 
86 template <>
87 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
88   static void enumeration(IO &IO,
89                           FormatStyle::SpaceBeforeParensOptions &Value) {
90     IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
91     IO.enumCase(Value, "ControlStatements",
92                 FormatStyle::SBPO_ControlStatements);
93     IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
94 
95     // For backward compatibility.
96     IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
97     IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
98   }
99 };
100 
/// YAML mapping for FormatStyle. The same routine is used for both reading
/// and writing; IO.outputting() distinguishes the two directions.
template <> struct MappingTraits<FormatStyle> {
  /// Maps every configuration key onto the matching field of \p Style.
  ///
  /// When reading, IO.getContext() is expected to point at a FormatStyle whose
  /// Language selects the variant of the predefined style named by
  /// "BasedOnStyle".
  static void mapping(IO &IO, FormatStyle &Style) {
    // When reading, read the language first, we need it for getPredefinedStyle.
    IO.mapOptional("Language", Style.Language);

    if (IO.outputting()) {
      // Writing: if the style is equal to one of the predefined styles for its
      // language, record the first such name under the key "# BasedOnStyle".
      // NOTE(review): the leading '#' presumably makes the emitted line a YAML
      // comment - confirm against the YAML writer.
      StringRef StylesArray[] = { "LLVM",    "Google", "Chromium",
                                  "Mozilla", "WebKit", "GNU" };
      ArrayRef<StringRef> Styles(StylesArray);
      for (size_t i = 0, e = Styles.size(); i < e; ++i) {
        StringRef StyleName(Styles[i]);
        FormatStyle PredefinedStyle;
        if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
            Style == PredefinedStyle) {
          IO.mapOptional("# BasedOnStyle", StyleName);
          break;
        }
      }
    } else {
      // Reading: an optional "BasedOnStyle" key replaces the whole style with
      // the named predefined style before the individual keys are applied.
      StringRef BasedOnStyle;
      IO.mapOptional("BasedOnStyle", BasedOnStyle);
      if (!BasedOnStyle.empty()) {
        // getPredefinedStyle overwrites Style.Language, so remember the value
        // read above and restore it afterwards.
        FormatStyle::LanguageKind OldLanguage = Style.Language;
        // The base style is looked up for the language stored in the context
        // style, not for the language of this document.
        FormatStyle::LanguageKind Language =
            ((FormatStyle *)IO.getContext())->Language;
        if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
          IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
          return;
        }
        Style.Language = OldLanguage;
      }
    }

    // Individual options. All keys are optional; a key that is absent from the
    // input leaves the corresponding field at its current value.
    IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
    IO.mapOptional("ConstructorInitializerIndentWidth",
                   Style.ConstructorInitializerIndentWidth);
    IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
    IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
    IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
                   Style.AllowAllParametersOfDeclarationOnNextLine);
    IO.mapOptional("AllowShortIfStatementsOnASingleLine",
                   Style.AllowShortIfStatementsOnASingleLine);
    IO.mapOptional("AllowShortLoopsOnASingleLine",
                   Style.AllowShortLoopsOnASingleLine);
    IO.mapOptional("AllowShortFunctionsOnASingleLine",
                   Style.AllowShortFunctionsOnASingleLine);
    IO.mapOptional("AlwaysBreakTemplateDeclarations",
                   Style.AlwaysBreakTemplateDeclarations);
    IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
                   Style.AlwaysBreakBeforeMultilineStrings);
    IO.mapOptional("BreakBeforeBinaryOperators",
                   Style.BreakBeforeBinaryOperators);
    IO.mapOptional("BreakBeforeTernaryOperators",
                   Style.BreakBeforeTernaryOperators);
    IO.mapOptional("BreakConstructorInitializersBeforeComma",
                   Style.BreakConstructorInitializersBeforeComma);
    IO.mapOptional("BinPackParameters", Style.BinPackParameters);
    IO.mapOptional("ColumnLimit", Style.ColumnLimit);
    IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
                   Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
    IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding);
    IO.mapOptional("ExperimentalAutoDetectBinPacking",
                   Style.ExperimentalAutoDetectBinPacking);
    IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
    IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
    IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
    IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
    IO.mapOptional("ObjCSpaceBeforeProtocolList",
                   Style.ObjCSpaceBeforeProtocolList);
    IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
                   Style.PenaltyBreakBeforeFirstCallParameter);
    IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
    IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
    IO.mapOptional("PenaltyBreakFirstLessLess",
                   Style.PenaltyBreakFirstLessLess);
    IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
    IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
                   Style.PenaltyReturnTypeOnItsOwnLine);
    IO.mapOptional("PointerBindsToType", Style.PointerBindsToType);
    IO.mapOptional("SpacesBeforeTrailingComments",
                   Style.SpacesBeforeTrailingComments);
    IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
    IO.mapOptional("Standard", Style.Standard);
    IO.mapOptional("IndentWidth", Style.IndentWidth);
    IO.mapOptional("TabWidth", Style.TabWidth);
    IO.mapOptional("UseTab", Style.UseTab);
    IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
    IO.mapOptional("IndentFunctionDeclarationAfterType",
                   Style.IndentFunctionDeclarationAfterType);
    IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
    IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
    IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
    IO.mapOptional("SpacesInCStyleCastParentheses",
                   Style.SpacesInCStyleCastParentheses);
    IO.mapOptional("SpacesInContainerLiterals",
                   Style.SpacesInContainerLiterals);
    IO.mapOptional("SpaceBeforeAssignmentOperators",
                   Style.SpaceBeforeAssignmentOperators);
    IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
    IO.mapOptional("CommentPragmas", Style.CommentPragmas);

    // For backward compatibility.
    // The old key is only read, never written, and targets the same field as
    // "SpaceBeforeParens" below; because the new key is mapped afterwards, it
    // is the one that sticks when both are present.
    if (!IO.outputting()) {
      IO.mapOptional("SpaceAfterControlStatementKeyword",
                     Style.SpaceBeforeParens);
    }
    IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
  }
};
210 
211 // Allows to read vector<FormatStyle> while keeping default values.
212 // IO.getContext() should contain a pointer to the FormatStyle structure, that
213 // will be used to get default values for missing keys.
214 // If the first element has no Language specified, it will be treated as the
215 // default one for the following elements.
216 template <> struct DocumentListTraits<std::vector<FormatStyle> > {
217   static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
218     return Seq.size();
219   }
220   static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
221                               size_t Index) {
222     if (Index >= Seq.size()) {
223       assert(Index == Seq.size());
224       FormatStyle Template;
225       if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
226         Template = Seq[0];
227       } else {
228         Template = *((const FormatStyle*)IO.getContext());
229         Template.Language = FormatStyle::LK_None;
230       }
231       Seq.resize(Index + 1, Template);
232     }
233     return Seq[Index];
234   }
235 };
236 }
237 }
238 
239 namespace clang {
240 namespace format {
241 
242 FormatStyle getLLVMStyle() {
243   FormatStyle LLVMStyle;
244   LLVMStyle.Language = FormatStyle::LK_Cpp;
245   LLVMStyle.AccessModifierOffset = -2;
246   LLVMStyle.AlignEscapedNewlinesLeft = false;
247   LLVMStyle.AlignTrailingComments = true;
248   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
249   LLVMStyle.AllowShortFunctionsOnASingleLine = true;
250   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
251   LLVMStyle.AllowShortLoopsOnASingleLine = false;
252   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
253   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
254   LLVMStyle.BinPackParameters = true;
255   LLVMStyle.BreakBeforeBinaryOperators = false;
256   LLVMStyle.BreakBeforeTernaryOperators = true;
257   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
258   LLVMStyle.BreakConstructorInitializersBeforeComma = false;
259   LLVMStyle.ColumnLimit = 80;
260   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
261   LLVMStyle.ConstructorInitializerIndentWidth = 4;
262   LLVMStyle.Cpp11BracedListStyle = true;
263   LLVMStyle.DerivePointerBinding = false;
264   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
265   LLVMStyle.IndentCaseLabels = false;
266   LLVMStyle.IndentFunctionDeclarationAfterType = false;
267   LLVMStyle.IndentWidth = 2;
268   LLVMStyle.TabWidth = 8;
269   LLVMStyle.MaxEmptyLinesToKeep = 1;
270   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
271   LLVMStyle.ObjCSpaceAfterProperty = false;
272   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
273   LLVMStyle.PointerBindsToType = false;
274   LLVMStyle.SpacesBeforeTrailingComments = 1;
275   LLVMStyle.Standard = FormatStyle::LS_Cpp11;
276   LLVMStyle.UseTab = FormatStyle::UT_Never;
277   LLVMStyle.SpacesInParentheses = false;
278   LLVMStyle.SpaceInEmptyParentheses = false;
279   LLVMStyle.SpacesInContainerLiterals = true;
280   LLVMStyle.SpacesInCStyleCastParentheses = false;
281   LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
282   LLVMStyle.SpaceBeforeAssignmentOperators = true;
283   LLVMStyle.ContinuationIndentWidth = 4;
284   LLVMStyle.SpacesInAngles = false;
285   LLVMStyle.CommentPragmas = "^ IWYU pragma:";
286 
287   LLVMStyle.PenaltyBreakComment = 300;
288   LLVMStyle.PenaltyBreakFirstLessLess = 120;
289   LLVMStyle.PenaltyBreakString = 1000;
290   LLVMStyle.PenaltyExcessCharacter = 1000000;
291   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
292   LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
293 
294   return LLVMStyle;
295 }
296 
297 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
298   FormatStyle GoogleStyle = getLLVMStyle();
299   GoogleStyle.Language = Language;
300 
301   GoogleStyle.AccessModifierOffset = -1;
302   GoogleStyle.AlignEscapedNewlinesLeft = true;
303   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
304   GoogleStyle.AllowShortLoopsOnASingleLine = true;
305   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
306   GoogleStyle.AlwaysBreakTemplateDeclarations = true;
307   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
308   GoogleStyle.DerivePointerBinding = true;
309   GoogleStyle.IndentCaseLabels = true;
310   GoogleStyle.IndentFunctionDeclarationAfterType = true;
311   GoogleStyle.ObjCSpaceAfterProperty = false;
312   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
313   GoogleStyle.PointerBindsToType = true;
314   GoogleStyle.SpacesBeforeTrailingComments = 2;
315   GoogleStyle.Standard = FormatStyle::LS_Auto;
316 
317   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
318   GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
319 
320   if (Language == FormatStyle::LK_JavaScript) {
321     GoogleStyle.BreakBeforeTernaryOperators = false;
322     GoogleStyle.MaxEmptyLinesToKeep = 2;
323     GoogleStyle.SpacesInContainerLiterals = false;
324   } else if (Language == FormatStyle::LK_Proto) {
325     GoogleStyle.AllowShortFunctionsOnASingleLine = false;
326   }
327 
328   return GoogleStyle;
329 }
330 
331 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
332   FormatStyle ChromiumStyle = getGoogleStyle(Language);
333   ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
334   ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
335   ChromiumStyle.AllowShortLoopsOnASingleLine = false;
336   ChromiumStyle.BinPackParameters = false;
337   ChromiumStyle.DerivePointerBinding = false;
338   ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
339   return ChromiumStyle;
340 }
341 
342 FormatStyle getMozillaStyle() {
343   FormatStyle MozillaStyle = getLLVMStyle();
344   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
345   MozillaStyle.Cpp11BracedListStyle = false;
346   MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
347   MozillaStyle.DerivePointerBinding = true;
348   MozillaStyle.IndentCaseLabels = true;
349   MozillaStyle.ObjCSpaceAfterProperty = true;
350   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
351   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
352   MozillaStyle.PointerBindsToType = true;
353   MozillaStyle.Standard = FormatStyle::LS_Cpp03;
354   return MozillaStyle;
355 }
356 
357 FormatStyle getWebKitStyle() {
358   FormatStyle Style = getLLVMStyle();
359   Style.AccessModifierOffset = -4;
360   Style.AlignTrailingComments = false;
361   Style.BreakBeforeBinaryOperators = true;
362   Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
363   Style.BreakConstructorInitializersBeforeComma = true;
364   Style.Cpp11BracedListStyle = false;
365   Style.ColumnLimit = 0;
366   Style.IndentWidth = 4;
367   Style.NamespaceIndentation = FormatStyle::NI_Inner;
368   Style.ObjCSpaceAfterProperty = true;
369   Style.PointerBindsToType = true;
370   Style.Standard = FormatStyle::LS_Cpp03;
371   return Style;
372 }
373 
374 FormatStyle getGNUStyle() {
375   FormatStyle Style = getLLVMStyle();
376   Style.BreakBeforeBinaryOperators = true;
377   Style.BreakBeforeBraces = FormatStyle::BS_GNU;
378   Style.BreakBeforeTernaryOperators = true;
379   Style.Cpp11BracedListStyle = false;
380   Style.ColumnLimit = 79;
381   Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
382   Style.Standard = FormatStyle::LS_Cpp03;
383   return Style;
384 }
385 
386 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
387                         FormatStyle *Style) {
388   if (Name.equals_lower("llvm")) {
389     *Style = getLLVMStyle();
390   } else if (Name.equals_lower("chromium")) {
391     *Style = getChromiumStyle(Language);
392   } else if (Name.equals_lower("mozilla")) {
393     *Style = getMozillaStyle();
394   } else if (Name.equals_lower("google")) {
395     *Style = getGoogleStyle(Language);
396   } else if (Name.equals_lower("webkit")) {
397     *Style = getWebKitStyle();
398   } else if (Name.equals_lower("gnu")) {
399     *Style = getGNUStyle();
400   } else {
401     return false;
402   }
403 
404   Style->Language = Language;
405   return true;
406 }
407 
408 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
409   assert(Style);
410   FormatStyle::LanguageKind Language = Style->Language;
411   assert(Language != FormatStyle::LK_None);
412   if (Text.trim().empty())
413     return llvm::make_error_code(llvm::errc::invalid_argument);
414 
415   std::vector<FormatStyle> Styles;
416   llvm::yaml::Input Input(Text);
417   // DocumentListTraits<vector<FormatStyle>> uses the context to get default
418   // values for the fields, keys for which are missing from the configuration.
419   // Mapping also uses the context to get the language to find the correct
420   // base style.
421   Input.setContext(Style);
422   Input >> Styles;
423   if (Input.error())
424     return Input.error();
425 
426   for (unsigned i = 0; i < Styles.size(); ++i) {
427     // Ensures that only the first configuration can skip the Language option.
428     if (Styles[i].Language == FormatStyle::LK_None && i != 0)
429       return llvm::make_error_code(llvm::errc::invalid_argument);
430     // Ensure that each language is configured at most once.
431     for (unsigned j = 0; j < i; ++j) {
432       if (Styles[i].Language == Styles[j].Language) {
433         DEBUG(llvm::dbgs()
434               << "Duplicate languages in the config file on positions " << j
435               << " and " << i << "\n");
436         return llvm::make_error_code(llvm::errc::invalid_argument);
437       }
438     }
439   }
440   // Look for a suitable configuration starting from the end, so we can
441   // find the configuration for the specific language first, and the default
442   // configuration (which can only be at slot 0) after it.
443   for (int i = Styles.size() - 1; i >= 0; --i) {
444     if (Styles[i].Language == Language ||
445         Styles[i].Language == FormatStyle::LK_None) {
446       *Style = Styles[i];
447       Style->Language = Language;
448       return llvm::make_error_code(llvm::errc::success);
449     }
450   }
451   return llvm::make_error_code(llvm::errc::not_supported);
452 }
453 
454 std::string configurationAsText(const FormatStyle &Style) {
455   std::string Text;
456   llvm::raw_string_ostream Stream(Text);
457   llvm::yaml::Output Output(Stream);
458   // We use the same mapping method for input and output, so we need a non-const
459   // reference here.
460   FormatStyle NonConstStyle = Style;
461   Output << NonConstStyle;
462   return Stream.str();
463 }
464 
465 namespace {
466 
467 class NoColumnLimitFormatter {
468 public:
469   NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {}
470 
471   /// \brief Formats the line starting at \p State, simply keeping all of the
472   /// input's line breaking decisions.
473   void format(unsigned FirstIndent, const AnnotatedLine *Line) {
474     LineState State =
475         Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false);
476     while (State.NextToken != NULL) {
477       bool Newline =
478           Indenter->mustBreak(State) ||
479           (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
480       Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
481     }
482   }
483 
484 private:
485   ContinuationIndenter *Indenter;
486 };
487 
/// \brief Decides how many of the unwrapped lines that follow a given line
/// can be folded into it: short function bodies, simple braced blocks, short
/// control statements and preprocessor directive continuations.
///
/// The style is held by reference and must outlive the joiner.
class LineJoiner {
public:
  LineJoiner(const FormatStyle &Style) : Style(Style) {}

  /// \brief Calculates how many lines can be merged into 1 starting at \p I.
  ///
  /// \param Indent Indentation of the line at \p I; it is subtracted from the
  ///        column limit to obtain the space available.
  /// \param I Iterator to the line that would absorb its successors.
  /// \param E End of the line sequence.
  /// \returns The number of lines after \p I to merge into it; 0 if none.
  unsigned
  tryFitMultipleLinesInOne(unsigned Indent,
                           SmallVectorImpl<AnnotatedLine *>::const_iterator I,
                           SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
    // We can never merge stuff if there are trailing line comments.
    const AnnotatedLine *TheLine = *I;
    if (TheLine->Last->Type == TT_LineComment)
      return 0;

    // With a column limit in force, an indent that already exceeds the limit
    // rules out any merge (and would make the subtraction below wrap around).
    if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
      return 0;

    // A column limit of 0 is treated as "unlimited".
    unsigned Limit =
        Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
    // If we already exceed the column limit, we set 'Limit' to 0. The different
    // tryMerge..() functions can then decide whether to still do merging.
    Limit = TheLine->Last->TotalLength > Limit
                ? 0
                : Limit - TheLine->Last->TotalLength;

    // Nothing to merge with: either there is no next line or it is invalid.
    if (I + 1 == E || I[1]->Type == LT_Invalid)
      return 0;

    // Line ends in a function's opening brace (and is more than just that
    // brace): merging is governed by AllowShortFunctionsOnASingleLine.
    if (TheLine->Last->Type == TT_FunctionLBrace &&
        TheLine->First != TheLine->Last) {
      return Style.AllowShortFunctionsOnASingleLine
                 ? tryMergeSimpleBlock(I, E, Limit)
                 : 0;
    }
    // Any other line ending in '{' is only merged with attached braces.
    if (TheLine->Last->is(tok::l_brace)) {
      return Style.BreakBeforeBraces == FormatStyle::BS_Attach
                 ? tryMergeSimpleBlock(I, E, Limit)
                 : 0;
    }
    // The function's opening brace sits on the next line (non-attached brace
    // styles): try to pull the brace and a simple body up onto this line.
    if (I[1]->First->Type == TT_FunctionLBrace &&
        Style.BreakBeforeBraces != FormatStyle::BS_Attach) {
      // Check for Limit <= 2 to account for the " {".
      if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine)))
        return 0;
      Limit -= 2;

      unsigned MergedLines = 0;
      if (Style.AllowShortFunctionsOnASingleLine) {
        MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
        // If we managed to merge the block, count the function header, which is
        // on a separate line.
        if (MergedLines > 0)
          ++MergedLines;
      }
      return MergedLines;
    }
    if (TheLine->First->is(tok::kw_if)) {
      return Style.AllowShortIfStatementsOnASingleLine
                 ? tryMergeSimpleControlStatement(I, E, Limit)
                 : 0;
    }
    if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
      return Style.AllowShortLoopsOnASingleLine
                 ? tryMergeSimpleControlStatement(I, E, Limit)
                 : 0;
    }
    // Start of a preprocessor directive: the line is inside a directive and
    // its first token follows an unescaped newline or is the first token.
    if (TheLine->InPPDirective &&
        (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
      return tryMergeSimplePPDirective(I, E, Limit);
    }
    return 0;
  }

private:
  /// Tries to merge the line after \p I into a preprocessor directive.
  ///
  /// Succeeds (returns 1) only if the next line continues the directive (it is
  /// inside a directive and does not start after an unescaped newline), the
  /// line after that does not continue it as well, and the next line fits into
  /// \p Limit together with one separating space. Returns 0 otherwise.
  unsigned
  tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
                            SmallVectorImpl<AnnotatedLine *>::const_iterator E,
                            unsigned Limit) {
    if (Limit == 0)
      return 0;
    if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)
      return 0;
    // Only two-line directives are merged; a third continuation line bails.
    if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
      return 0;
    if (1 + I[1]->Last->TotalLength > Limit)
      return 0;
    return 1;
  }

  /// Tries to merge the statement on the line after \p I into the control
  /// statement (if/for/while) at \p I.
  ///
  /// Returns 1 if the next line can be appended within \p Limit, 0 otherwise.
  unsigned tryMergeSimpleControlStatement(
      SmallVectorImpl<AnnotatedLine *>::const_iterator I,
      SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
    if (Limit == 0)
      return 0;
    // With Allman and GNU braces, a '{' starting the next line stays there.
    if ((Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
         Style.BreakBeforeBraces == FormatStyle::BS_GNU) &&
        I[1]->First->is(tok::l_brace))
      return 0;
    // Do not merge across a preprocessor directive boundary.
    if (I[1]->InPPDirective != (*I)->InPPDirective ||
        (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline))
      return 0;
    Limit = limitConsideringMacros(I + 1, E, Limit);
    AnnotatedLine &Line = **I;
    // The control statement's line must end with its closing parenthesis.
    if (Line.Last->isNot(tok::r_paren))
      return 0;
    if (1 + I[1]->Last->TotalLength > Limit)
      return 0;
    // Never pull up an empty statement, a nested control statement or a line
    // that starts with a line comment.
    if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
                             tok::kw_while) ||
        I[1]->First->Type == TT_LineComment)
      return 0;
    // Only inline simple if's (no nested if or else).
    if (I + 2 != E && Line.First->is(tok::kw_if) &&
        I[2]->First->is(tok::kw_else))
      return 0;
    return 1;
  }

  /// Tries to merge a braced block that is opened on the line at \p I.
  ///
  /// Returns 1 for an empty block (the next line is just the closing brace,
  /// optionally followed by ';'), 2 for a block with a single brace-free line
  /// followed by a line holding only the closing brace, and 0 if no merge is
  /// possible. In the empty-block case the closing brace token is modified so
  /// that it attaches directly to the opening brace.
  unsigned
  tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
                      SmallVectorImpl<AnnotatedLine *>::const_iterator E,
                      unsigned Limit) {
    // First, check that the current line allows merging. This is the case if
    // we're not in a control flow statement and the last token is an opening
    // brace.
    AnnotatedLine &Line = **I;
    if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
                            tok::kw_else, tok::kw_try, tok::kw_catch,
                            tok::kw_for,
                            // This gets rid of all ObjC @ keywords and methods.
                            tok::at, tok::minus, tok::plus))
      return 0;

    FormatToken *Tok = I[1]->First;
    if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
        (Tok->getNextNonComment() == NULL ||
         Tok->getNextNonComment()->is(tok::semi))) {
      // We merge empty blocks even if the line exceeds the column limit.
      Tok->SpacesRequiredBefore = 0;
      Tok->CanBreakBefore = true;
      return 1;
    } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
      // Check that we still have three lines and they fit into the limit.
      if (I + 2 == E || I[2]->Type == LT_Invalid)
        return 0;
      Limit = limitConsideringMacros(I + 2, E, Limit);

      if (!nextTwoLinesFitInto(I, Limit))
        return 0;

      // Second, check that the next line does not contain any braces - if it
      // does, readability declines when putting it into a single line.
      if (I[1]->Last->Type == TT_LineComment || Tok->MustBreakBefore)
        return 0;
      do {
        if (Tok->isOneOf(tok::l_brace, tok::r_brace))
          return 0;
        Tok = Tok->Next;
      } while (Tok != NULL);

      // Last, check that the third line contains a single closing brace.
      Tok = I[2]->First;
      if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) ||
          Tok->MustBreakBefore)
        return 0;

      return 2;
    }
    return 0;
  }

  /// Returns the modified column limit for \p I if it is inside a macro and
  /// needs a trailing '\'.
  ///
  /// The limit shrinks by 2 (clamped at 0) when the line at \p I is inside a
  /// preprocessor directive and the following line continues it, i.e. does
  /// not start after an unescaped newline and is not the end of file.
  unsigned
  limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
                         SmallVectorImpl<AnnotatedLine *>::const_iterator E,
                         unsigned Limit) {
    if (I[0]->InPPDirective && I + 1 != E &&
        !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) {
      return Limit < 2 ? 0 : Limit - 2;
    }
    return Limit;
  }

  /// Returns true if the two lines after \p I, each preceded by one space,
  /// fit into \p Limit. The caller must ensure both lines exist.
  bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
                           unsigned Limit) {
    return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
  }

  /// Returns true if any token of \p Line has MustBreakBefore set.
  bool containsMustBreak(const AnnotatedLine *Line) {
    for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
      if (Tok->MustBreakBefore)
        return true;
    }
    return false;
  }

  /// Style options consulted by the merge heuristics (not owned).
  const FormatStyle &Style;
};
687 
688 class UnwrappedLineFormatter {
689 public:
690   UnwrappedLineFormatter(ContinuationIndenter *Indenter,
691                          WhitespaceManager *Whitespaces,
692                          const FormatStyle &Style)
693       : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
694         Joiner(Style) {}
695 
  unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
                  int AdditionalIndent = 0, bool FixBadIndentation = false) {
    // Walks over all of \p Lines in order, decides the indent of each line,
    // optionally merges it with following lines and then lays it out.
    // Returns the sum of the penalties reported by the per-line formatter;
    // when \p DryRun is true no whitespace changes are issued and tokens are
    // not finalized.
    assert(!Lines.empty());
    unsigned Penalty = 0;
    // IndentForLevel[l] is the indent last used for nesting level l, or -1 if
    // unknown. Levels below the first line's level are seeded from the style,
    // shifted by AdditionalIndent.
    std::vector<int> IndentForLevel;
    for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i)
      IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
    const AnnotatedLine *PreviousLine = NULL;
    for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(),
                                                          E = Lines.end();
         I != E; ++I) {
      const AnnotatedLine &TheLine = **I;
      const FormatToken *FirstTok = TheLine.First;
      // Extra (possibly negative) offset for access specifiers.
      int Offset = getIndentOffset(*FirstTok);

      // Determine indent and try to merge multiple unwrapped lines.
      unsigned Indent;
      if (TheLine.InPPDirective) {
        // Preprocessor lines ignore the recorded per-level indents.
        Indent = TheLine.Level * Style.IndentWidth;
      } else {
        // Pad with "unknown" up to this level, then drop the entries of all
        // deeper levels before looking up the indent.
        while (IndentForLevel.size() <= TheLine.Level)
          IndentForLevel.push_back(-1);
        IndentForLevel.resize(TheLine.Level + 1);
        Indent = getIndent(IndentForLevel, TheLine.Level);
      }
      // LevelIndent is the indent of the level itself, without the offset.
      unsigned LevelIndent = Indent;
      // Only apply the offset if it does not push the indent below zero.
      if (static_cast<int>(Indent) + Offset >= 0)
        Indent += Offset;

      // Merge multiple lines if possible.
      unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E);
      if (MergedLines > 0 && Style.ColumnLimit == 0) {
        // Disallow line merging if there is a break at the start of one of the
        // input lines.
        for (unsigned i = 0; i < MergedLines; ++i) {
          if (I[i + 1]->First->NewlinesBefore > 0)
            MergedLines = 0;
        }
      }
      // The token lists are only physically linked together when changes are
      // actually applied.
      if (!DryRun) {
        for (unsigned i = 0; i < MergedLines; ++i) {
          join(*I[i], *I[i + 1]);
        }
      }
      // Skip the lines that were merged into TheLine; I now refers to the last
      // of them, while TheLine still refers to the first.
      I += MergedLines;

      // With FixBadIndentation, a line whose first token is not at the level's
      // indent gets formatted even if it is not marked as affected.
      bool FixIndentation =
          FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn);
      if (TheLine.First->is(tok::eof)) {
        if (PreviousLine && PreviousLine->Affected && !DryRun) {
          // Remove the file's trailing whitespace.
          unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u);
          Whitespaces->replaceWhitespace(*TheLine.First, Newlines,
                                         /*IndentLevel=*/0, /*Spaces=*/0,
                                         /*TargetColumn=*/0);
        }
      } else if (TheLine.Type != LT_Invalid &&
                 (TheLine.Affected || FixIndentation)) {
        if (FirstTok->WhitespaceRange.isValid()) {
          if (!DryRun)
            formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
                             Indent, TheLine.InPPDirective);
        } else {
          // Without a valid whitespace range the first token is left where it
          // is, so its original column becomes the indent.
          Indent = LevelIndent = FirstTok->OriginalColumn;
        }

        // If everything fits on a single line, just put it there.
        unsigned ColumnLimit = Style.ColumnLimit;
        if (I + 1 != E) {
          AnnotatedLine *NextLine = I[1];
          // If the next line is a preprocessor line that does not start with
          // an unescaped newline, use the (possibly reduced) limit from
          // getColumnLimit().
          if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline)
            ColumnLimit = getColumnLimit(TheLine.InPPDirective);
        }

        if (TheLine.Last->TotalLength + Indent <= ColumnLimit) {
          // Fits: place every token without inserting any line break.
          LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun);
          while (State.NextToken != NULL)
            Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
        } else if (Style.ColumnLimit == 0) {
          // FIXME: Implement nested blocks for ColumnLimit = 0.
          NoColumnLimitFormatter Formatter(Indenter);
          if (!DryRun)
            Formatter.format(Indent, &TheLine);
        } else {
          // Does not fit: search for the cheapest set of line breaks. This is
          // the only place that contributes to the returned penalty.
          Penalty += format(TheLine, Indent, DryRun);
        }

        // Remember the indent used for this level for the following lines.
        if (!TheLine.InPPDirective)
          IndentForLevel[TheLine.Level] = LevelIndent;
      } else if (TheLine.ChildrenAffected) {
        // Only nested lines are affected; the returned penalty is discarded.
        format(TheLine.Children, DryRun);
      } else {
        // Format the first token if necessary, and notify the WhitespaceManager
        // about the unchanged whitespace.
        for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
          if (Tok == TheLine.First &&
              (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
            // Note: this LevelIndent shadows the outer one; it starts out as
            // the column the token had in the input.
            unsigned LevelIndent = Tok->OriginalColumn;
            if (!DryRun) {
              // Remove trailing whitespace of the previous line.
              if ((PreviousLine && PreviousLine->Affected) ||
                  TheLine.LeadingEmptyLinesAffected) {
                formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent,
                                 TheLine.InPPDirective);
              } else {
                Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
              }
            }

            // Undo the access-specifier offset (if that does not go negative)
            // to recover the level's indent from the token's column, and record
            // it unless the line starts with a comment or is a PP line.
            if (static_cast<int>(LevelIndent) - Offset >= 0)
              LevelIndent -= Offset;
            if (Tok->isNot(tok::comment) && !TheLine.InPPDirective)
              IndentForLevel[TheLine.Level] = LevelIndent;
          } else if (!DryRun) {
            Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
          }
        }
      }
      // Mark all tokens of the line as finalized once changes were applied.
      if (!DryRun) {
        for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
          Tok->Finalized = true;
        }
      }
      // After a merge this is the last merged line, not TheLine.
      PreviousLine = *I;
    }
    return Penalty;
  }
823 
824 private:
825   /// \brief Formats an \c AnnotatedLine and returns the penalty.
826   ///
827   /// If \p DryRun is \c false, directly applies the changes.
828   unsigned format(const AnnotatedLine &Line, unsigned FirstIndent,
829                   bool DryRun) {
830     LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
831 
832     // If the ObjC method declaration does not fit on a line, we should format
833     // it with one arg per line.
834     if (State.Line->Type == LT_ObjCMethodDecl)
835       State.Stack.back().BreakBeforeParameter = true;
836 
837     // Find best solution in solution space.
838     return analyzeSolutionSpace(State, DryRun);
839   }
840 
841   /// \brief An edge in the solution space from \c Previous->State to \c State,
842   /// inserting a newline dependent on the \c NewLine.
843   struct StateNode {
844     StateNode(const LineState &State, bool NewLine, StateNode *Previous)
845         : State(State), NewLine(NewLine), Previous(Previous) {}
846     LineState State;
847     bool NewLine;
848     StateNode *Previous;
849   };
850 
  /// \brief A pair of <penalty, count> that is used to order the search queue.
  ///
  /// Pairs compare by penalty first and by count second. In case of equal
  /// penalties, we therefore prefer states that were inserted first. During
  /// state generation we make sure that we insert states first that break the
  /// line as late as possible.
  typedef std::pair<unsigned, unsigned> OrderedPenalty;

  /// \brief An item in the prioritized search queue. The \c StateNode's
  /// \c State has the given \c OrderedPenalty.
  typedef std::pair<OrderedPenalty, StateNode *> QueueItem;

  /// \brief The search queue type.
  ///
  /// Because of \c std::greater, \c top() is the item with the smallest
  /// \c OrderedPenalty (a min-heap), i.e. the cheapest state found so far.
  typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
                              std::greater<QueueItem> > QueueType;
865 
866   /// \brief Get the offset of the line relatively to the level.
867   ///
868   /// For example, 'public:' labels in classes are offset by 1 or 2
869   /// characters to the left from their level.
870   int getIndentOffset(const FormatToken &RootToken) {
871     if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
872       return Style.AccessModifierOffset;
873     return 0;
874   }
875 
876   /// \brief Add a new line and the required indent before the first Token
877   /// of the \c UnwrappedLine if there was no structural parsing error.
878   void formatFirstToken(FormatToken &RootToken,
879                         const AnnotatedLine *PreviousLine, unsigned IndentLevel,
880                         unsigned Indent, bool InPPDirective) {
881     unsigned Newlines =
882         std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
883     // Remove empty lines before "}" where applicable.
884     if (RootToken.is(tok::r_brace) &&
885         (!RootToken.Next ||
886          (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
887       Newlines = std::min(Newlines, 1u);
888     if (Newlines == 0 && !RootToken.IsFirst)
889       Newlines = 1;
890 
891     // Insert extra new line before access specifiers.
892     if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
893         RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
894       ++Newlines;
895 
896     // Remove empty lines after access specifiers.
897     if (PreviousLine && PreviousLine->First->isAccessSpecifier())
898       Newlines = std::min(1u, Newlines);
899 
900     Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent,
901                                    Indent, InPPDirective &&
902                                                !RootToken.HasUnescapedNewline);
903   }
904 
905   /// \brief Get the indent of \p Level from \p IndentForLevel.
906   ///
907   /// \p IndentForLevel must contain the indent for the level \c l
908   /// at \p IndentForLevel[l], or a value < 0 if the indent for
909   /// that level is unknown.
910   unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
911     if (IndentForLevel[Level] != -1)
912       return IndentForLevel[Level];
913     if (Level == 0)
914       return 0;
915     return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
916   }
917 
918   void join(AnnotatedLine &A, const AnnotatedLine &B) {
919     assert(!A.Last->Next);
920     assert(!B.First->Previous);
921     if (B.Affected)
922       A.Affected = true;
923     A.Last->Next = B.First;
924     B.First->Previous = A.Last;
925     B.First->CanBreakBefore = true;
926     unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
927     for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
928       Tok->TotalLength += LengthA;
929       A.Last = Tok;
930     }
931   }
932 
933   unsigned getColumnLimit(bool InPPDirective) const {
934     // In preprocessor directives reserve two chars for trailing " \"
935     return Style.ColumnLimit - (InPPDirective ? 2 : 0);
936   }
937 
938   /// \brief Analyze the entire solution space starting from \p InitialState.
939   ///
940   /// This implements a variant of Dijkstra's algorithm on the graph that spans
941   /// the solution space (\c LineStates are the nodes). The algorithm tries to
942   /// find the shortest path (the one with lowest penalty) from \p InitialState
943   /// to a state where all tokens are placed. Returns the penalty.
944   ///
945   /// If \p DryRun is \c false, directly applies the changes.
946   unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) {
947     std::set<LineState> Seen;
948 
949     // Increasing count of \c StateNode items we have created. This is used to
950     // create a deterministic order independent of the container.
951     unsigned Count = 0;
952     QueueType Queue;
953 
954     // Insert start element into queue.
955     StateNode *Node =
956         new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
957     Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
958     ++Count;
959 
960     unsigned Penalty = 0;
961 
962     // While not empty, take first element and follow edges.
963     while (!Queue.empty()) {
964       Penalty = Queue.top().first.first;
965       StateNode *Node = Queue.top().second;
966       if (Node->State.NextToken == NULL) {
967         DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
968         break;
969       }
970       Queue.pop();
971 
972       // Cut off the analysis of certain solutions if the analysis gets too
973       // complex. See description of IgnoreStackForComparison.
974       if (Count > 10000)
975         Node->State.IgnoreStackForComparison = true;
976 
977       if (!Seen.insert(Node->State).second)
978         // State already examined with lower penalty.
979         continue;
980 
981       FormatDecision LastFormat = Node->State.NextToken->Decision;
982       if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
983         addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
984       if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
985         addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
986     }
987 
988     if (Queue.empty()) {
989       // We were unable to find a solution, do nothing.
990       // FIXME: Add diagnostic?
991       DEBUG(llvm::dbgs() << "Could not find a solution.\n");
992       return 0;
993     }
994 
995     // Reconstruct the solution.
996     if (!DryRun)
997       reconstructPath(InitialState, Queue.top().second);
998 
999     DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
1000     DEBUG(llvm::dbgs() << "---\n");
1001 
1002     return Penalty;
1003   }
1004 
1005   void reconstructPath(LineState &State, StateNode *Current) {
1006     std::deque<StateNode *> Path;
1007     // We do not need a break before the initial token.
1008     while (Current->Previous) {
1009       Path.push_front(Current);
1010       Current = Current->Previous;
1011     }
1012     for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
1013          I != E; ++I) {
1014       unsigned Penalty = 0;
1015       formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
1016       Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
1017 
1018       DEBUG({
1019         if ((*I)->NewLine) {
1020           llvm::dbgs() << "Penalty for placing "
1021                        << (*I)->Previous->State.NextToken->Tok.getName() << ": "
1022                        << Penalty << "\n";
1023         }
1024       });
1025     }
1026   }
1027 
1028   /// \brief Add the following state to the analysis queue \c Queue.
1029   ///
1030   /// Assume the current state is \p PreviousNode and has been reached with a
1031   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
1032   void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
1033                            bool NewLine, unsigned *Count, QueueType *Queue) {
1034     if (NewLine && !Indenter->canBreak(PreviousNode->State))
1035       return;
1036     if (!NewLine && Indenter->mustBreak(PreviousNode->State))
1037       return;
1038 
1039     StateNode *Node = new (Allocator.Allocate())
1040         StateNode(PreviousNode->State, NewLine, PreviousNode);
1041     if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
1042       return;
1043 
1044     Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
1045 
1046     Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
1047     ++(*Count);
1048   }
1049 
1050   /// \brief If the \p State's next token is an r_brace closing a nested block,
1051   /// format the nested block before it.
1052   ///
1053   /// Returns \c true if all children could be placed successfully and adapts
1054   /// \p Penalty as well as \p State. If \p DryRun is false, also directly
1055   /// creates changes using \c Whitespaces.
1056   ///
1057   /// The crucial idea here is that children always get formatted upon
1058   /// encountering the closing brace right after the nested block. Now, if we
1059   /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
1060   /// \c false), the entire block has to be kept on the same line (which is only
1061   /// possible if it fits on the line, only contains a single statement, etc.
1062   ///
1063   /// If \p NewLine is true, we format the nested block on separate lines, i.e.
1064   /// break after the "{", format all lines with correct indentation and the put
1065   /// the closing "}" on yet another new line.
1066   ///
1067   /// This enables us to keep the simple structure of the
1068   /// \c UnwrappedLineFormatter, where we only have two options for each token:
1069   /// break or don't break.
1070   bool formatChildren(LineState &State, bool NewLine, bool DryRun,
1071                       unsigned &Penalty) {
1072     FormatToken &Previous = *State.NextToken->Previous;
1073     const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
1074     if (!LBrace || LBrace->isNot(tok::l_brace) ||
1075         LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
1076       // The previous token does not open a block. Nothing to do. We don't
1077       // assert so that we can simply call this function for all tokens.
1078       return true;
1079 
1080     if (NewLine) {
1081       int AdditionalIndent = State.Stack.back().Indent -
1082                              Previous.Children[0]->Level * Style.IndentWidth;
1083       Penalty += format(Previous.Children, DryRun, AdditionalIndent,
1084                         /*FixBadIndentation=*/true);
1085       return true;
1086     }
1087 
1088     // Cannot merge multiple statements into a single line.
1089     if (Previous.Children.size() > 1)
1090       return false;
1091 
1092     // We can't put the closing "}" on a line with a trailing comment.
1093     if (Previous.Children[0]->Last->isTrailingComment())
1094       return false;
1095 
1096     if (!DryRun) {
1097       Whitespaces->replaceWhitespace(
1098           *Previous.Children[0]->First,
1099           /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
1100           /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
1101     }
1102     Penalty += format(*Previous.Children[0], State.Column + 1, DryRun);
1103 
1104     State.Column += 1 + Previous.Children[0]->Last->TotalLength;
1105     return true;
1106   }
1107 
  // Creates the initial \c LineState of a line and advances states token by
  // token (getInitialState, addTokenToState, canBreak, mustBreak).
  ContinuationIndenter *Indenter;
  // Receives all whitespace changes as well as the tokens whose whitespace is
  // left untouched.
  WhitespaceManager *Whitespaces;
  // The style used for indent widths, column limit and empty-line handling.
  FormatStyle Style;
  // Decides how many following lines can be merged into the current one.
  LineJoiner Joiner;

  // Backing storage for the \c StateNodes created by analyzeSolutionSpace()
  // and addNextStateToQueue().
  llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
1114 };
1115 
1116 class FormatTokenLexer {
1117 public:
1118   FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
1119                    encoding::Encoding Encoding)
1120       : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0),
1121         TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
1122         IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
1123     Lex.SetKeepWhitespaceMode(true);
1124   }
1125 
1126   ArrayRef<FormatToken *> lex() {
1127     assert(Tokens.empty());
1128     do {
1129       Tokens.push_back(getNextToken());
1130       tryMergePreviousTokens();
1131     } while (Tokens.back()->Tok.isNot(tok::eof));
1132     return Tokens;
1133   }
1134 
1135   IdentifierTable &getIdentTable() { return IdentTable; }
1136 
1137 private:
1138   void tryMergePreviousTokens() {
1139     if (tryMerge_TMacro())
1140       return;
1141 
1142     if (Style.Language == FormatStyle::LK_JavaScript) {
1143       static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
1144       static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
1145       static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater,
1146                                                tok::greaterequal };
1147       // FIXME: We probably need to change token type to mimic operator with the
1148       // correct priority.
1149       if (tryMergeTokens(JSIdentity))
1150         return;
1151       if (tryMergeTokens(JSNotIdentity))
1152         return;
1153       if (tryMergeTokens(JSShiftEqual))
1154         return;
1155     }
1156   }
1157 
1158   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) {
1159     if (Tokens.size() < Kinds.size())
1160       return false;
1161 
1162     SmallVectorImpl<FormatToken *>::const_iterator First =
1163         Tokens.end() - Kinds.size();
1164     if (!First[0]->is(Kinds[0]))
1165       return false;
1166     unsigned AddLength = 0;
1167     for (unsigned i = 1; i < Kinds.size(); ++i) {
1168       if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
1169                                          First[i]->WhitespaceRange.getEnd())
1170         return false;
1171       AddLength += First[i]->TokenText.size();
1172     }
1173     Tokens.resize(Tokens.size() - Kinds.size() + 1);
1174     First[0]->TokenText = StringRef(First[0]->TokenText.data(),
1175                                     First[0]->TokenText.size() + AddLength);
1176     First[0]->ColumnWidth += AddLength;
1177     return true;
1178   }
1179 
1180   bool tryMerge_TMacro() {
1181     if (Tokens.size() < 4)
1182       return false;
1183     FormatToken *Last = Tokens.back();
1184     if (!Last->is(tok::r_paren))
1185       return false;
1186 
1187     FormatToken *String = Tokens[Tokens.size() - 2];
1188     if (!String->is(tok::string_literal) || String->IsMultiline)
1189       return false;
1190 
1191     if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
1192       return false;
1193 
1194     FormatToken *Macro = Tokens[Tokens.size() - 4];
1195     if (Macro->TokenText != "_T")
1196       return false;
1197 
1198     const char *Start = Macro->TokenText.data();
1199     const char *End = Last->TokenText.data() + Last->TokenText.size();
1200     String->TokenText = StringRef(Start, End - Start);
1201     String->IsFirst = Macro->IsFirst;
1202     String->LastNewlineOffset = Macro->LastNewlineOffset;
1203     String->WhitespaceRange = Macro->WhitespaceRange;
1204     String->OriginalColumn = Macro->OriginalColumn;
1205     String->ColumnWidth = encoding::columnWidthWithTabs(
1206         String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
1207 
1208     Tokens.pop_back();
1209     Tokens.pop_back();
1210     Tokens.pop_back();
1211     Tokens.back() = String;
1212     return true;
1213   }
1214 
1215   FormatToken *getNextToken() {
1216     if (GreaterStashed) {
1217       // Create a synthesized second '>' token.
1218       // FIXME: Increment Column and set OriginalColumn.
1219       Token Greater = FormatTok->Tok;
1220       FormatTok = new (Allocator.Allocate()) FormatToken;
1221       FormatTok->Tok = Greater;
1222       SourceLocation GreaterLocation =
1223           FormatTok->Tok.getLocation().getLocWithOffset(1);
1224       FormatTok->WhitespaceRange =
1225           SourceRange(GreaterLocation, GreaterLocation);
1226       FormatTok->TokenText = ">";
1227       FormatTok->ColumnWidth = 1;
1228       GreaterStashed = false;
1229       return FormatTok;
1230     }
1231 
1232     FormatTok = new (Allocator.Allocate()) FormatToken;
1233     readRawToken(*FormatTok);
1234     SourceLocation WhitespaceStart =
1235         FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
1236     FormatTok->IsFirst = IsFirstToken;
1237     IsFirstToken = false;
1238 
1239     // Consume and record whitespace until we find a significant token.
1240     unsigned WhitespaceLength = TrailingWhitespace;
1241     while (FormatTok->Tok.is(tok::unknown)) {
1242       for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
1243         switch (FormatTok->TokenText[i]) {
1244         case '\n':
1245           ++FormatTok->NewlinesBefore;
1246           // FIXME: This is technically incorrect, as it could also
1247           // be a literal backslash at the end of the line.
1248           if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
1249                          (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
1250                           FormatTok->TokenText[i - 2] != '\\')))
1251             FormatTok->HasUnescapedNewline = true;
1252           FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1253           Column = 0;
1254           break;
1255         case '\r':
1256         case '\f':
1257         case '\v':
1258           Column = 0;
1259           break;
1260         case ' ':
1261           ++Column;
1262           break;
1263         case '\t':
1264           Column += Style.TabWidth - Column % Style.TabWidth;
1265           break;
1266         case '\\':
1267           ++Column;
1268           if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
1269                              FormatTok->TokenText[i + 1] != '\n'))
1270             FormatTok->Type = TT_ImplicitStringLiteral;
1271           break;
1272         default:
1273           FormatTok->Type = TT_ImplicitStringLiteral;
1274           ++Column;
1275           break;
1276         }
1277       }
1278 
1279       if (FormatTok->Type == TT_ImplicitStringLiteral)
1280         break;
1281       WhitespaceLength += FormatTok->Tok.getLength();
1282 
1283       readRawToken(*FormatTok);
1284     }
1285 
1286     // In case the token starts with escaped newlines, we want to
1287     // take them into account as whitespace - this pattern is quite frequent
1288     // in macro definitions.
1289     // FIXME: Add a more explicit test.
1290     while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
1291            FormatTok->TokenText[1] == '\n') {
1292       // FIXME: ++FormatTok->NewlinesBefore is missing...
1293       WhitespaceLength += 2;
1294       Column = 0;
1295       FormatTok->TokenText = FormatTok->TokenText.substr(2);
1296     }
1297 
1298     FormatTok->WhitespaceRange = SourceRange(
1299         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1300 
1301     FormatTok->OriginalColumn = Column;
1302 
1303     TrailingWhitespace = 0;
1304     if (FormatTok->Tok.is(tok::comment)) {
1305       // FIXME: Add the trimmed whitespace to Column.
1306       StringRef UntrimmedText = FormatTok->TokenText;
1307       FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
1308       TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
1309     } else if (FormatTok->Tok.is(tok::raw_identifier)) {
1310       IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
1311       FormatTok->Tok.setIdentifierInfo(&Info);
1312       FormatTok->Tok.setKind(Info.getTokenID());
1313     } else if (FormatTok->Tok.is(tok::greatergreater)) {
1314       FormatTok->Tok.setKind(tok::greater);
1315       FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1316       GreaterStashed = true;
1317     }
1318 
1319     // Now FormatTok is the next non-whitespace token.
1320 
1321     StringRef Text = FormatTok->TokenText;
1322     size_t FirstNewlinePos = Text.find('\n');
1323     if (FirstNewlinePos == StringRef::npos) {
1324       // FIXME: ColumnWidth actually depends on the start column, we need to
1325       // take this into account when the token is moved.
1326       FormatTok->ColumnWidth =
1327           encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
1328       Column += FormatTok->ColumnWidth;
1329     } else {
1330       FormatTok->IsMultiline = true;
1331       // FIXME: ColumnWidth actually depends on the start column, we need to
1332       // take this into account when the token is moved.
1333       FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
1334           Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
1335 
1336       // The last line of the token always starts in column 0.
1337       // Thus, the length can be precomputed even in the presence of tabs.
1338       FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
1339           Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
1340           Encoding);
1341       Column = FormatTok->LastLineColumnWidth;
1342     }
1343 
1344     return FormatTok;
1345   }
1346 
1347   FormatToken *FormatTok;
1348   bool IsFirstToken;
1349   bool GreaterStashed;
1350   unsigned Column;
1351   unsigned TrailingWhitespace;
1352   Lexer &Lex;
1353   SourceManager &SourceMgr;
1354   FormatStyle &Style;
1355   IdentifierTable IdentTable;
1356   encoding::Encoding Encoding;
1357   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
1358   SmallVector<FormatToken *, 16> Tokens;
1359 
1360   void readRawToken(FormatToken &Tok) {
1361     Lex.LexFromRawLexer(Tok.Tok);
1362     Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1363                               Tok.Tok.getLength());
1364     // For formatting, treat unterminated string literals like normal string
1365     // literals.
1366     if (Tok.is(tok::unknown)) {
1367       if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
1368         Tok.Tok.setKind(tok::string_literal);
1369         Tok.IsUnterminatedLiteral = true;
1370       } else if (Style.Language == FormatStyle::LK_JavaScript &&
1371                  Tok.TokenText == "''") {
1372         Tok.Tok.setKind(tok::char_constant);
1373       }
1374     }
1375   }
1376 };
1377 
1378 static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1379   switch (Language) {
1380   case FormatStyle::LK_Cpp:
1381     return "C++";
1382   case FormatStyle::LK_JavaScript:
1383     return "JavaScript";
1384   case FormatStyle::LK_Proto:
1385     return "Proto";
1386   default:
1387     return "Unknown";
1388   }
1389 }
1390 
1391 class Formatter : public UnwrappedLineConsumer {
1392 public:
1393   Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
1394             const std::vector<CharSourceRange> &Ranges)
1395       : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1396         Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
1397         Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
1398         Encoding(encoding::detectEncoding(Lex.getBuffer())) {
1399     DEBUG(llvm::dbgs() << "File encoding: "
1400                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1401                                                                : "unknown")
1402                        << "\n");
1403     DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
1404                        << "\n");
1405   }
1406 
1407   tooling::Replacements format() {
1408     tooling::Replacements Result;
1409     FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
1410 
1411     UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
1412     bool StructuralError = Parser.parse();
1413     assert(UnwrappedLines.rbegin()->empty());
1414     for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1415          ++Run) {
1416       DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1417       SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1418       for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1419         AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1420       }
1421       tooling::Replacements RunResult =
1422           format(AnnotatedLines, StructuralError, Tokens);
1423       DEBUG({
1424         llvm::dbgs() << "Replacements for run " << Run << ":\n";
1425         for (tooling::Replacements::iterator I = RunResult.begin(),
1426                                              E = RunResult.end();
1427              I != E; ++I) {
1428           llvm::dbgs() << I->toString() << "\n";
1429         }
1430       });
1431       for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1432         delete AnnotatedLines[i];
1433       }
1434       Result.insert(RunResult.begin(), RunResult.end());
1435       Whitespaces.reset();
1436     }
1437     return Result;
1438   }
1439 
1440   tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1441                                bool StructuralError, FormatTokenLexer &Tokens) {
1442     TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
1443     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1444       Annotator.annotate(*AnnotatedLines[i]);
1445     }
1446     deriveLocalStyle(AnnotatedLines);
1447     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1448       Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1449     }
1450     computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
1451 
1452     Annotator.setCommentLineLevels(AnnotatedLines);
1453     ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding,
1454                                   BinPackInconclusiveFunctions);
1455     UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style);
1456     Formatter.format(AnnotatedLines, /*DryRun=*/false);
1457     return Whitespaces.generateReplacements();
1458   }
1459 
1460 private:
1461   // Determines which lines are affected by the SourceRanges given as input.
1462   // Returns \c true if at least one line between I and E or one of their
1463   // children is affected.
1464   bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
1465                             SmallVectorImpl<AnnotatedLine *>::iterator E) {
1466     bool SomeLineAffected = false;
1467     const AnnotatedLine *PreviousLine = NULL;
1468     while (I != E) {
1469       AnnotatedLine *Line = *I;
1470       Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
1471 
1472       // If a line is part of a preprocessor directive, it needs to be formatted
1473       // if any token within the directive is affected.
1474       if (Line->InPPDirective) {
1475         FormatToken *Last = Line->Last;
1476         SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
1477         while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
1478           Last = (*PPEnd)->Last;
1479           ++PPEnd;
1480         }
1481 
1482         if (affectsTokenRange(*Line->First, *Last,
1483                               /*IncludeLeadingNewlines=*/false)) {
1484           SomeLineAffected = true;
1485           markAllAsAffected(I, PPEnd);
1486         }
1487         I = PPEnd;
1488         continue;
1489       }
1490 
1491       if (nonPPLineAffected(Line, PreviousLine))
1492         SomeLineAffected = true;
1493 
1494       PreviousLine = Line;
1495       ++I;
1496     }
1497     return SomeLineAffected;
1498   }
1499 
  // Determines whether 'Line' is affected by the SourceRanges given as input.
  // Returns \c true if line or one of its children is affected.
1502   bool nonPPLineAffected(AnnotatedLine *Line,
1503                          const AnnotatedLine *PreviousLine) {
1504     bool SomeLineAffected = false;
1505     Line->ChildrenAffected =
1506         computeAffectedLines(Line->Children.begin(), Line->Children.end());
1507     if (Line->ChildrenAffected)
1508       SomeLineAffected = true;
1509 
1510     // Stores whether one of the line's tokens is directly affected.
1511     bool SomeTokenAffected = false;
1512     // Stores whether we need to look at the leading newlines of the next token
1513     // in order to determine whether it was affected.
1514     bool IncludeLeadingNewlines = false;
1515 
1516     // Stores whether the first child line of any of this line's tokens is
1517     // affected.
1518     bool SomeFirstChildAffected = false;
1519 
1520     for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
1521       // Determine whether 'Tok' was affected.
1522       if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
1523         SomeTokenAffected = true;
1524 
1525       // Determine whether the first child of 'Tok' was affected.
1526       if (!Tok->Children.empty() && Tok->Children.front()->Affected)
1527         SomeFirstChildAffected = true;
1528 
1529       IncludeLeadingNewlines = Tok->Children.empty();
1530     }
1531 
1532     // Was this line moved, i.e. has it previously been on the same line as an
1533     // affected line?
1534     bool LineMoved = PreviousLine && PreviousLine->Affected &&
1535                      Line->First->NewlinesBefore == 0;
1536 
1537     bool IsContinuedComment = Line->First->is(tok::comment) &&
1538                               Line->First->Next == NULL &&
1539                               Line->First->NewlinesBefore < 2 && PreviousLine &&
1540                               PreviousLine->Affected &&
1541                               PreviousLine->Last->is(tok::comment);
1542 
1543     if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
1544         IsContinuedComment) {
1545       Line->Affected = true;
1546       SomeLineAffected = true;
1547     }
1548     return SomeLineAffected;
1549   }
1550 
1551   // Marks all lines between I and E as well as all their children as affected.
1552   void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
1553                          SmallVectorImpl<AnnotatedLine *>::iterator E) {
1554     while (I != E) {
1555       (*I)->Affected = true;
1556       markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
1557       ++I;
1558     }
1559   }
1560 
1561   // Returns true if the range from 'First' to 'Last' intersects with one of the
1562   // input ranges.
1563   bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
1564                          bool IncludeLeadingNewlines) {
1565     SourceLocation Start = First.WhitespaceRange.getBegin();
1566     if (!IncludeLeadingNewlines)
1567       Start = Start.getLocWithOffset(First.LastNewlineOffset);
1568     SourceLocation End = Last.getStartOfNonWhitespace();
1569     if (Last.TokenText.size() > 0)
1570       End = End.getLocWithOffset(Last.TokenText.size() - 1);
1571     CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
1572     return affectsCharSourceRange(Range);
1573   }
1574 
  // Returns true if one of the input ranges intersects the leading empty lines
  // before 'Tok'.
1577   bool affectsLeadingEmptyLines(const FormatToken &Tok) {
1578     CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
1579         Tok.WhitespaceRange.getBegin(),
1580         Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
1581     return affectsCharSourceRange(EmptyLineRange);
1582   }
1583 
1584   // Returns true if 'Range' intersects with one of the input ranges.
1585   bool affectsCharSourceRange(const CharSourceRange &Range) {
1586     for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
1587                                                           E = Ranges.end();
1588          I != E; ++I) {
1589       if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
1590           !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
1591         return true;
1592     }
1593     return false;
1594   }
1595 
1596   static bool inputUsesCRLF(StringRef Text) {
1597     return Text.count('\r') * 2 > Text.count('\n');
1598   }
1599 
1600   void
1601   deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1602     unsigned CountBoundToVariable = 0;
1603     unsigned CountBoundToType = 0;
1604     bool HasCpp03IncompatibleFormat = false;
1605     bool HasBinPackedFunction = false;
1606     bool HasOnePerLineFunction = false;
1607     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1608       if (!AnnotatedLines[i]->First->Next)
1609         continue;
1610       FormatToken *Tok = AnnotatedLines[i]->First->Next;
1611       while (Tok->Next) {
1612         if (Tok->Type == TT_PointerOrReference) {
1613           bool SpacesBefore =
1614               Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1615           bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1616                              Tok->Next->WhitespaceRange.getEnd();
1617           if (SpacesBefore && !SpacesAfter)
1618             ++CountBoundToVariable;
1619           else if (!SpacesBefore && SpacesAfter)
1620             ++CountBoundToType;
1621         }
1622 
1623         if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1624           if (Tok->is(tok::coloncolon) &&
1625               Tok->Previous->Type == TT_TemplateOpener)
1626             HasCpp03IncompatibleFormat = true;
1627           if (Tok->Type == TT_TemplateCloser &&
1628               Tok->Previous->Type == TT_TemplateCloser)
1629             HasCpp03IncompatibleFormat = true;
1630         }
1631 
1632         if (Tok->PackingKind == PPK_BinPacked)
1633           HasBinPackedFunction = true;
1634         if (Tok->PackingKind == PPK_OnePerLine)
1635           HasOnePerLineFunction = true;
1636 
1637         Tok = Tok->Next;
1638       }
1639     }
1640     if (Style.DerivePointerBinding) {
1641       if (CountBoundToType > CountBoundToVariable)
1642         Style.PointerBindsToType = true;
1643       else if (CountBoundToType < CountBoundToVariable)
1644         Style.PointerBindsToType = false;
1645     }
1646     if (Style.Standard == FormatStyle::LS_Auto) {
1647       Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1648                                                   : FormatStyle::LS_Cpp03;
1649     }
1650     BinPackInconclusiveFunctions =
1651         HasBinPackedFunction || !HasOnePerLineFunction;
1652   }
1653 
1654   void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
1655     assert(!UnwrappedLines.empty());
1656     UnwrappedLines.back().push_back(TheLine);
1657   }
1658 
1659   void finishRun() override {
1660     UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1661   }
1662 
  // Style in effect. deriveLocalStyle() may overwrite PointerBindsToType and
  // Standard based on the input.
  FormatStyle Style;
  // NOTE(review): presumably the lexer that supplies the tokens to format;
  // confirm against the constructor and the argument-less format().
  Lexer &Lex;
  // Used to order source locations and handed to the ContinuationIndenter.
  SourceManager &SourceMgr;
  // Handed to the indenter and the line formatter; generateReplacements()
  // yields the result of format().
  WhitespaceManager Whitespaces;
  // Input ranges; a line is affected if it intersects one of them.
  SmallVector<CharSourceRange, 8> Ranges;
  // Unwrapped lines grouped by run: finishRun() starts a new group and
  // consumeUnwrappedLine() appends to the most recent one.
  SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;

  // Handed to the ContinuationIndenter.
  encoding::Encoding Encoding;
  // Set by deriveLocalStyle(): true if a bin-packed token was seen or no
  // one-per-line token was seen. Handed to the ContinuationIndenter.
  bool BinPackInconclusiveFunctions;
1672 };
1673 
1674 } // end anonymous namespace
1675 
1676 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1677                                SourceManager &SourceMgr,
1678                                std::vector<CharSourceRange> Ranges) {
1679   Formatter formatter(Style, Lex, SourceMgr, Ranges);
1680   return formatter.format();
1681 }
1682 
1683 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1684                                std::vector<tooling::Range> Ranges,
1685                                StringRef FileName) {
1686   FileManager Files((FileSystemOptions()));
1687   DiagnosticsEngine Diagnostics(
1688       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1689       new DiagnosticOptions);
1690   SourceManager SourceMgr(Diagnostics, Files);
1691   llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1692   const clang::FileEntry *Entry =
1693       Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1694   SourceMgr.overrideFileContents(Entry, Buf);
1695   FileID ID =
1696       SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1697   Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
1698             getFormattingLangOpts(Style.Standard));
1699   SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1700   std::vector<CharSourceRange> CharRanges;
1701   for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1702     SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset());
1703     SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
1704     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1705   }
1706   return reformat(Style, Lex, SourceMgr, CharRanges);
1707 }
1708 
1709 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) {
1710   LangOptions LangOpts;
1711   LangOpts.CPlusPlus = 1;
1712   LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1713   LangOpts.LineComment = 1;
1714   LangOpts.Bool = 1;
1715   LangOpts.ObjC1 = 1;
1716   LangOpts.ObjC2 = 1;
1717   return LangOpts;
1718 }
1719 
// Help text describing the accepted forms of the -style option: the name of a
// predefined style, "file" to look up a .clang-format file in the parent
// directories of the source file, or an inline "{key: value, ...}"
// configuration.
const char *StyleOptionHelpDescription =
    "Coding style, currently supports:\n"
    "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
    "Use -style=file to load style configuration from\n"
    ".clang-format file located in one of the parent\n"
    "directories of the source file (or current\n"
    "directory for stdin).\n"
    "Use -style=\"{key: value, ...}\" to set specific\n"
    "parameters, e.g.:\n"
    "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
1730 
1731 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
1732   if (FileName.endswith_lower(".js")) {
1733     return FormatStyle::LK_JavaScript;
1734   } else if (FileName.endswith_lower(".proto") ||
1735              FileName.endswith_lower(".protodevel")) {
1736     return FormatStyle::LK_Proto;
1737   }
1738   return FormatStyle::LK_Cpp;
1739 }
1740 
1741 FormatStyle getStyle(StringRef StyleName, StringRef FileName,
1742                      StringRef FallbackStyle) {
1743   FormatStyle Style = getLLVMStyle();
1744   Style.Language = getLanguageByFileName(FileName);
1745   if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
1746     llvm::errs() << "Invalid fallback style \"" << FallbackStyle
1747                  << "\" using LLVM style\n";
1748     return Style;
1749   }
1750 
1751   if (StyleName.startswith("{")) {
1752     // Parse YAML/JSON style from the command line.
1753     if (llvm::error_code ec = parseConfiguration(StyleName, &Style)) {
1754       llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
1755                    << FallbackStyle << " style\n";
1756     }
1757     return Style;
1758   }
1759 
1760   if (!StyleName.equals_lower("file")) {
1761     if (!getPredefinedStyle(StyleName, Style.Language, &Style))
1762       llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1763                    << " style\n";
1764     return Style;
1765   }
1766 
1767   // Look for .clang-format/_clang-format file in the file's parent directories.
1768   SmallString<128> UnsuitableConfigFiles;
1769   SmallString<128> Path(FileName);
1770   llvm::sys::fs::make_absolute(Path);
1771   for (StringRef Directory = Path; !Directory.empty();
1772        Directory = llvm::sys::path::parent_path(Directory)) {
1773     if (!llvm::sys::fs::is_directory(Directory))
1774       continue;
1775     SmallString<128> ConfigFile(Directory);
1776 
1777     llvm::sys::path::append(ConfigFile, ".clang-format");
1778     DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1779     bool IsFile = false;
1780     // Ignore errors from is_regular_file: we only need to know if we can read
1781     // the file or not.
1782     llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1783 
1784     if (!IsFile) {
1785       // Try _clang-format too, since dotfiles are not commonly used on Windows.
1786       ConfigFile = Directory;
1787       llvm::sys::path::append(ConfigFile, "_clang-format");
1788       DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1789       llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1790     }
1791 
1792     if (IsFile) {
1793       std::unique_ptr<llvm::MemoryBuffer> Text;
1794       if (llvm::error_code ec =
1795               llvm::MemoryBuffer::getFile(ConfigFile.c_str(), Text)) {
1796         llvm::errs() << ec.message() << "\n";
1797         break;
1798       }
1799       if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
1800         if (ec == llvm::errc::not_supported) {
1801           if (!UnsuitableConfigFiles.empty())
1802             UnsuitableConfigFiles.append(", ");
1803           UnsuitableConfigFiles.append(ConfigFile);
1804           continue;
1805         }
1806         llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
1807                      << "\n";
1808         break;
1809       }
1810       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
1811       return Style;
1812     }
1813   }
1814   llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
1815                << " style\n";
1816   if (!UnsuitableConfigFiles.empty()) {
1817     llvm::errs() << "Configuration file(s) do(es) not support "
1818                  << getLanguageName(Style.Language) << ": "
1819                  << UnsuitableConfigFiles << "\n";
1820   }
1821   return Style;
1822 }
1823 
1824 } // namespace format
1825 } // namespace clang
1826