1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "format-formatter"
17 
18 #include "ContinuationIndenter.h"
19 #include "TokenAnnotator.h"
20 #include "UnwrappedLineParser.h"
21 #include "WhitespaceManager.h"
22 #include "clang/Basic/Diagnostic.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Format/Format.h"
25 #include "clang/Lex/Lexer.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/YAMLTraits.h"
31 #include <queue>
32 #include <string>
33 
34 using clang::format::FormatStyle;
35 
36 namespace llvm {
37 namespace yaml {
38 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
39   static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
40     IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
41     IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
42     IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
43   }
44 };
45 
46 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
47   static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
48     IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
49     IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
50     IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
51     IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
52     IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
53   }
54 };
55 
56 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
57   static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
58     IO.enumCase(Value, "Never", FormatStyle::UT_Never);
59     IO.enumCase(Value, "false", FormatStyle::UT_Never);
60     IO.enumCase(Value, "Always", FormatStyle::UT_Always);
61     IO.enumCase(Value, "true", FormatStyle::UT_Always);
62     IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
63   }
64 };
65 
66 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
67   static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
68     IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
69     IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
70     IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
71     IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
72     IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
73   }
74 };
75 
76 template <>
77 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
78   static void enumeration(IO &IO,
79                           FormatStyle::NamespaceIndentationKind &Value) {
80     IO.enumCase(Value, "None", FormatStyle::NI_None);
81     IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
82     IO.enumCase(Value, "All", FormatStyle::NI_All);
83   }
84 };
85 
86 template <>
87 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
88   static void enumeration(IO &IO,
89                           FormatStyle::SpaceBeforeParensOptions &Value) {
90     IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
91     IO.enumCase(Value, "ControlStatements",
92                 FormatStyle::SBPO_ControlStatements);
93     IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
94 
95     // For backward compatibility.
96     IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
97     IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
98   }
99 };
100 
101 template <> struct MappingTraits<FormatStyle> {
102   static void mapping(IO &IO, FormatStyle &Style) {
103     // When reading, read the language first, we need it for getPredefinedStyle.
104     IO.mapOptional("Language", Style.Language);
105 
106     if (IO.outputting()) {
107       StringRef StylesArray[] = { "LLVM",    "Google", "Chromium",
108                                   "Mozilla", "WebKit", "GNU" };
109       ArrayRef<StringRef> Styles(StylesArray);
110       for (size_t i = 0, e = Styles.size(); i < e; ++i) {
111         StringRef StyleName(Styles[i]);
112         FormatStyle PredefinedStyle;
113         if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
114             Style == PredefinedStyle) {
115           IO.mapOptional("# BasedOnStyle", StyleName);
116           break;
117         }
118       }
119     } else {
120       StringRef BasedOnStyle;
121       IO.mapOptional("BasedOnStyle", BasedOnStyle);
122       if (!BasedOnStyle.empty()) {
123         FormatStyle::LanguageKind OldLanguage = Style.Language;
124         FormatStyle::LanguageKind Language =
125             ((FormatStyle *)IO.getContext())->Language;
126         if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
127           IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
128           return;
129         }
130         Style.Language = OldLanguage;
131       }
132     }
133 
134     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
135     IO.mapOptional("ConstructorInitializerIndentWidth",
136                    Style.ConstructorInitializerIndentWidth);
137     IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
138     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
139     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
140                    Style.AllowAllParametersOfDeclarationOnNextLine);
141     IO.mapOptional("AllowShortIfStatementsOnASingleLine",
142                    Style.AllowShortIfStatementsOnASingleLine);
143     IO.mapOptional("AllowShortLoopsOnASingleLine",
144                    Style.AllowShortLoopsOnASingleLine);
145     IO.mapOptional("AllowShortFunctionsOnASingleLine",
146                    Style.AllowShortFunctionsOnASingleLine);
147     IO.mapOptional("AlwaysBreakTemplateDeclarations",
148                    Style.AlwaysBreakTemplateDeclarations);
149     IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
150                    Style.AlwaysBreakBeforeMultilineStrings);
151     IO.mapOptional("BreakBeforeBinaryOperators",
152                    Style.BreakBeforeBinaryOperators);
153     IO.mapOptional("BreakBeforeTernaryOperators",
154                    Style.BreakBeforeTernaryOperators);
155     IO.mapOptional("BreakConstructorInitializersBeforeComma",
156                    Style.BreakConstructorInitializersBeforeComma);
157     IO.mapOptional("BinPackParameters", Style.BinPackParameters);
158     IO.mapOptional("ColumnLimit", Style.ColumnLimit);
159     IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
160                    Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
161     IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding);
162     IO.mapOptional("ExperimentalAutoDetectBinPacking",
163                    Style.ExperimentalAutoDetectBinPacking);
164     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
165     IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
166     IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
167     IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
168     IO.mapOptional("ObjCSpaceBeforeProtocolList",
169                    Style.ObjCSpaceBeforeProtocolList);
170     IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
171                    Style.PenaltyBreakBeforeFirstCallParameter);
172     IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
173     IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
174     IO.mapOptional("PenaltyBreakFirstLessLess",
175                    Style.PenaltyBreakFirstLessLess);
176     IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
177     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
178                    Style.PenaltyReturnTypeOnItsOwnLine);
179     IO.mapOptional("PointerBindsToType", Style.PointerBindsToType);
180     IO.mapOptional("SpacesBeforeTrailingComments",
181                    Style.SpacesBeforeTrailingComments);
182     IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
183     IO.mapOptional("Standard", Style.Standard);
184     IO.mapOptional("IndentWidth", Style.IndentWidth);
185     IO.mapOptional("TabWidth", Style.TabWidth);
186     IO.mapOptional("UseTab", Style.UseTab);
187     IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
188     IO.mapOptional("IndentFunctionDeclarationAfterType",
189                    Style.IndentFunctionDeclarationAfterType);
190     IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
191     IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
192     IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
193     IO.mapOptional("SpacesInCStyleCastParentheses",
194                    Style.SpacesInCStyleCastParentheses);
195     IO.mapOptional("SpacesInContainerLiterals",
196                    Style.SpacesInContainerLiterals);
197     IO.mapOptional("SpaceBeforeAssignmentOperators",
198                    Style.SpaceBeforeAssignmentOperators);
199     IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
200     IO.mapOptional("CommentPragmas", Style.CommentPragmas);
201 
202     // For backward compatibility.
203     if (!IO.outputting()) {
204       IO.mapOptional("SpaceAfterControlStatementKeyword",
205                      Style.SpaceBeforeParens);
206     }
207     IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
208   }
209 };
210 
211 // Allows to read vector<FormatStyle> while keeping default values.
212 // IO.getContext() should contain a pointer to the FormatStyle structure, that
213 // will be used to get default values for missing keys.
214 // If the first element has no Language specified, it will be treated as the
215 // default one for the following elements.
216 template <> struct DocumentListTraits<std::vector<FormatStyle> > {
217   static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
218     return Seq.size();
219   }
220   static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
221                               size_t Index) {
222     if (Index >= Seq.size()) {
223       assert(Index == Seq.size());
224       FormatStyle Template;
225       if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
226         Template = Seq[0];
227       } else {
228         Template = *((const FormatStyle*)IO.getContext());
229         Template.Language = FormatStyle::LK_None;
230       }
231       Seq.resize(Index + 1, Template);
232     }
233     return Seq[Index];
234   }
235 };
236 }
237 }
238 
239 namespace clang {
240 namespace format {
241 
242 FormatStyle getLLVMStyle() {
243   FormatStyle LLVMStyle;
244   LLVMStyle.Language = FormatStyle::LK_Cpp;
245   LLVMStyle.AccessModifierOffset = -2;
246   LLVMStyle.AlignEscapedNewlinesLeft = false;
247   LLVMStyle.AlignTrailingComments = true;
248   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
249   LLVMStyle.AllowShortFunctionsOnASingleLine = true;
250   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
251   LLVMStyle.AllowShortLoopsOnASingleLine = false;
252   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
253   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
254   LLVMStyle.BinPackParameters = true;
255   LLVMStyle.BreakBeforeBinaryOperators = false;
256   LLVMStyle.BreakBeforeTernaryOperators = true;
257   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
258   LLVMStyle.BreakConstructorInitializersBeforeComma = false;
259   LLVMStyle.ColumnLimit = 80;
260   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
261   LLVMStyle.ConstructorInitializerIndentWidth = 4;
262   LLVMStyle.Cpp11BracedListStyle = false;
263   LLVMStyle.DerivePointerBinding = false;
264   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
265   LLVMStyle.IndentCaseLabels = false;
266   LLVMStyle.IndentFunctionDeclarationAfterType = false;
267   LLVMStyle.IndentWidth = 2;
268   LLVMStyle.TabWidth = 8;
269   LLVMStyle.MaxEmptyLinesToKeep = 1;
270   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
271   LLVMStyle.ObjCSpaceAfterProperty = false;
272   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
273   LLVMStyle.PointerBindsToType = false;
274   LLVMStyle.SpacesBeforeTrailingComments = 1;
275   LLVMStyle.Standard = FormatStyle::LS_Cpp03;
276   LLVMStyle.UseTab = FormatStyle::UT_Never;
277   LLVMStyle.SpacesInParentheses = false;
278   LLVMStyle.SpaceInEmptyParentheses = false;
279   LLVMStyle.SpacesInContainerLiterals = true;
280   LLVMStyle.SpacesInCStyleCastParentheses = false;
281   LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
282   LLVMStyle.SpaceBeforeAssignmentOperators = true;
283   LLVMStyle.ContinuationIndentWidth = 4;
284   LLVMStyle.SpacesInAngles = false;
285   LLVMStyle.CommentPragmas = "^ IWYU pragma:";
286 
287   LLVMStyle.PenaltyBreakComment = 300;
288   LLVMStyle.PenaltyBreakFirstLessLess = 120;
289   LLVMStyle.PenaltyBreakString = 1000;
290   LLVMStyle.PenaltyExcessCharacter = 1000000;
291   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
292   LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
293 
294   return LLVMStyle;
295 }
296 
297 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
298   FormatStyle GoogleStyle = getLLVMStyle();
299   GoogleStyle.Language = Language;
300 
301   GoogleStyle.AccessModifierOffset = -1;
302   GoogleStyle.AlignEscapedNewlinesLeft = true;
303   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
304   GoogleStyle.AllowShortLoopsOnASingleLine = true;
305   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
306   GoogleStyle.AlwaysBreakTemplateDeclarations = true;
307   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
308   GoogleStyle.Cpp11BracedListStyle = true;
309   GoogleStyle.DerivePointerBinding = true;
310   GoogleStyle.IndentCaseLabels = true;
311   GoogleStyle.IndentFunctionDeclarationAfterType = true;
312   GoogleStyle.ObjCSpaceAfterProperty = false;
313   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
314   GoogleStyle.PointerBindsToType = true;
315   GoogleStyle.SpacesBeforeTrailingComments = 2;
316   GoogleStyle.Standard = FormatStyle::LS_Auto;
317 
318   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
319   GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
320 
321   if (Language == FormatStyle::LK_JavaScript) {
322     GoogleStyle.BreakBeforeTernaryOperators = false;
323     GoogleStyle.MaxEmptyLinesToKeep = 2;
324     GoogleStyle.SpacesInContainerLiterals = false;
325   } else if (Language == FormatStyle::LK_Proto) {
326     GoogleStyle.AllowShortFunctionsOnASingleLine = false;
327   }
328 
329   return GoogleStyle;
330 }
331 
332 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
333   FormatStyle ChromiumStyle = getGoogleStyle(Language);
334   ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
335   ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
336   ChromiumStyle.AllowShortLoopsOnASingleLine = false;
337   ChromiumStyle.BinPackParameters = false;
338   ChromiumStyle.DerivePointerBinding = false;
339   ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
340   return ChromiumStyle;
341 }
342 
343 FormatStyle getMozillaStyle() {
344   FormatStyle MozillaStyle = getLLVMStyle();
345   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
346   MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
347   MozillaStyle.DerivePointerBinding = true;
348   MozillaStyle.IndentCaseLabels = true;
349   MozillaStyle.ObjCSpaceAfterProperty = true;
350   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
351   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
352   MozillaStyle.PointerBindsToType = true;
353   return MozillaStyle;
354 }
355 
356 FormatStyle getWebKitStyle() {
357   FormatStyle Style = getLLVMStyle();
358   Style.AccessModifierOffset = -4;
359   Style.AlignTrailingComments = false;
360   Style.BreakBeforeBinaryOperators = true;
361   Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
362   Style.BreakConstructorInitializersBeforeComma = true;
363   Style.ColumnLimit = 0;
364   Style.IndentWidth = 4;
365   Style.NamespaceIndentation = FormatStyle::NI_Inner;
366   Style.ObjCSpaceAfterProperty = true;
367   Style.PointerBindsToType = true;
368   return Style;
369 }
370 
371 FormatStyle getGNUStyle() {
372   FormatStyle Style = getLLVMStyle();
373   Style.BreakBeforeBinaryOperators = true;
374   Style.BreakBeforeBraces = FormatStyle::BS_GNU;
375   Style.BreakBeforeTernaryOperators = true;
376   Style.ColumnLimit = 79;
377   Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
378   return Style;
379 }
380 
381 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
382                         FormatStyle *Style) {
383   if (Name.equals_lower("llvm")) {
384     *Style = getLLVMStyle();
385   } else if (Name.equals_lower("chromium")) {
386     *Style = getChromiumStyle(Language);
387   } else if (Name.equals_lower("mozilla")) {
388     *Style = getMozillaStyle();
389   } else if (Name.equals_lower("google")) {
390     *Style = getGoogleStyle(Language);
391   } else if (Name.equals_lower("webkit")) {
392     *Style = getWebKitStyle();
393   } else if (Name.equals_lower("gnu")) {
394     *Style = getGNUStyle();
395   } else {
396     return false;
397   }
398 
399   Style->Language = Language;
400   return true;
401 }
402 
403 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
404   assert(Style);
405   FormatStyle::LanguageKind Language = Style->Language;
406   assert(Language != FormatStyle::LK_None);
407   if (Text.trim().empty())
408     return llvm::make_error_code(llvm::errc::invalid_argument);
409 
410   std::vector<FormatStyle> Styles;
411   llvm::yaml::Input Input(Text);
412   // DocumentListTraits<vector<FormatStyle>> uses the context to get default
413   // values for the fields, keys for which are missing from the configuration.
414   // Mapping also uses the context to get the language to find the correct
415   // base style.
416   Input.setContext(Style);
417   Input >> Styles;
418   if (Input.error())
419     return Input.error();
420 
421   for (unsigned i = 0; i < Styles.size(); ++i) {
422     // Ensures that only the first configuration can skip the Language option.
423     if (Styles[i].Language == FormatStyle::LK_None && i != 0)
424       return llvm::make_error_code(llvm::errc::invalid_argument);
425     // Ensure that each language is configured at most once.
426     for (unsigned j = 0; j < i; ++j) {
427       if (Styles[i].Language == Styles[j].Language) {
428         DEBUG(llvm::dbgs()
429               << "Duplicate languages in the config file on positions " << j
430               << " and " << i << "\n");
431         return llvm::make_error_code(llvm::errc::invalid_argument);
432       }
433     }
434   }
435   // Look for a suitable configuration starting from the end, so we can
436   // find the configuration for the specific language first, and the default
437   // configuration (which can only be at slot 0) after it.
438   for (int i = Styles.size() - 1; i >= 0; --i) {
439     if (Styles[i].Language == Language ||
440         Styles[i].Language == FormatStyle::LK_None) {
441       *Style = Styles[i];
442       Style->Language = Language;
443       return llvm::make_error_code(llvm::errc::success);
444     }
445   }
446   return llvm::make_error_code(llvm::errc::not_supported);
447 }
448 
449 std::string configurationAsText(const FormatStyle &Style) {
450   std::string Text;
451   llvm::raw_string_ostream Stream(Text);
452   llvm::yaml::Output Output(Stream);
453   // We use the same mapping method for input and output, so we need a non-const
454   // reference here.
455   FormatStyle NonConstStyle = Style;
456   Output << NonConstStyle;
457   return Stream.str();
458 }
459 
460 namespace {
461 
462 class NoColumnLimitFormatter {
463 public:
464   NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {}
465 
466   /// \brief Formats the line starting at \p State, simply keeping all of the
467   /// input's line breaking decisions.
468   void format(unsigned FirstIndent, const AnnotatedLine *Line) {
469     LineState State =
470         Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false);
471     while (State.NextToken != NULL) {
472       bool Newline =
473           Indenter->mustBreak(State) ||
474           (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
475       Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
476     }
477   }
478 
479 private:
480   ContinuationIndenter *Indenter;
481 };
482 
483 class LineJoiner {
484 public:
485   LineJoiner(const FormatStyle &Style) : Style(Style) {}
486 
487   /// \brief Calculates how many lines can be merged into 1 starting at \p I.
488   unsigned
489   tryFitMultipleLinesInOne(unsigned Indent,
490                            SmallVectorImpl<AnnotatedLine *>::const_iterator I,
491                            SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
492     // We can never merge stuff if there are trailing line comments.
493     const AnnotatedLine *TheLine = *I;
494     if (TheLine->Last->Type == TT_LineComment)
495       return 0;
496 
497     if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
498       return 0;
499 
500     unsigned Limit =
501         Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
502     // If we already exceed the column limit, we set 'Limit' to 0. The different
503     // tryMerge..() functions can then decide whether to still do merging.
504     Limit = TheLine->Last->TotalLength > Limit
505                 ? 0
506                 : Limit - TheLine->Last->TotalLength;
507 
508     if (I + 1 == E || I[1]->Type == LT_Invalid)
509       return 0;
510 
511     if (TheLine->Last->Type == TT_FunctionLBrace &&
512         TheLine->First != TheLine->Last) {
513       return Style.AllowShortFunctionsOnASingleLine
514                  ? tryMergeSimpleBlock(I, E, Limit)
515                  : 0;
516     }
517     if (TheLine->Last->is(tok::l_brace)) {
518       return Style.BreakBeforeBraces == FormatStyle::BS_Attach
519                  ? tryMergeSimpleBlock(I, E, Limit)
520                  : 0;
521     }
522     if (I[1]->First->Type == TT_FunctionLBrace &&
523         Style.BreakBeforeBraces != FormatStyle::BS_Attach) {
524       // Check for Limit <= 2 to account for the " {".
525       if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine)))
526         return 0;
527       Limit -= 2;
528 
529       unsigned MergedLines = 0;
530       if (Style.AllowShortFunctionsOnASingleLine) {
531         MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
532         // If we managed to merge the block, count the function header, which is
533         // on a separate line.
534         if (MergedLines > 0)
535           ++MergedLines;
536       }
537       return MergedLines;
538     }
539     if (TheLine->First->is(tok::kw_if)) {
540       return Style.AllowShortIfStatementsOnASingleLine
541                  ? tryMergeSimpleControlStatement(I, E, Limit)
542                  : 0;
543     }
544     if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
545       return Style.AllowShortLoopsOnASingleLine
546                  ? tryMergeSimpleControlStatement(I, E, Limit)
547                  : 0;
548     }
549     if (TheLine->InPPDirective &&
550         (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
551       return tryMergeSimplePPDirective(I, E, Limit);
552     }
553     return 0;
554   }
555 
556 private:
557   unsigned
558   tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
559                             SmallVectorImpl<AnnotatedLine *>::const_iterator E,
560                             unsigned Limit) {
561     if (Limit == 0)
562       return 0;
563     if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)
564       return 0;
565     if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
566       return 0;
567     if (1 + I[1]->Last->TotalLength > Limit)
568       return 0;
569     return 1;
570   }
571 
572   unsigned tryMergeSimpleControlStatement(
573       SmallVectorImpl<AnnotatedLine *>::const_iterator I,
574       SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
575     if (Limit == 0)
576       return 0;
577     if ((Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
578          Style.BreakBeforeBraces == FormatStyle::BS_GNU) &&
579         I[1]->First->is(tok::l_brace))
580       return 0;
581     if (I[1]->InPPDirective != (*I)->InPPDirective ||
582         (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline))
583       return 0;
584     Limit = limitConsideringMacros(I + 1, E, Limit);
585     AnnotatedLine &Line = **I;
586     if (Line.Last->isNot(tok::r_paren))
587       return 0;
588     if (1 + I[1]->Last->TotalLength > Limit)
589       return 0;
590     if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
591                              tok::kw_while) ||
592         I[1]->First->Type == TT_LineComment)
593       return 0;
594     // Only inline simple if's (no nested if or else).
595     if (I + 2 != E && Line.First->is(tok::kw_if) &&
596         I[2]->First->is(tok::kw_else))
597       return 0;
598     return 1;
599   }
600 
601   unsigned
602   tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
603                       SmallVectorImpl<AnnotatedLine *>::const_iterator E,
604                       unsigned Limit) {
605     // First, check that the current line allows merging. This is the case if
606     // we're not in a control flow statement and the last token is an opening
607     // brace.
608     AnnotatedLine &Line = **I;
609     if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
610                             tok::kw_else, tok::kw_try, tok::kw_catch,
611                             tok::kw_for,
612                             // This gets rid of all ObjC @ keywords and methods.
613                             tok::at, tok::minus, tok::plus))
614       return 0;
615 
616     FormatToken *Tok = I[1]->First;
617     if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
618         (Tok->getNextNonComment() == NULL ||
619          Tok->getNextNonComment()->is(tok::semi))) {
620       // We merge empty blocks even if the line exceeds the column limit.
621       Tok->SpacesRequiredBefore = 0;
622       Tok->CanBreakBefore = true;
623       return 1;
624     } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
625       // Check that we still have three lines and they fit into the limit.
626       if (I + 2 == E || I[2]->Type == LT_Invalid)
627         return 0;
628       Limit = limitConsideringMacros(I + 2, E, Limit);
629 
630       if (!nextTwoLinesFitInto(I, Limit))
631         return 0;
632 
633       // Second, check that the next line does not contain any braces - if it
634       // does, readability declines when putting it into a single line.
635       if (I[1]->Last->Type == TT_LineComment || Tok->MustBreakBefore)
636         return 0;
637       do {
638         if (Tok->isOneOf(tok::l_brace, tok::r_brace))
639           return 0;
640         Tok = Tok->Next;
641       } while (Tok != NULL);
642 
643       // Last, check that the third line contains a single closing brace.
644       Tok = I[2]->First;
645       if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) ||
646           Tok->MustBreakBefore)
647         return 0;
648 
649       return 2;
650     }
651     return 0;
652   }
653 
654   /// Returns the modified column limit for \p I if it is inside a macro and
655   /// needs a trailing '\'.
656   unsigned
657   limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
658                          SmallVectorImpl<AnnotatedLine *>::const_iterator E,
659                          unsigned Limit) {
660     if (I[0]->InPPDirective && I + 1 != E &&
661         !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) {
662       return Limit < 2 ? 0 : Limit - 2;
663     }
664     return Limit;
665   }
666 
667   bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
668                            unsigned Limit) {
669     return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
670   }
671 
672   bool containsMustBreak(const AnnotatedLine *Line) {
673     for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
674       if (Tok->MustBreakBefore)
675         return true;
676     }
677     return false;
678   }
679 
680   const FormatStyle &Style;
681 };
682 
683 class UnwrappedLineFormatter {
684 public:
685   UnwrappedLineFormatter(ContinuationIndenter *Indenter,
686                          WhitespaceManager *Whitespaces,
687                          const FormatStyle &Style)
688       : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
689         Joiner(Style) {}
690 
691   unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
692                   int AdditionalIndent = 0, bool FixBadIndentation = false) {
693     assert(!Lines.empty());
694     unsigned Penalty = 0;
695     std::vector<int> IndentForLevel;
696     for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i)
697       IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
698     const AnnotatedLine *PreviousLine = NULL;
699     for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(),
700                                                           E = Lines.end();
701          I != E; ++I) {
702       const AnnotatedLine &TheLine = **I;
703       const FormatToken *FirstTok = TheLine.First;
704       int Offset = getIndentOffset(*FirstTok);
705 
706       // Determine indent and try to merge multiple unwrapped lines.
707       unsigned Indent;
708       if (TheLine.InPPDirective) {
709         Indent = TheLine.Level * Style.IndentWidth;
710       } else {
711         while (IndentForLevel.size() <= TheLine.Level)
712           IndentForLevel.push_back(-1);
713         IndentForLevel.resize(TheLine.Level + 1);
714         Indent = getIndent(IndentForLevel, TheLine.Level);
715       }
716       unsigned LevelIndent = Indent;
717       if (static_cast<int>(Indent) + Offset >= 0)
718         Indent += Offset;
719 
720       // Merge multiple lines if possible.
721       unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E);
722       if (MergedLines > 0 && Style.ColumnLimit == 0) {
723         // Disallow line merging if there is a break at the start of one of the
724         // input lines.
725         for (unsigned i = 0; i < MergedLines; ++i) {
726           if (I[i + 1]->First->NewlinesBefore > 0)
727             MergedLines = 0;
728         }
729       }
730       if (!DryRun) {
731         for (unsigned i = 0; i < MergedLines; ++i) {
732           join(*I[i], *I[i + 1]);
733         }
734       }
735       I += MergedLines;
736 
737       bool FixIndentation =
738           FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn);
739       if (TheLine.First->is(tok::eof)) {
740         if (PreviousLine && PreviousLine->Affected && !DryRun) {
741           // Remove the file's trailing whitespace.
742           unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u);
743           Whitespaces->replaceWhitespace(*TheLine.First, Newlines,
744                                          /*IndentLevel=*/0, /*Spaces=*/0,
745                                          /*TargetColumn=*/0);
746         }
747       } else if (TheLine.Type != LT_Invalid &&
748                  (TheLine.Affected || FixIndentation)) {
749         if (FirstTok->WhitespaceRange.isValid()) {
750           if (!DryRun)
751             formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
752                              Indent, TheLine.InPPDirective);
753         } else {
754           Indent = LevelIndent = FirstTok->OriginalColumn;
755         }
756 
757         // If everything fits on a single line, just put it there.
758         unsigned ColumnLimit = Style.ColumnLimit;
759         if (I + 1 != E) {
760           AnnotatedLine *NextLine = I[1];
761           if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline)
762             ColumnLimit = getColumnLimit(TheLine.InPPDirective);
763         }
764 
765         if (TheLine.Last->TotalLength + Indent <= ColumnLimit) {
766           LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun);
767           while (State.NextToken != NULL)
768             Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
769         } else if (Style.ColumnLimit == 0) {
770           // FIXME: Implement nested blocks for ColumnLimit = 0.
771           NoColumnLimitFormatter Formatter(Indenter);
772           if (!DryRun)
773             Formatter.format(Indent, &TheLine);
774         } else {
775           Penalty += format(TheLine, Indent, DryRun);
776         }
777 
778         if (!TheLine.InPPDirective)
779           IndentForLevel[TheLine.Level] = LevelIndent;
780       } else if (TheLine.ChildrenAffected) {
781         format(TheLine.Children, DryRun);
782       } else {
783         // Format the first token if necessary, and notify the WhitespaceManager
784         // about the unchanged whitespace.
785         for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
786           if (Tok == TheLine.First &&
787               (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
788             unsigned LevelIndent = Tok->OriginalColumn;
789             if (!DryRun) {
790               // Remove trailing whitespace of the previous line.
791               if ((PreviousLine && PreviousLine->Affected) ||
792                   TheLine.LeadingEmptyLinesAffected) {
793                 formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent,
794                                  TheLine.InPPDirective);
795               } else {
796                 Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
797               }
798             }
799 
800             if (static_cast<int>(LevelIndent) - Offset >= 0)
801               LevelIndent -= Offset;
802             if (Tok->isNot(tok::comment) && !TheLine.InPPDirective)
803               IndentForLevel[TheLine.Level] = LevelIndent;
804           } else if (!DryRun) {
805             Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
806           }
807         }
808       }
809       if (!DryRun) {
810         for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
811           Tok->Finalized = true;
812         }
813       }
814       PreviousLine = *I;
815     }
816     return Penalty;
817   }
818 
819 private:
820   /// \brief Formats an \c AnnotatedLine and returns the penalty.
821   ///
822   /// If \p DryRun is \c false, directly applies the changes.
823   unsigned format(const AnnotatedLine &Line, unsigned FirstIndent,
824                   bool DryRun) {
825     LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
826 
827     // If the ObjC method declaration does not fit on a line, we should format
828     // it with one arg per line.
829     if (State.Line->Type == LT_ObjCMethodDecl)
830       State.Stack.back().BreakBeforeParameter = true;
831 
832     // Find best solution in solution space.
833     return analyzeSolutionSpace(State, DryRun);
834   }
835 
836   /// \brief An edge in the solution space from \c Previous->State to \c State,
837   /// inserting a newline dependent on the \c NewLine.
838   struct StateNode {
839     StateNode(const LineState &State, bool NewLine, StateNode *Previous)
840         : State(State), NewLine(NewLine), Previous(Previous) {}
841     LineState State;
842     bool NewLine;
843     StateNode *Previous;
844   };
845 
  /// \brief A pair of <penalty, count> that is used as the priority of the
  /// search queue.
  ///
  /// The penalty is compared first. In case of equal penalties, the count
  /// breaks the tie so that states that were inserted first are preferred.
  /// During state generation we make sure that we insert states first that
  /// break the line as late as possible.
  typedef std::pair<unsigned, unsigned> OrderedPenalty;

  /// \brief An item in the prioritized BFS search queue. The \c StateNode's
  /// \c State has the given \c OrderedPenalty.
  typedef std::pair<OrderedPenalty, StateNode *> QueueItem;

  /// \brief The BFS queue type.
  ///
  /// \c std::greater turns the priority queue into a min-heap, i.e. \c top()
  /// is the item with the smallest \c OrderedPenalty.
  typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
                              std::greater<QueueItem> > QueueType;
860 
861   /// \brief Get the offset of the line relatively to the level.
862   ///
863   /// For example, 'public:' labels in classes are offset by 1 or 2
864   /// characters to the left from their level.
865   int getIndentOffset(const FormatToken &RootToken) {
866     if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
867       return Style.AccessModifierOffset;
868     return 0;
869   }
870 
871   /// \brief Add a new line and the required indent before the first Token
872   /// of the \c UnwrappedLine if there was no structural parsing error.
873   void formatFirstToken(FormatToken &RootToken,
874                         const AnnotatedLine *PreviousLine, unsigned IndentLevel,
875                         unsigned Indent, bool InPPDirective) {
876     unsigned Newlines =
877         std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
878     // Remove empty lines before "}" where applicable.
879     if (RootToken.is(tok::r_brace) &&
880         (!RootToken.Next ||
881          (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
882       Newlines = std::min(Newlines, 1u);
883     if (Newlines == 0 && !RootToken.IsFirst)
884       Newlines = 1;
885 
886     // Insert extra new line before access specifiers.
887     if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
888         RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
889       ++Newlines;
890 
891     // Remove empty lines after access specifiers.
892     if (PreviousLine && PreviousLine->First->isAccessSpecifier())
893       Newlines = std::min(1u, Newlines);
894 
895     Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent,
896                                    Indent, InPPDirective &&
897                                                !RootToken.HasUnescapedNewline);
898   }
899 
900   /// \brief Get the indent of \p Level from \p IndentForLevel.
901   ///
902   /// \p IndentForLevel must contain the indent for the level \c l
903   /// at \p IndentForLevel[l], or a value < 0 if the indent for
904   /// that level is unknown.
905   unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
906     if (IndentForLevel[Level] != -1)
907       return IndentForLevel[Level];
908     if (Level == 0)
909       return 0;
910     return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
911   }
912 
913   void join(AnnotatedLine &A, const AnnotatedLine &B) {
914     assert(!A.Last->Next);
915     assert(!B.First->Previous);
916     if (B.Affected)
917       A.Affected = true;
918     A.Last->Next = B.First;
919     B.First->Previous = A.Last;
920     B.First->CanBreakBefore = true;
921     unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
922     for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
923       Tok->TotalLength += LengthA;
924       A.Last = Tok;
925     }
926   }
927 
928   unsigned getColumnLimit(bool InPPDirective) const {
929     // In preprocessor directives reserve two chars for trailing " \"
930     return Style.ColumnLimit - (InPPDirective ? 2 : 0);
931   }
932 
933   /// \brief Analyze the entire solution space starting from \p InitialState.
934   ///
935   /// This implements a variant of Dijkstra's algorithm on the graph that spans
936   /// the solution space (\c LineStates are the nodes). The algorithm tries to
937   /// find the shortest path (the one with lowest penalty) from \p InitialState
938   /// to a state where all tokens are placed. Returns the penalty.
939   ///
940   /// If \p DryRun is \c false, directly applies the changes.
941   unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) {
942     std::set<LineState> Seen;
943 
944     // Increasing count of \c StateNode items we have created. This is used to
945     // create a deterministic order independent of the container.
946     unsigned Count = 0;
947     QueueType Queue;
948 
949     // Insert start element into queue.
950     StateNode *Node =
951         new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
952     Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
953     ++Count;
954 
955     unsigned Penalty = 0;
956 
957     // While not empty, take first element and follow edges.
958     while (!Queue.empty()) {
959       Penalty = Queue.top().first.first;
960       StateNode *Node = Queue.top().second;
961       if (Node->State.NextToken == NULL) {
962         DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
963         break;
964       }
965       Queue.pop();
966 
967       // Cut off the analysis of certain solutions if the analysis gets too
968       // complex. See description of IgnoreStackForComparison.
969       if (Count > 10000)
970         Node->State.IgnoreStackForComparison = true;
971 
972       if (!Seen.insert(Node->State).second)
973         // State already examined with lower penalty.
974         continue;
975 
976       FormatDecision LastFormat = Node->State.NextToken->Decision;
977       if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
978         addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
979       if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
980         addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
981     }
982 
983     if (Queue.empty()) {
984       // We were unable to find a solution, do nothing.
985       // FIXME: Add diagnostic?
986       DEBUG(llvm::dbgs() << "Could not find a solution.\n");
987       return 0;
988     }
989 
990     // Reconstruct the solution.
991     if (!DryRun)
992       reconstructPath(InitialState, Queue.top().second);
993 
994     DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
995     DEBUG(llvm::dbgs() << "---\n");
996 
997     return Penalty;
998   }
999 
1000   void reconstructPath(LineState &State, StateNode *Current) {
1001     std::deque<StateNode *> Path;
1002     // We do not need a break before the initial token.
1003     while (Current->Previous) {
1004       Path.push_front(Current);
1005       Current = Current->Previous;
1006     }
1007     for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
1008          I != E; ++I) {
1009       unsigned Penalty = 0;
1010       formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
1011       Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
1012 
1013       DEBUG({
1014         if ((*I)->NewLine) {
1015           llvm::dbgs() << "Penalty for placing "
1016                        << (*I)->Previous->State.NextToken->Tok.getName() << ": "
1017                        << Penalty << "\n";
1018         }
1019       });
1020     }
1021   }
1022 
1023   /// \brief Add the following state to the analysis queue \c Queue.
1024   ///
1025   /// Assume the current state is \p PreviousNode and has been reached with a
1026   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
1027   void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
1028                            bool NewLine, unsigned *Count, QueueType *Queue) {
1029     if (NewLine && !Indenter->canBreak(PreviousNode->State))
1030       return;
1031     if (!NewLine && Indenter->mustBreak(PreviousNode->State))
1032       return;
1033 
1034     StateNode *Node = new (Allocator.Allocate())
1035         StateNode(PreviousNode->State, NewLine, PreviousNode);
1036     if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
1037       return;
1038 
1039     Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
1040 
1041     Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
1042     ++(*Count);
1043   }
1044 
1045   /// \brief If the \p State's next token is an r_brace closing a nested block,
1046   /// format the nested block before it.
1047   ///
1048   /// Returns \c true if all children could be placed successfully and adapts
1049   /// \p Penalty as well as \p State. If \p DryRun is false, also directly
1050   /// creates changes using \c Whitespaces.
1051   ///
1052   /// The crucial idea here is that children always get formatted upon
1053   /// encountering the closing brace right after the nested block. Now, if we
1054   /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
1055   /// \c false), the entire block has to be kept on the same line (which is only
1056   /// possible if it fits on the line, only contains a single statement, etc.
1057   ///
1058   /// If \p NewLine is true, we format the nested block on separate lines, i.e.
1059   /// break after the "{", format all lines with correct indentation and the put
1060   /// the closing "}" on yet another new line.
1061   ///
1062   /// This enables us to keep the simple structure of the
1063   /// \c UnwrappedLineFormatter, where we only have two options for each token:
1064   /// break or don't break.
1065   bool formatChildren(LineState &State, bool NewLine, bool DryRun,
1066                       unsigned &Penalty) {
1067     FormatToken &Previous = *State.NextToken->Previous;
1068     const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
1069     if (!LBrace || LBrace->isNot(tok::l_brace) ||
1070         LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
1071       // The previous token does not open a block. Nothing to do. We don't
1072       // assert so that we can simply call this function for all tokens.
1073       return true;
1074 
1075     if (NewLine) {
1076       int AdditionalIndent = State.Stack.back().Indent -
1077                              Previous.Children[0]->Level * Style.IndentWidth;
1078       Penalty += format(Previous.Children, DryRun, AdditionalIndent,
1079                         /*FixBadIndentation=*/true);
1080       return true;
1081     }
1082 
1083     // Cannot merge multiple statements into a single line.
1084     if (Previous.Children.size() > 1)
1085       return false;
1086 
1087     // We can't put the closing "}" on a line with a trailing comment.
1088     if (Previous.Children[0]->Last->isTrailingComment())
1089       return false;
1090 
1091     if (!DryRun) {
1092       Whitespaces->replaceWhitespace(
1093           *Previous.Children[0]->First,
1094           /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
1095           /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
1096     }
1097     Penalty += format(*Previous.Children[0], State.Column + 1, DryRun);
1098 
1099     State.Column += 1 + Previous.Children[0]->Last->TotalLength;
1100     return true;
1101   }
1102 
1103   ContinuationIndenter *Indenter;
1104   WhitespaceManager *Whitespaces;
1105   FormatStyle Style;
1106   LineJoiner Joiner;
1107 
1108   llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
1109 };
1110 
/// \brief Turns the raw tokens produced by a \c Lexer into \c FormatTokens.
///
/// Whitespace is not reported as tokens of its own; it is folded into the
/// following token (\c WhitespaceRange, \c NewlinesBefore, ...). While lexing,
/// the current column is tracked so that every token gets an
/// \c OriginalColumn and a \c ColumnWidth.
class FormatTokenLexer {
public:
  /// \brief Creates a lexer on top of \p Lex and switches \p Lex into
  /// keep-whitespace mode.
  FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
                   encoding::Encoding Encoding)
      : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0),
        TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
        IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
    Lex.SetKeepWhitespaceMode(true);
  }

  /// \brief Lexes tokens up to and including the eof token and returns them.
  ///
  /// After each token has been appended, the most recent tokens are merged
  /// where applicable (see \c tryMergePreviousTokens). Must only be called
  /// while no tokens have been lexed yet.
  ArrayRef<FormatToken *> lex() {
    assert(Tokens.empty());
    do {
      Tokens.push_back(getNextToken());
      tryMergePreviousTokens();
    } while (Tokens.back()->Tok.isNot(tok::eof));
    return Tokens;
  }

  /// \brief Returns the identifier table used to classify raw identifiers.
  IdentifierTable &getIdentTable() { return IdentTable; }

private:
  /// \brief Tries to merge the tokens at the end of \c Tokens into a single
  /// token. At most one merge is performed per call.
  void tryMergePreviousTokens() {
    if (tryMerge_TMacro())
      return;

    if (Style.Language == FormatStyle::LK_JavaScript) {
      // "==" "=" (i.e. "==="), "!=" "=" (i.e. "!==") and ">" ">" ">=" (i.e.
      // ">>>="). Note that ">>" has already been split into two ">" tokens by
      // getNextToken().
      static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
      static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
      static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater,
                                               tok::greaterequal };
      // FIXME: We probably need to change token type to mimic operator with the
      // correct priority.
      if (tryMergeTokens(JSIdentity))
        return;
      if (tryMergeTokens(JSNotIdentity))
        return;
      if (tryMergeTokens(JSShiftEqual))
        return;
    }
  }

  /// \brief Merges the last \c Kinds.size() tokens into the first of them if
  /// their kinds match \p Kinds and there is no whitespace between them.
  ///
  /// Returns \c true if the tokens were merged. The merged token keeps the
  /// kind of the first token; only its text and column width are extended.
  bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) {
    if (Tokens.size() < Kinds.size())
      return false;

    SmallVectorImpl<FormatToken *>::const_iterator First =
        Tokens.end() - Kinds.size();
    if (!First[0]->is(Kinds[0]))
      return false;
    unsigned AddLength = 0;
    for (unsigned i = 1; i < Kinds.size(); ++i) {
      // A non-empty whitespace range means the tokens are not adjacent.
      if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
                                         First[i]->WhitespaceRange.getEnd())
        return false;
      AddLength += First[i]->TokenText.size();
    }
    // Drop all but the first token and let the first one cover their text.
    Tokens.resize(Tokens.size() - Kinds.size() + 1);
    First[0]->TokenText = StringRef(First[0]->TokenText.data(),
                                    First[0]->TokenText.size() + AddLength);
    First[0]->ColumnWidth += AddLength;
    return true;
  }

  /// \brief Merges a trailing <tt>_T ( "string" )</tt> sequence into a single
  /// string literal token.
  ///
  /// Only applies if the string literal is not multiline. The resulting token
  /// is the string literal token, with its text extended to span from the
  /// start of \c _T to the end of the closing parenthesis and with the
  /// position and whitespace information of the \c _T token. Returns \c true
  /// if the tokens were merged.
  bool tryMerge_TMacro() {
    if (Tokens.size() < 4)
      return false;
    FormatToken *Last = Tokens.back();
    if (!Last->is(tok::r_paren))
      return false;

    FormatToken *String = Tokens[Tokens.size() - 2];
    if (!String->is(tok::string_literal) || String->IsMultiline)
      return false;

    if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
      return false;

    FormatToken *Macro = Tokens[Tokens.size() - 4];
    if (Macro->TokenText != "_T")
      return false;

    const char *Start = Macro->TokenText.data();
    const char *End = Last->TokenText.data() + Last->TokenText.size();
    String->TokenText = StringRef(Start, End - Start);
    String->IsFirst = Macro->IsFirst;
    String->LastNewlineOffset = Macro->LastNewlineOffset;
    String->WhitespaceRange = Macro->WhitespaceRange;
    String->OriginalColumn = Macro->OriginalColumn;
    String->ColumnWidth = encoding::columnWidthWithTabs(
        String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);

    // Replace the four tokens by the merged string token.
    Tokens.pop_back();
    Tokens.pop_back();
    Tokens.pop_back();
    Tokens.back() = String;
    return true;
  }

  /// \brief Lexes and returns the next non-whitespace token.
  ///
  /// The whitespace in front of the token is recorded in the token itself.
  /// A ">>" token is split: the first call returns a ">" token and the next
  /// call returns a synthesized second ">" token.
  FormatToken *getNextToken() {
    if (GreaterStashed) {
      // Create a synthesized second '>' token.
      // FIXME: Increment Column and set OriginalColumn.
      Token Greater = FormatTok->Tok;
      FormatTok = new (Allocator.Allocate()) FormatToken;
      FormatTok->Tok = Greater;
      // The synthesized token starts one character after the first '>' and
      // has an empty whitespace range.
      SourceLocation GreaterLocation =
          FormatTok->Tok.getLocation().getLocWithOffset(1);
      FormatTok->WhitespaceRange =
          SourceRange(GreaterLocation, GreaterLocation);
      FormatTok->TokenText = ">";
      FormatTok->ColumnWidth = 1;
      GreaterStashed = false;
      return FormatTok;
    }

    FormatTok = new (Allocator.Allocate()) FormatToken;
    readRawToken(*FormatTok);
    // Whitespace trimmed from the end of the previous comment token counts as
    // whitespace in front of this token.
    SourceLocation WhitespaceStart =
        FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
    FormatTok->IsFirst = IsFirstToken;
    IsFirstToken = false;

    // Consume and record whitespace until we find a significant token.
    unsigned WhitespaceLength = TrailingWhitespace;
    while (FormatTok->Tok.is(tok::unknown)) {
      for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
        switch (FormatTok->TokenText[i]) {
        case '\n':
          ++FormatTok->NewlinesBefore;
          // The newline is unescaped unless it is directly preceded by a
          // backslash, or by a backslash followed by '\r'.
          // FIXME: This is technically incorrect, as it could also
          // be a literal backslash at the end of the line.
          if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
                         (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
                          FormatTok->TokenText[i - 2] != '\\')))
            FormatTok->HasUnescapedNewline = true;
          FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
          Column = 0;
          break;
        case '\r':
        case '\f':
        case '\v':
          Column = 0;
          break;
        case ' ':
          ++Column;
          break;
        case '\t':
          // Advance to the next tab stop.
          Column += Style.TabWidth - Column % Style.TabWidth;
          break;
        case '\\':
          ++Column;
          // A backslash that does not escape a line ending is not whitespace.
          if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
                             FormatTok->TokenText[i + 1] != '\n'))
            FormatTok->Type = TT_ImplicitStringLiteral;
          break;
        default:
          // Any other character makes this unknown token significant.
          FormatTok->Type = TT_ImplicitStringLiteral;
          ++Column;
          break;
        }
      }

      if (FormatTok->Type == TT_ImplicitStringLiteral)
        break;
      WhitespaceLength += FormatTok->Tok.getLength();

      readRawToken(*FormatTok);
    }

    // In case the token starts with escaped newlines, we want to
    // take them into account as whitespace - this pattern is quite frequent
    // in macro definitions.
    // FIXME: Add a more explicit test.
    while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
           FormatTok->TokenText[1] == '\n') {
      // FIXME: ++FormatTok->NewlinesBefore is missing...
      WhitespaceLength += 2;
      Column = 0;
      FormatTok->TokenText = FormatTok->TokenText.substr(2);
    }

    FormatTok->WhitespaceRange = SourceRange(
        WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));

    FormatTok->OriginalColumn = Column;

    TrailingWhitespace = 0;
    if (FormatTok->Tok.is(tok::comment)) {
      // Trim trailing whitespace from the comment and remember how much was
      // removed, so that it becomes part of the next token's whitespace.
      // FIXME: Add the trimmed whitespace to Column.
      StringRef UntrimmedText = FormatTok->TokenText;
      FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
      TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
    } else if (FormatTok->Tok.is(tok::raw_identifier)) {
      // Look the raw identifier up to give the token its real kind.
      IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
      FormatTok->Tok.setIdentifierInfo(&Info);
      FormatTok->Tok.setKind(Info.getTokenID());
    } else if (FormatTok->Tok.is(tok::greatergreater)) {
      // Split ">>" into two ">" tokens; the second one is synthesized by the
      // next call.
      FormatTok->Tok.setKind(tok::greater);
      FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
      GreaterStashed = true;
    }

    // Now FormatTok is the next non-whitespace token.

    StringRef Text = FormatTok->TokenText;
    size_t FirstNewlinePos = Text.find('\n');
    if (FirstNewlinePos == StringRef::npos) {
      // FIXME: ColumnWidth actually depends on the start column, we need to
      // take this into account when the token is moved.
      FormatTok->ColumnWidth =
          encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
      Column += FormatTok->ColumnWidth;
    } else {
      FormatTok->IsMultiline = true;
      // For multiline tokens, ColumnWidth is the width of the first line only.
      // FIXME: ColumnWidth actually depends on the start column, we need to
      // take this into account when the token is moved.
      FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
          Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);

      // The last line of the token always starts in column 0.
      // Thus, the length can be precomputed even in the presence of tabs.
      FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
          Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
          Encoding);
      Column = FormatTok->LastLineColumnWidth;
    }

    return FormatTok;
  }

  // The token most recently returned by getNextToken().
  FormatToken *FormatTok;
  // True until the first token has been lexed.
  bool IsFirstToken;
  // True if the second '>' of a split ">>" still has to be returned.
  bool GreaterStashed;
  // The column at which the next character starts.
  unsigned Column;
  // Number of whitespace characters trimmed from the end of the previous
  // comment token.
  unsigned TrailingWhitespace;
  Lexer &Lex;
  SourceManager &SourceMgr;
  FormatStyle &Style;
  IdentifierTable IdentTable;
  encoding::Encoding Encoding;
  // Owns all FormatTokens created by this lexer.
  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
  // The tokens lexed (and merged) so far.
  SmallVector<FormatToken *, 16> Tokens;

  /// \brief Reads the next raw token from \c Lex into \p Tok and sets its
  /// \c TokenText to the corresponding source text.
  void readRawToken(FormatToken &Tok) {
    Lex.LexFromRawLexer(Tok.Tok);
    Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
                              Tok.Tok.getLength());
    // For formatting, treat unterminated string literals like normal string
    // literals.
    if (Tok.is(tok::unknown)) {
      if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
        Tok.Tok.setKind(tok::string_literal);
        Tok.IsUnterminatedLiteral = true;
      } else if (Style.Language == FormatStyle::LK_JavaScript &&
                 Tok.TokenText == "''") {
        // In JavaScript, '' is an empty string; treat it as a char constant.
        Tok.Tok.setKind(tok::char_constant);
      }
    }
  }
};
1372 
1373 static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1374   switch (Language) {
1375   case FormatStyle::LK_Cpp:
1376     return "C++";
1377   case FormatStyle::LK_JavaScript:
1378     return "JavaScript";
1379   case FormatStyle::LK_Proto:
1380     return "Proto";
1381   default:
1382     return "Unknown";
1383   }
1384 }
1385 
1386 class Formatter : public UnwrappedLineConsumer {
1387 public:
1388   Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
1389             const std::vector<CharSourceRange> &Ranges)
1390       : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1391         Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
1392         Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
1393         Encoding(encoding::detectEncoding(Lex.getBuffer())) {
1394     DEBUG(llvm::dbgs() << "File encoding: "
1395                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1396                                                                : "unknown")
1397                        << "\n");
1398     DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
1399                        << "\n");
1400   }
1401 
1402   tooling::Replacements format() {
1403     tooling::Replacements Result;
1404     FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
1405 
1406     UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
1407     bool StructuralError = Parser.parse();
1408     assert(UnwrappedLines.rbegin()->empty());
1409     for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1410          ++Run) {
1411       DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1412       SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1413       for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1414         AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1415       }
1416       tooling::Replacements RunResult =
1417           format(AnnotatedLines, StructuralError, Tokens);
1418       DEBUG({
1419         llvm::dbgs() << "Replacements for run " << Run << ":\n";
1420         for (tooling::Replacements::iterator I = RunResult.begin(),
1421                                              E = RunResult.end();
1422              I != E; ++I) {
1423           llvm::dbgs() << I->toString() << "\n";
1424         }
1425       });
1426       for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1427         delete AnnotatedLines[i];
1428       }
1429       Result.insert(RunResult.begin(), RunResult.end());
1430       Whitespaces.reset();
1431     }
1432     return Result;
1433   }
1434 
1435   tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1436                                bool StructuralError, FormatTokenLexer &Tokens) {
1437     TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
1438     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1439       Annotator.annotate(*AnnotatedLines[i]);
1440     }
1441     deriveLocalStyle(AnnotatedLines);
1442     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1443       Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1444     }
1445     computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
1446 
1447     Annotator.setCommentLineLevels(AnnotatedLines);
1448     ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding,
1449                                   BinPackInconclusiveFunctions);
1450     UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style);
1451     Formatter.format(AnnotatedLines, /*DryRun=*/false);
1452     return Whitespaces.generateReplacements();
1453   }
1454 
1455 private:
1456   // Determines which lines are affected by the SourceRanges given as input.
1457   // Returns \c true if at least one line between I and E or one of their
1458   // children is affected.
1459   bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
1460                             SmallVectorImpl<AnnotatedLine *>::iterator E) {
1461     bool SomeLineAffected = false;
1462     const AnnotatedLine *PreviousLine = NULL;
1463     while (I != E) {
1464       AnnotatedLine *Line = *I;
1465       Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
1466 
1467       // If a line is part of a preprocessor directive, it needs to be formatted
1468       // if any token within the directive is affected.
1469       if (Line->InPPDirective) {
1470         FormatToken *Last = Line->Last;
1471         SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
1472         while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
1473           Last = (*PPEnd)->Last;
1474           ++PPEnd;
1475         }
1476 
1477         if (affectsTokenRange(*Line->First, *Last,
1478                               /*IncludeLeadingNewlines=*/false)) {
1479           SomeLineAffected = true;
1480           markAllAsAffected(I, PPEnd);
1481         }
1482         I = PPEnd;
1483         continue;
1484       }
1485 
1486       if (nonPPLineAffected(Line, PreviousLine))
1487         SomeLineAffected = true;
1488 
1489       PreviousLine = Line;
1490       ++I;
1491     }
1492     return SomeLineAffected;
1493   }
1494 
1495   // Determines whether 'Line' is affected by the SourceRanges given as input.
1496   // Returns \c true if line or one if its children is affected.
1497   bool nonPPLineAffected(AnnotatedLine *Line,
1498                          const AnnotatedLine *PreviousLine) {
1499     bool SomeLineAffected = false;
1500     Line->ChildrenAffected =
1501         computeAffectedLines(Line->Children.begin(), Line->Children.end());
1502     if (Line->ChildrenAffected)
1503       SomeLineAffected = true;
1504 
1505     // Stores whether one of the line's tokens is directly affected.
1506     bool SomeTokenAffected = false;
1507     // Stores whether we need to look at the leading newlines of the next token
1508     // in order to determine whether it was affected.
1509     bool IncludeLeadingNewlines = false;
1510 
1511     // Stores whether the first child line of any of this line's tokens is
1512     // affected.
1513     bool SomeFirstChildAffected = false;
1514 
1515     for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
1516       // Determine whether 'Tok' was affected.
1517       if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
1518         SomeTokenAffected = true;
1519 
1520       // Determine whether the first child of 'Tok' was affected.
1521       if (!Tok->Children.empty() && Tok->Children.front()->Affected)
1522         SomeFirstChildAffected = true;
1523 
1524       IncludeLeadingNewlines = Tok->Children.empty();
1525     }
1526 
1527     // Was this line moved, i.e. has it previously been on the same line as an
1528     // affected line?
1529     bool LineMoved = PreviousLine && PreviousLine->Affected &&
1530                      Line->First->NewlinesBefore == 0;
1531 
1532     bool IsContinuedComment = Line->First->is(tok::comment) &&
1533                               Line->First->Next == NULL &&
1534                               Line->First->NewlinesBefore < 2 && PreviousLine &&
1535                               PreviousLine->Affected &&
1536                               PreviousLine->Last->is(tok::comment);
1537 
1538     if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
1539         IsContinuedComment) {
1540       Line->Affected = true;
1541       SomeLineAffected = true;
1542     }
1543     return SomeLineAffected;
1544   }
1545 
1546   // Marks all lines between I and E as well as all their children as affected.
1547   void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
1548                          SmallVectorImpl<AnnotatedLine *>::iterator E) {
1549     while (I != E) {
1550       (*I)->Affected = true;
1551       markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
1552       ++I;
1553     }
1554   }
1555 
1556   // Returns true if the range from 'First' to 'Last' intersects with one of the
1557   // input ranges.
1558   bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
1559                          bool IncludeLeadingNewlines) {
1560     SourceLocation Start = First.WhitespaceRange.getBegin();
1561     if (!IncludeLeadingNewlines)
1562       Start = Start.getLocWithOffset(First.LastNewlineOffset);
1563     SourceLocation End = Last.getStartOfNonWhitespace();
1564     if (Last.TokenText.size() > 0)
1565       End = End.getLocWithOffset(Last.TokenText.size() - 1);
1566     CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
1567     return affectsCharSourceRange(Range);
1568   }
1569 
1570   // Returns true if one of the input ranges intersect the leading empty lines
1571   // before 'Tok'.
1572   bool affectsLeadingEmptyLines(const FormatToken &Tok) {
1573     CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
1574         Tok.WhitespaceRange.getBegin(),
1575         Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
1576     return affectsCharSourceRange(EmptyLineRange);
1577   }
1578 
1579   // Returns true if 'Range' intersects with one of the input ranges.
1580   bool affectsCharSourceRange(const CharSourceRange &Range) {
1581     for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
1582                                                           E = Ranges.end();
1583          I != E; ++I) {
1584       if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
1585           !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
1586         return true;
1587     }
1588     return false;
1589   }
1590 
1591   static bool inputUsesCRLF(StringRef Text) {
1592     return Text.count('\r') * 2 > Text.count('\n');
1593   }
1594 
1595   void
1596   deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1597     unsigned CountBoundToVariable = 0;
1598     unsigned CountBoundToType = 0;
1599     bool HasCpp03IncompatibleFormat = false;
1600     bool HasBinPackedFunction = false;
1601     bool HasOnePerLineFunction = false;
1602     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1603       if (!AnnotatedLines[i]->First->Next)
1604         continue;
1605       FormatToken *Tok = AnnotatedLines[i]->First->Next;
1606       while (Tok->Next) {
1607         if (Tok->Type == TT_PointerOrReference) {
1608           bool SpacesBefore =
1609               Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1610           bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1611                              Tok->Next->WhitespaceRange.getEnd();
1612           if (SpacesBefore && !SpacesAfter)
1613             ++CountBoundToVariable;
1614           else if (!SpacesBefore && SpacesAfter)
1615             ++CountBoundToType;
1616         }
1617 
1618         if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1619           if (Tok->is(tok::coloncolon) &&
1620               Tok->Previous->Type == TT_TemplateOpener)
1621             HasCpp03IncompatibleFormat = true;
1622           if (Tok->Type == TT_TemplateCloser &&
1623               Tok->Previous->Type == TT_TemplateCloser)
1624             HasCpp03IncompatibleFormat = true;
1625         }
1626 
1627         if (Tok->PackingKind == PPK_BinPacked)
1628           HasBinPackedFunction = true;
1629         if (Tok->PackingKind == PPK_OnePerLine)
1630           HasOnePerLineFunction = true;
1631 
1632         Tok = Tok->Next;
1633       }
1634     }
1635     if (Style.DerivePointerBinding) {
1636       if (CountBoundToType > CountBoundToVariable)
1637         Style.PointerBindsToType = true;
1638       else if (CountBoundToType < CountBoundToVariable)
1639         Style.PointerBindsToType = false;
1640     }
1641     if (Style.Standard == FormatStyle::LS_Auto) {
1642       Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1643                                                   : FormatStyle::LS_Cpp03;
1644     }
1645     BinPackInconclusiveFunctions =
1646         HasBinPackedFunction || !HasOnePerLineFunction;
1647   }
1648 
1649   virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1650     assert(!UnwrappedLines.empty());
1651     UnwrappedLines.back().push_back(TheLine);
1652   }
1653 
1654   virtual void finishRun() {
1655     UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1656   }
1657 
  // Style used for formatting. deriveLocalStyle() may overwrite its
  // PointerBindsToType and Standard fields based on the input.
  FormatStyle Style;
  // NOTE(review): not referenced in the visible part of this class;
  // presumably the lexer that supplies the tokens to format -- confirm.
  Lexer &Lex;
  // Used to order source locations when intersecting with 'Ranges'.
  SourceManager &SourceMgr;
  // Handed to the indenter and the line formatter; the replacements it
  // generates are what the formatting run returns.
  WhitespaceManager Whitespaces;
  // The input ranges that affectsCharSourceRange() tests against.
  SmallVector<CharSourceRange, 8> Ranges;
  // One inner vector per run: finishRun() appends a new empty one and
  // consumeUnwrappedLine() appends to the last one.
  SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;

  // Passed on to the ContinuationIndenter.
  encoding::Encoding Encoding;
  // Computed by deriveLocalStyle() and passed on to the ContinuationIndenter.
  bool BinPackInconclusiveFunctions;
1667 };
1668 
1669 } // end anonymous namespace
1670 
1671 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1672                                SourceManager &SourceMgr,
1673                                std::vector<CharSourceRange> Ranges) {
1674   Formatter formatter(Style, Lex, SourceMgr, Ranges);
1675   return formatter.format();
1676 }
1677 
1678 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1679                                std::vector<tooling::Range> Ranges,
1680                                StringRef FileName) {
1681   FileManager Files((FileSystemOptions()));
1682   DiagnosticsEngine Diagnostics(
1683       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1684       new DiagnosticOptions);
1685   SourceManager SourceMgr(Diagnostics, Files);
1686   llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1687   const clang::FileEntry *Entry =
1688       Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1689   SourceMgr.overrideFileContents(Entry, Buf);
1690   FileID ID =
1691       SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1692   Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
1693             getFormattingLangOpts(Style.Standard));
1694   SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1695   std::vector<CharSourceRange> CharRanges;
1696   for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1697     SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset());
1698     SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
1699     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1700   }
1701   return reformat(Style, Lex, SourceMgr, CharRanges);
1702 }
1703 
1704 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) {
1705   LangOptions LangOpts;
1706   LangOpts.CPlusPlus = 1;
1707   LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1708   LangOpts.LineComment = 1;
1709   LangOpts.Bool = 1;
1710   LangOpts.ObjC1 = 1;
1711   LangOpts.ObjC2 = 1;
1712   return LangOpts;
1713 }
1714 
// Help text for the -style option: names the predefined styles and describes
// the "file" form (configuration read from a .clang-format file) and the
// inline "{key: value, ...}" form.
const char *StyleOptionHelpDescription =
    "Coding style, currently supports:\n"
    "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
    "Use -style=file to load style configuration from\n"
    ".clang-format file located in one of the parent\n"
    "directories of the source file (or current\n"
    "directory for stdin).\n"
    "Use -style=\"{key: value, ...}\" to set specific\n"
    "parameters, e.g.:\n"
    "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
1725 
1726 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
1727   if (FileName.endswith_lower(".js")) {
1728     return FormatStyle::LK_JavaScript;
1729   } else if (FileName.endswith_lower(".proto") ||
1730              FileName.endswith_lower(".protodevel")) {
1731     return FormatStyle::LK_Proto;
1732   }
1733   return FormatStyle::LK_Cpp;
1734 }
1735 
1736 FormatStyle getStyle(StringRef StyleName, StringRef FileName,
1737                      StringRef FallbackStyle) {
1738   FormatStyle Style = getLLVMStyle();
1739   Style.Language = getLanguageByFileName(FileName);
1740   if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
1741     llvm::errs() << "Invalid fallback style \"" << FallbackStyle
1742                  << "\" using LLVM style\n";
1743     return Style;
1744   }
1745 
1746   if (StyleName.startswith("{")) {
1747     // Parse YAML/JSON style from the command line.
1748     if (llvm::error_code ec = parseConfiguration(StyleName, &Style)) {
1749       llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
1750                    << FallbackStyle << " style\n";
1751     }
1752     return Style;
1753   }
1754 
1755   if (!StyleName.equals_lower("file")) {
1756     if (!getPredefinedStyle(StyleName, Style.Language, &Style))
1757       llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1758                    << " style\n";
1759     return Style;
1760   }
1761 
1762   // Look for .clang-format/_clang-format file in the file's parent directories.
1763   SmallString<128> UnsuitableConfigFiles;
1764   SmallString<128> Path(FileName);
1765   llvm::sys::fs::make_absolute(Path);
1766   for (StringRef Directory = Path; !Directory.empty();
1767        Directory = llvm::sys::path::parent_path(Directory)) {
1768     if (!llvm::sys::fs::is_directory(Directory))
1769       continue;
1770     SmallString<128> ConfigFile(Directory);
1771 
1772     llvm::sys::path::append(ConfigFile, ".clang-format");
1773     DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1774     bool IsFile = false;
1775     // Ignore errors from is_regular_file: we only need to know if we can read
1776     // the file or not.
1777     llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1778 
1779     if (!IsFile) {
1780       // Try _clang-format too, since dotfiles are not commonly used on Windows.
1781       ConfigFile = Directory;
1782       llvm::sys::path::append(ConfigFile, "_clang-format");
1783       DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1784       llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1785     }
1786 
1787     if (IsFile) {
1788       OwningPtr<llvm::MemoryBuffer> Text;
1789       if (llvm::error_code ec =
1790               llvm::MemoryBuffer::getFile(ConfigFile.c_str(), Text)) {
1791         llvm::errs() << ec.message() << "\n";
1792         break;
1793       }
1794       if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
1795         if (ec == llvm::errc::not_supported) {
1796           if (!UnsuitableConfigFiles.empty())
1797             UnsuitableConfigFiles.append(", ");
1798           UnsuitableConfigFiles.append(ConfigFile);
1799           continue;
1800         }
1801         llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
1802                      << "\n";
1803         break;
1804       }
1805       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
1806       return Style;
1807     }
1808   }
1809   llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
1810                << " style\n";
1811   if (!UnsuitableConfigFiles.empty()) {
1812     llvm::errs() << "Configuration file(s) do(es) not support "
1813                  << getLanguageName(Style.Language) << ": "
1814                  << UnsuitableConfigFiles << "\n";
1815   }
1816   return Style;
1817 }
1818 
1819 } // namespace format
1820 } // namespace clang
1821