1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "format-formatter"
17 
18 #include "ContinuationIndenter.h"
19 #include "TokenAnnotator.h"
20 #include "UnwrappedLineParser.h"
21 #include "WhitespaceManager.h"
22 #include "clang/Basic/Diagnostic.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Format/Format.h"
25 #include "clang/Lex/Lexer.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/YAMLTraits.h"
30 #include "llvm/Support/Path.h"
31 #include <queue>
32 #include <string>
33 
34 namespace llvm {
35 namespace yaml {
36 template <>
37 struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageKind> {
38   static void enumeration(IO &IO,
39                           clang::format::FormatStyle::LanguageKind &Value) {
40     IO.enumCase(Value, "Cpp", clang::format::FormatStyle::LK_Cpp);
41     IO.enumCase(Value, "JavaScript", clang::format::FormatStyle::LK_JavaScript);
42   }
43 };
44 
45 template <>
46 struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> {
47   static void enumeration(IO &IO,
48                           clang::format::FormatStyle::LanguageStandard &Value) {
49     IO.enumCase(Value, "Cpp03", clang::format::FormatStyle::LS_Cpp03);
50     IO.enumCase(Value, "C++03", clang::format::FormatStyle::LS_Cpp03);
51     IO.enumCase(Value, "Cpp11", clang::format::FormatStyle::LS_Cpp11);
52     IO.enumCase(Value, "C++11", clang::format::FormatStyle::LS_Cpp11);
53     IO.enumCase(Value, "Auto", clang::format::FormatStyle::LS_Auto);
54   }
55 };
56 
57 template <>
58 struct ScalarEnumerationTraits<clang::format::FormatStyle::UseTabStyle> {
59   static void enumeration(IO &IO,
60                           clang::format::FormatStyle::UseTabStyle &Value) {
61     IO.enumCase(Value, "Never", clang::format::FormatStyle::UT_Never);
62     IO.enumCase(Value, "false", clang::format::FormatStyle::UT_Never);
63     IO.enumCase(Value, "Always", clang::format::FormatStyle::UT_Always);
64     IO.enumCase(Value, "true", clang::format::FormatStyle::UT_Always);
65     IO.enumCase(Value, "ForIndentation",
66                 clang::format::FormatStyle::UT_ForIndentation);
67   }
68 };
69 
70 template <>
71 struct ScalarEnumerationTraits<clang::format::FormatStyle::BraceBreakingStyle> {
72   static void
73   enumeration(IO &IO, clang::format::FormatStyle::BraceBreakingStyle &Value) {
74     IO.enumCase(Value, "Attach", clang::format::FormatStyle::BS_Attach);
75     IO.enumCase(Value, "Linux", clang::format::FormatStyle::BS_Linux);
76     IO.enumCase(Value, "Stroustrup", clang::format::FormatStyle::BS_Stroustrup);
77     IO.enumCase(Value, "Allman", clang::format::FormatStyle::BS_Allman);
78   }
79 };
80 
81 template <>
82 struct ScalarEnumerationTraits<
83     clang::format::FormatStyle::NamespaceIndentationKind> {
84   static void
85   enumeration(IO &IO,
86               clang::format::FormatStyle::NamespaceIndentationKind &Value) {
87     IO.enumCase(Value, "None", clang::format::FormatStyle::NI_None);
88     IO.enumCase(Value, "Inner", clang::format::FormatStyle::NI_Inner);
89     IO.enumCase(Value, "All", clang::format::FormatStyle::NI_All);
90   }
91 };
92 
93 template <> struct MappingTraits<clang::format::FormatStyle> {
94   static void mapping(llvm::yaml::IO &IO, clang::format::FormatStyle &Style) {
95     if (IO.outputting()) {
96       StringRef StylesArray[] = { "LLVM",    "Google", "Chromium",
97                                   "Mozilla", "WebKit" };
98       ArrayRef<StringRef> Styles(StylesArray);
99       for (size_t i = 0, e = Styles.size(); i < e; ++i) {
100         StringRef StyleName(Styles[i]);
101         clang::format::FormatStyle PredefinedStyle;
102         if (clang::format::getPredefinedStyle(StyleName, &PredefinedStyle) &&
103             Style == PredefinedStyle) {
104           IO.mapOptional("# BasedOnStyle", StyleName);
105           break;
106         }
107       }
108     } else {
109       StringRef BasedOnStyle;
110       IO.mapOptional("BasedOnStyle", BasedOnStyle);
111       if (!BasedOnStyle.empty()) {
112         clang::format::FormatStyle::LanguageKind Language = Style.Language;
113         if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) {
114           IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
115           return;
116         }
117         Style.Language = Language;
118       }
119     }
120 
121     IO.mapOptional("Language", Style.Language);
122     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
123     IO.mapOptional("ConstructorInitializerIndentWidth",
124                    Style.ConstructorInitializerIndentWidth);
125     IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
126     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
127     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
128                    Style.AllowAllParametersOfDeclarationOnNextLine);
129     IO.mapOptional("AllowShortIfStatementsOnASingleLine",
130                    Style.AllowShortIfStatementsOnASingleLine);
131     IO.mapOptional("AllowShortLoopsOnASingleLine",
132                    Style.AllowShortLoopsOnASingleLine);
133     IO.mapOptional("AllowShortFunctionsOnASingleLine",
134                    Style.AllowShortFunctionsOnASingleLine);
135     IO.mapOptional("AlwaysBreakTemplateDeclarations",
136                    Style.AlwaysBreakTemplateDeclarations);
137     IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
138                    Style.AlwaysBreakBeforeMultilineStrings);
139     IO.mapOptional("BreakBeforeBinaryOperators",
140                    Style.BreakBeforeBinaryOperators);
141     IO.mapOptional("BreakBeforeTernaryOperators",
142                    Style.BreakBeforeTernaryOperators);
143     IO.mapOptional("BreakConstructorInitializersBeforeComma",
144                    Style.BreakConstructorInitializersBeforeComma);
145     IO.mapOptional("BinPackParameters", Style.BinPackParameters);
146     IO.mapOptional("ColumnLimit", Style.ColumnLimit);
147     IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
148                    Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
149     IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding);
150     IO.mapOptional("ExperimentalAutoDetectBinPacking",
151                    Style.ExperimentalAutoDetectBinPacking);
152     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
153     IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
154     IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
155     IO.mapOptional("ObjCSpaceBeforeProtocolList",
156                    Style.ObjCSpaceBeforeProtocolList);
157     IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
158                    Style.PenaltyBreakBeforeFirstCallParameter);
159     IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
160     IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
161     IO.mapOptional("PenaltyBreakFirstLessLess",
162                    Style.PenaltyBreakFirstLessLess);
163     IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
164     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
165                    Style.PenaltyReturnTypeOnItsOwnLine);
166     IO.mapOptional("PointerBindsToType", Style.PointerBindsToType);
167     IO.mapOptional("SpacesBeforeTrailingComments",
168                    Style.SpacesBeforeTrailingComments);
169     IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
170     IO.mapOptional("Standard", Style.Standard);
171     IO.mapOptional("IndentWidth", Style.IndentWidth);
172     IO.mapOptional("TabWidth", Style.TabWidth);
173     IO.mapOptional("UseTab", Style.UseTab);
174     IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
175     IO.mapOptional("IndentFunctionDeclarationAfterType",
176                    Style.IndentFunctionDeclarationAfterType);
177     IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
178     IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
179     IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
180     IO.mapOptional("SpacesInCStyleCastParentheses",
181                    Style.SpacesInCStyleCastParentheses);
182     IO.mapOptional("SpaceAfterControlStatementKeyword",
183                    Style.SpaceAfterControlStatementKeyword);
184     IO.mapOptional("SpaceBeforeAssignmentOperators",
185                    Style.SpaceBeforeAssignmentOperators);
186     IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
187   }
188 };
189 
190 // Allows to read vector<FormatStyle> while keeping default values.
191 // Elements will be written or read starting from the 1st element.
192 // When writing, the 0th element is ignored.
193 // When reading, keys that are not present in the serialized form will be
194 // copied from the 0th element of the vector. If the first element had no
195 // Language specified, it will be treated as the default one for the following
196 // elements.
197 template <>
198 struct DocumentListTraits<std::vector<clang::format::FormatStyle> > {
199   static size_t size(IO &io, std::vector<clang::format::FormatStyle> &Seq) {
200     return Seq.size() - 1;
201   }
202   static clang::format::FormatStyle &
203   element(IO &io, std::vector<clang::format::FormatStyle> &Seq, size_t Index) {
204     if (Index + 2 > Seq.size()) {
205       assert(Index + 2 == Seq.size() + 1);
206       clang::format::FormatStyle Template;
207       if (Seq.size() > 1 &&
208           Seq[1].Language == clang::format::FormatStyle::LK_None) {
209         Template = Seq[1];
210       } else {
211         Template = Seq[0];
212         Template.Language = clang::format::FormatStyle::LK_None;
213       }
214       Seq.resize(Index + 2, Template);
215     }
216     return Seq[Index + 1];
217   }
218 };
219 }
220 }
221 
222 namespace clang {
223 namespace format {
224 
225 void setDefaultPenalties(FormatStyle &Style) {
226   Style.PenaltyBreakComment = 60;
227   Style.PenaltyBreakFirstLessLess = 120;
228   Style.PenaltyBreakString = 1000;
229   Style.PenaltyExcessCharacter = 1000000;
230 }
231 
232 FormatStyle getLLVMStyle() {
233   FormatStyle LLVMStyle;
234   LLVMStyle.Language = FormatStyle::LK_Cpp;
235   LLVMStyle.AccessModifierOffset = -2;
236   LLVMStyle.AlignEscapedNewlinesLeft = false;
237   LLVMStyle.AlignTrailingComments = true;
238   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
239   LLVMStyle.AllowShortFunctionsOnASingleLine = true;
240   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
241   LLVMStyle.AllowShortLoopsOnASingleLine = false;
242   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
243   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
244   LLVMStyle.BinPackParameters = true;
245   LLVMStyle.BreakBeforeBinaryOperators = false;
246   LLVMStyle.BreakBeforeTernaryOperators = true;
247   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
248   LLVMStyle.BreakConstructorInitializersBeforeComma = false;
249   LLVMStyle.ColumnLimit = 80;
250   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
251   LLVMStyle.ConstructorInitializerIndentWidth = 4;
252   LLVMStyle.Cpp11BracedListStyle = false;
253   LLVMStyle.DerivePointerBinding = false;
254   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
255   LLVMStyle.IndentCaseLabels = false;
256   LLVMStyle.IndentFunctionDeclarationAfterType = false;
257   LLVMStyle.IndentWidth = 2;
258   LLVMStyle.TabWidth = 8;
259   LLVMStyle.MaxEmptyLinesToKeep = 1;
260   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
261   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
262   LLVMStyle.PointerBindsToType = false;
263   LLVMStyle.SpacesBeforeTrailingComments = 1;
264   LLVMStyle.Standard = FormatStyle::LS_Cpp03;
265   LLVMStyle.UseTab = FormatStyle::UT_Never;
266   LLVMStyle.SpacesInParentheses = false;
267   LLVMStyle.SpaceInEmptyParentheses = false;
268   LLVMStyle.SpacesInCStyleCastParentheses = false;
269   LLVMStyle.SpaceAfterControlStatementKeyword = true;
270   LLVMStyle.SpaceBeforeAssignmentOperators = true;
271   LLVMStyle.ContinuationIndentWidth = 4;
272   LLVMStyle.SpacesInAngles = false;
273 
274   setDefaultPenalties(LLVMStyle);
275   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
276   LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
277 
278   return LLVMStyle;
279 }
280 
281 FormatStyle getGoogleStyle() {
282   FormatStyle GoogleStyle;
283   GoogleStyle.Language = FormatStyle::LK_Cpp;
284   GoogleStyle.AccessModifierOffset = -1;
285   GoogleStyle.AlignEscapedNewlinesLeft = true;
286   GoogleStyle.AlignTrailingComments = true;
287   GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true;
288   GoogleStyle.AllowShortFunctionsOnASingleLine = true;
289   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
290   GoogleStyle.AllowShortLoopsOnASingleLine = true;
291   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
292   GoogleStyle.AlwaysBreakTemplateDeclarations = true;
293   GoogleStyle.BinPackParameters = true;
294   GoogleStyle.BreakBeforeBinaryOperators = false;
295   GoogleStyle.BreakBeforeTernaryOperators = true;
296   GoogleStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
297   GoogleStyle.BreakConstructorInitializersBeforeComma = false;
298   GoogleStyle.ColumnLimit = 80;
299   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
300   GoogleStyle.ConstructorInitializerIndentWidth = 4;
301   GoogleStyle.Cpp11BracedListStyle = true;
302   GoogleStyle.DerivePointerBinding = true;
303   GoogleStyle.ExperimentalAutoDetectBinPacking = false;
304   GoogleStyle.IndentCaseLabels = true;
305   GoogleStyle.IndentFunctionDeclarationAfterType = true;
306   GoogleStyle.IndentWidth = 2;
307   GoogleStyle.TabWidth = 8;
308   GoogleStyle.MaxEmptyLinesToKeep = 1;
309   GoogleStyle.NamespaceIndentation = FormatStyle::NI_None;
310   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
311   GoogleStyle.PointerBindsToType = true;
312   GoogleStyle.SpacesBeforeTrailingComments = 2;
313   GoogleStyle.Standard = FormatStyle::LS_Auto;
314   GoogleStyle.UseTab = FormatStyle::UT_Never;
315   GoogleStyle.SpacesInParentheses = false;
316   GoogleStyle.SpaceInEmptyParentheses = false;
317   GoogleStyle.SpacesInCStyleCastParentheses = false;
318   GoogleStyle.SpaceAfterControlStatementKeyword = true;
319   GoogleStyle.SpaceBeforeAssignmentOperators = true;
320   GoogleStyle.ContinuationIndentWidth = 4;
321   GoogleStyle.SpacesInAngles = false;
322 
323   setDefaultPenalties(GoogleStyle);
324   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
325   GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
326 
327   return GoogleStyle;
328 }
329 
330 FormatStyle getChromiumStyle() {
331   FormatStyle ChromiumStyle = getGoogleStyle();
332   ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
333   ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
334   ChromiumStyle.AllowShortLoopsOnASingleLine = false;
335   ChromiumStyle.BinPackParameters = false;
336   ChromiumStyle.DerivePointerBinding = false;
337   ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
338   return ChromiumStyle;
339 }
340 
341 FormatStyle getMozillaStyle() {
342   FormatStyle MozillaStyle = getLLVMStyle();
343   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
344   MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
345   MozillaStyle.DerivePointerBinding = true;
346   MozillaStyle.IndentCaseLabels = true;
347   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
348   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
349   MozillaStyle.PointerBindsToType = true;
350   return MozillaStyle;
351 }
352 
353 FormatStyle getWebKitStyle() {
354   FormatStyle Style = getLLVMStyle();
355   Style.AccessModifierOffset = -4;
356   Style.AlignTrailingComments = false;
357   Style.BreakBeforeBinaryOperators = true;
358   Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
359   Style.BreakConstructorInitializersBeforeComma = true;
360   Style.ColumnLimit = 0;
361   Style.IndentWidth = 4;
362   Style.NamespaceIndentation = FormatStyle::NI_Inner;
363   Style.PointerBindsToType = true;
364   return Style;
365 }
366 
367 bool getPredefinedStyle(StringRef Name, FormatStyle *Style) {
368   if (Name.equals_lower("llvm"))
369     *Style = getLLVMStyle();
370   else if (Name.equals_lower("chromium"))
371     *Style = getChromiumStyle();
372   else if (Name.equals_lower("mozilla"))
373     *Style = getMozillaStyle();
374   else if (Name.equals_lower("google"))
375     *Style = getGoogleStyle();
376   else if (Name.equals_lower("webkit"))
377     *Style = getWebKitStyle();
378   else
379     return false;
380 
381   return true;
382 }
383 
384 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
385   assert(Style);
386   assert(Style->Language != FormatStyle::LK_None);
387   if (Text.trim().empty())
388     return llvm::make_error_code(llvm::errc::invalid_argument);
389 
390   std::vector<FormatStyle> Styles;
391   // DocumentListTraits<vector<FormatStyle>> uses 0th element as the default one
392   // for the fields, keys for which are missing from the configuration.
393   Styles.push_back(*Style);
394   llvm::yaml::Input Input(Text);
395   Input >> Styles;
396   if (Input.error())
397     return Input.error();
398 
399   for (unsigned i = 1; i < Styles.size(); ++i) {
400     // Ensures that only the first configuration can skip the Language option.
401     if (Styles[i].Language == FormatStyle::LK_None && i != 1)
402       return llvm::make_error_code(llvm::errc::invalid_argument);
403     // Ensure that each language is configured at most once.
404     for (unsigned j = 1; j < i; ++j) {
405       if (Styles[i].Language == Styles[j].Language)
406         return llvm::make_error_code(llvm::errc::invalid_argument);
407     }
408   }
409   // Look for a suitable configuration starting from the end, so we can
410   // find the configuration for the specific language first, and the default
411   // configuration (which can only be at slot 1) after it.
412   for (unsigned i = Styles.size() - 1; i > 0; --i) {
413     if (Styles[i].Language == Styles[0].Language ||
414         Styles[i].Language == FormatStyle::LK_None) {
415       *Style = Styles[i];
416       Style->Language = Styles[0].Language;
417       return llvm::make_error_code(llvm::errc::success);
418     }
419   }
420   return llvm::make_error_code(llvm::errc::not_supported);
421 }
422 
423 std::string configurationAsText(const FormatStyle &Style) {
424   std::string Text;
425   llvm::raw_string_ostream Stream(Text);
426   llvm::yaml::Output Output(Stream);
427   // We use the same mapping method for input and output, so we need a non-const
428   // reference here.
429   FormatStyle NonConstStyle = Style;
430   Output << NonConstStyle;
431   return Stream.str();
432 }
433 
434 namespace {
435 
436 class NoColumnLimitFormatter {
437 public:
438   NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {}
439 
440   /// \brief Formats the line starting at \p State, simply keeping all of the
441   /// input's line breaking decisions.
442   void format(unsigned FirstIndent, const AnnotatedLine *Line,
443               bool LineIsMerged) {
444     LineState State =
445         Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false);
446     while (State.NextToken != NULL) {
447       bool Newline =
448           (!LineIsMerged && Indenter->mustBreak(State)) ||
449           (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
450       Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
451     }
452   }
453 
454 private:
455   ContinuationIndenter *Indenter;
456 };
457 
458 class LineJoiner {
459 public:
460   LineJoiner(const FormatStyle &Style) : Style(Style) {}
461 
462   /// \brief Calculates how many lines can be merged into 1 starting at \p I.
463   unsigned
464   tryFitMultipleLinesInOne(unsigned Indent,
465                            SmallVectorImpl<AnnotatedLine *>::const_iterator I,
466                            SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
467     // We can never merge stuff if there are trailing line comments.
468     AnnotatedLine *TheLine = *I;
469     if (TheLine->Last->Type == TT_LineComment)
470       return 0;
471 
472     if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
473       return 0;
474 
475     unsigned Limit =
476         Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
477     // If we already exceed the column limit, we set 'Limit' to 0. The different
478     // tryMerge..() functions can then decide whether to still do merging.
479     Limit = TheLine->Last->TotalLength > Limit
480                 ? 0
481                 : Limit - TheLine->Last->TotalLength;
482 
483     if (I + 1 == E || I[1]->Type == LT_Invalid)
484       return 0;
485 
486     if (TheLine->Last->Type == TT_FunctionLBrace) {
487       return Style.AllowShortFunctionsOnASingleLine
488                  ? tryMergeSimpleBlock(I, E, Limit)
489                  : 0;
490     }
491     if (TheLine->Last->is(tok::l_brace)) {
492       return Style.BreakBeforeBraces == clang::format::FormatStyle::BS_Attach
493                  ? tryMergeSimpleBlock(I, E, Limit)
494                  : 0;
495     }
496     if (I[1]->First->Type == TT_FunctionLBrace &&
497         Style.BreakBeforeBraces != FormatStyle::BS_Attach) {
498       // Reduce the column limit by the number of spaces we need to insert
499       // around braces.
500       Limit = Limit > 3 ? Limit - 3 : 0;
501       unsigned MergedLines = 0;
502       if (Style.AllowShortFunctionsOnASingleLine) {
503         MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
504         // If we managed to merge the block, count the function header, which is
505         // on a separate line.
506         if (MergedLines > 0)
507           ++MergedLines;
508       }
509       return MergedLines;
510     }
511     if (TheLine->First->is(tok::kw_if)) {
512       return Style.AllowShortIfStatementsOnASingleLine
513                  ? tryMergeSimpleControlStatement(I, E, Limit)
514                  : 0;
515     }
516     if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
517       return Style.AllowShortLoopsOnASingleLine
518                  ? tryMergeSimpleControlStatement(I, E, Limit)
519                  : 0;
520     }
521     if (TheLine->InPPDirective &&
522         (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
523       return tryMergeSimplePPDirective(I, E, Limit);
524     }
525     return 0;
526   }
527 
528 private:
529   unsigned
530   tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
531                             SmallVectorImpl<AnnotatedLine *>::const_iterator E,
532                             unsigned Limit) {
533     if (Limit == 0)
534       return 0;
535     if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)
536       return 0;
537     if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
538       return 0;
539     if (1 + I[1]->Last->TotalLength > Limit)
540       return 0;
541     return 1;
542   }
543 
544   unsigned tryMergeSimpleControlStatement(
545       SmallVectorImpl<AnnotatedLine *>::const_iterator I,
546       SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
547     if (Limit == 0)
548       return 0;
549     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman &&
550         I[1]->First->is(tok::l_brace))
551       return 0;
552     if (I[1]->InPPDirective != (*I)->InPPDirective ||
553         (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline))
554       return 0;
555     AnnotatedLine &Line = **I;
556     if (Line.Last->isNot(tok::r_paren))
557       return 0;
558     if (1 + I[1]->Last->TotalLength > Limit)
559       return 0;
560     if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
561                              tok::kw_while) ||
562         I[1]->First->Type == TT_LineComment)
563       return 0;
564     // Only inline simple if's (no nested if or else).
565     if (I + 2 != E && Line.First->is(tok::kw_if) &&
566         I[2]->First->is(tok::kw_else))
567       return 0;
568     return 1;
569   }
570 
571   unsigned
572   tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
573                       SmallVectorImpl<AnnotatedLine *>::const_iterator E,
574                       unsigned Limit) {
575     // First, check that the current line allows merging. This is the case if
576     // we're not in a control flow statement and the last token is an opening
577     // brace.
578     AnnotatedLine &Line = **I;
579     if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
580                             tok::kw_else, tok::kw_try, tok::kw_catch,
581                             tok::kw_for,
582                             // This gets rid of all ObjC @ keywords and methods.
583                             tok::at, tok::minus, tok::plus))
584       return 0;
585 
586     FormatToken *Tok = I[1]->First;
587     if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
588         (Tok->getNextNonComment() == NULL ||
589          Tok->getNextNonComment()->is(tok::semi))) {
590       // We merge empty blocks even if the line exceeds the column limit.
591       Tok->SpacesRequiredBefore = 0;
592       Tok->CanBreakBefore = true;
593       return 1;
594     } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
595       // Check that we still have three lines and they fit into the limit.
596       if (I + 2 == E || I[2]->Type == LT_Invalid)
597         return 0;
598 
599       if (!nextTwoLinesFitInto(I, Limit))
600         return 0;
601 
602       // Second, check that the next line does not contain any braces - if it
603       // does, readability declines when putting it into a single line.
604       if (I[1]->Last->Type == TT_LineComment || Tok->MustBreakBefore)
605         return 0;
606       do {
607         if (Tok->isOneOf(tok::l_brace, tok::r_brace))
608           return 0;
609         Tok = Tok->Next;
610       } while (Tok != NULL);
611 
612       // Last, check that the third line contains a single closing brace.
613       Tok = I[2]->First;
614       if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) ||
615           Tok->MustBreakBefore)
616         return 0;
617 
618       return 2;
619     }
620     return 0;
621   }
622 
623   bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
624                            unsigned Limit) {
625     return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
626   }
627 
628   const FormatStyle &Style;
629 };
630 
631 class UnwrappedLineFormatter {
632 public:
633   UnwrappedLineFormatter(ContinuationIndenter *Indenter,
634                          WhitespaceManager *Whitespaces,
635                          const FormatStyle &Style)
636       : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
637         Joiner(Style) {}
638 
639   unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
640                   int AdditionalIndent = 0, bool FixBadIndentation = false) {
641     assert(!Lines.empty());
642     unsigned Penalty = 0;
643     std::vector<int> IndentForLevel;
644     for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i)
645       IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
646     const AnnotatedLine *PreviousLine = NULL;
647     for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(),
648                                                           E = Lines.end();
649          I != E; ++I) {
650       const AnnotatedLine &TheLine = **I;
651       const FormatToken *FirstTok = TheLine.First;
652       int Offset = getIndentOffset(*FirstTok);
653 
654       // Determine indent and try to merge multiple unwrapped lines.
655       while (IndentForLevel.size() <= TheLine.Level)
656         IndentForLevel.push_back(-1);
657       IndentForLevel.resize(TheLine.Level + 1);
658       unsigned Indent = getIndent(IndentForLevel, TheLine.Level);
659       if (static_cast<int>(Indent) + Offset >= 0)
660         Indent += Offset;
661       unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E);
662       if (MergedLines > 0 && Style.ColumnLimit == 0) {
663         // Disallow line merging if there is a break at the start of one of the
664         // input lines.
665         for (unsigned i = 0; i < MergedLines; ++i) {
666           if (I[i + 1]->First->NewlinesBefore > 0)
667             MergedLines = 0;
668         }
669       }
670       if (!DryRun) {
671         for (unsigned i = 0; i < MergedLines; ++i) {
672           join(*I[i], *I[i + 1]);
673         }
674       }
675       I += MergedLines;
676 
677       unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
678       bool FixIndentation =
679           FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn);
680       if (TheLine.First->is(tok::eof)) {
681         if (PreviousLine && PreviousLine->Affected && !DryRun) {
682           // Remove the file's trailing whitespace.
683           unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u);
684           Whitespaces->replaceWhitespace(*TheLine.First, Newlines,
685                                          /*IndentLevel=*/0, /*Spaces=*/0,
686                                          /*TargetColumn=*/0);
687         }
688       } else if (TheLine.Type != LT_Invalid &&
689                  (TheLine.Affected || FixIndentation)) {
690         if (FirstTok->WhitespaceRange.isValid()) {
691           if (!DryRun)
692             formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
693                              Indent, TheLine.InPPDirective);
694         } else {
695           Indent = LevelIndent = FirstTok->OriginalColumn;
696         }
697 
698         // If everything fits on a single line, just put it there.
699         unsigned ColumnLimit = Style.ColumnLimit;
700         if (I + 1 != E) {
701           AnnotatedLine *NextLine = I[1];
702           if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline)
703             ColumnLimit = getColumnLimit(TheLine.InPPDirective);
704         }
705 
706         if (TheLine.Last->TotalLength + Indent <= ColumnLimit) {
707           LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun);
708           while (State.NextToken != NULL)
709             Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
710         } else if (Style.ColumnLimit == 0) {
711           // FIXME: Implement nested blocks for ColumnLimit = 0.
712           NoColumnLimitFormatter Formatter(Indenter);
713           if (!DryRun)
714             Formatter.format(Indent, &TheLine,
715                              /*LineIsMerged=*/MergedLines > 0);
716         } else {
717           Penalty += format(TheLine, Indent, DryRun);
718         }
719 
720         IndentForLevel[TheLine.Level] = LevelIndent;
721       } else if (TheLine.ChildrenAffected) {
722         format(TheLine.Children, DryRun);
723       } else {
724         // Format the first token if necessary, and notify the WhitespaceManager
725         // about the unchanged whitespace.
726         for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
727           if (Tok == TheLine.First &&
728               (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
729             unsigned LevelIndent = Tok->OriginalColumn;
730             if (!DryRun) {
731               // Remove trailing whitespace of the previous line.
732               if ((PreviousLine && PreviousLine->Affected) ||
733                   TheLine.LeadingEmptyLinesAffected) {
734                 formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent,
735                                  TheLine.InPPDirective);
736               } else {
737                 Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
738               }
739             }
740 
741             if (static_cast<int>(LevelIndent) - Offset >= 0)
742               LevelIndent -= Offset;
743             if (Tok->isNot(tok::comment))
744               IndentForLevel[TheLine.Level] = LevelIndent;
745           } else if (!DryRun) {
746             Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
747           }
748         }
749       }
750       if (!DryRun) {
751         for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
752           Tok->Finalized = true;
753         }
754       }
755       PreviousLine = *I;
756     }
757     return Penalty;
758   }
759 
760 private:
761   /// \brief Formats an \c AnnotatedLine and returns the penalty.
762   ///
763   /// If \p DryRun is \c false, directly applies the changes.
764   unsigned format(const AnnotatedLine &Line, unsigned FirstIndent,
765                   bool DryRun) {
766     LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
767 
768     // If the ObjC method declaration does not fit on a line, we should format
769     // it with one arg per line.
770     if (State.Line->Type == LT_ObjCMethodDecl)
771       State.Stack.back().BreakBeforeParameter = true;
772 
773     // Find best solution in solution space.
774     return analyzeSolutionSpace(State, DryRun);
775   }
776 
777   /// \brief An edge in the solution space from \c Previous->State to \c State,
778   /// inserting a newline dependent on the \c NewLine.
779   struct StateNode {
780     StateNode(const LineState &State, bool NewLine, StateNode *Previous)
781         : State(State), NewLine(NewLine), Previous(Previous) {}
782     LineState State;
783     bool NewLine;
784     StateNode *Previous;
785   };
786 
787   /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
788   ///
789   /// In case of equal penalties, we want to prefer states that were inserted
790   /// first. During state generation we make sure that we insert states first
791   /// that break the line as late as possible.
792   typedef std::pair<unsigned, unsigned> OrderedPenalty;
793 
794   /// \brief An item in the prioritized BFS search queue. The \c StateNode's
795   /// \c State has the given \c OrderedPenalty.
796   typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
797 
798   /// \brief The BFS queue type.
799   typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
800                               std::greater<QueueItem> > QueueType;
801 
802   /// \brief Get the offset of the line relatively to the level.
803   ///
804   /// For example, 'public:' labels in classes are offset by 1 or 2
805   /// characters to the left from their level.
806   int getIndentOffset(const FormatToken &RootToken) {
807     if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
808       return Style.AccessModifierOffset;
809     return 0;
810   }
811 
812   /// \brief Add a new line and the required indent before the first Token
813   /// of the \c UnwrappedLine if there was no structural parsing error.
814   void formatFirstToken(FormatToken &RootToken,
815                         const AnnotatedLine *PreviousLine, unsigned IndentLevel,
816                         unsigned Indent, bool InPPDirective) {
817     unsigned Newlines =
818         std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
819     // Remove empty lines before "}" where applicable.
820     if (RootToken.is(tok::r_brace) &&
821         (!RootToken.Next ||
822          (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
823       Newlines = std::min(Newlines, 1u);
824     if (Newlines == 0 && !RootToken.IsFirst)
825       Newlines = 1;
826 
827     // Insert extra new line before access specifiers.
828     if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
829         RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
830       ++Newlines;
831 
832     // Remove empty lines after access specifiers.
833     if (PreviousLine && PreviousLine->First->isAccessSpecifier())
834       Newlines = std::min(1u, Newlines);
835 
836     Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent,
837                                    Indent, InPPDirective &&
838                                                !RootToken.HasUnescapedNewline);
839   }
840 
841   /// \brief Get the indent of \p Level from \p IndentForLevel.
842   ///
843   /// \p IndentForLevel must contain the indent for the level \c l
844   /// at \p IndentForLevel[l], or a value < 0 if the indent for
845   /// that level is unknown.
846   unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
847     if (IndentForLevel[Level] != -1)
848       return IndentForLevel[Level];
849     if (Level == 0)
850       return 0;
851     return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
852   }
853 
854   void join(AnnotatedLine &A, const AnnotatedLine &B) {
855     assert(!A.Last->Next);
856     assert(!B.First->Previous);
857     if (B.Affected)
858       A.Affected = true;
859     A.Last->Next = B.First;
860     B.First->Previous = A.Last;
861     B.First->CanBreakBefore = true;
862     unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
863     for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
864       Tok->TotalLength += LengthA;
865       A.Last = Tok;
866     }
867   }
868 
869   unsigned getColumnLimit(bool InPPDirective) const {
870     // In preprocessor directives reserve two chars for trailing " \"
871     return Style.ColumnLimit - (InPPDirective ? 2 : 0);
872   }
873 
874   /// \brief Analyze the entire solution space starting from \p InitialState.
875   ///
876   /// This implements a variant of Dijkstra's algorithm on the graph that spans
877   /// the solution space (\c LineStates are the nodes). The algorithm tries to
878   /// find the shortest path (the one with lowest penalty) from \p InitialState
879   /// to a state where all tokens are placed. Returns the penalty.
880   ///
881   /// If \p DryRun is \c false, directly applies the changes.
882   unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) {
883     std::set<LineState> Seen;
884 
885     // Increasing count of \c StateNode items we have created. This is used to
886     // create a deterministic order independent of the container.
887     unsigned Count = 0;
888     QueueType Queue;
889 
890     // Insert start element into queue.
891     StateNode *Node =
892         new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
893     Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
894     ++Count;
895 
896     unsigned Penalty = 0;
897 
898     // While not empty, take first element and follow edges.
899     while (!Queue.empty()) {
900       Penalty = Queue.top().first.first;
901       StateNode *Node = Queue.top().second;
902       if (Node->State.NextToken == NULL) {
903         DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
904         break;
905       }
906       Queue.pop();
907 
908       // Cut off the analysis of certain solutions if the analysis gets too
909       // complex. See description of IgnoreStackForComparison.
910       if (Count > 10000)
911         Node->State.IgnoreStackForComparison = true;
912 
913       if (!Seen.insert(Node->State).second)
914         // State already examined with lower penalty.
915         continue;
916 
917       FormatDecision LastFormat = Node->State.NextToken->Decision;
918       if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
919         addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
920       if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
921         addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
922     }
923 
924     if (Queue.empty()) {
925       // We were unable to find a solution, do nothing.
926       // FIXME: Add diagnostic?
927       DEBUG(llvm::dbgs() << "Could not find a solution.\n");
928       return 0;
929     }
930 
931     // Reconstruct the solution.
932     if (!DryRun)
933       reconstructPath(InitialState, Queue.top().second);
934 
935     DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
936     DEBUG(llvm::dbgs() << "---\n");
937 
938     return Penalty;
939   }
940 
941   void reconstructPath(LineState &State, StateNode *Current) {
942     std::deque<StateNode *> Path;
943     // We do not need a break before the initial token.
944     while (Current->Previous) {
945       Path.push_front(Current);
946       Current = Current->Previous;
947     }
948     for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
949          I != E; ++I) {
950       unsigned Penalty = 0;
951       formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
952       Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
953 
954       DEBUG({
955         if ((*I)->NewLine) {
956           llvm::dbgs() << "Penalty for placing "
957                        << (*I)->Previous->State.NextToken->Tok.getName() << ": "
958                        << Penalty << "\n";
959         }
960       });
961     }
962   }
963 
964   /// \brief Add the following state to the analysis queue \c Queue.
965   ///
966   /// Assume the current state is \p PreviousNode and has been reached with a
967   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
968   void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
969                            bool NewLine, unsigned *Count, QueueType *Queue) {
970     if (NewLine && !Indenter->canBreak(PreviousNode->State))
971       return;
972     if (!NewLine && Indenter->mustBreak(PreviousNode->State))
973       return;
974 
975     StateNode *Node = new (Allocator.Allocate())
976         StateNode(PreviousNode->State, NewLine, PreviousNode);
977     if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
978       return;
979 
980     Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
981 
982     Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
983     ++(*Count);
984   }
985 
986   /// \brief If the \p State's next token is an r_brace closing a nested block,
987   /// format the nested block before it.
988   ///
989   /// Returns \c true if all children could be placed successfully and adapts
990   /// \p Penalty as well as \p State. If \p DryRun is false, also directly
991   /// creates changes using \c Whitespaces.
992   ///
993   /// The crucial idea here is that children always get formatted upon
994   /// encountering the closing brace right after the nested block. Now, if we
995   /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
996   /// \c false), the entire block has to be kept on the same line (which is only
997   /// possible if it fits on the line, only contains a single statement, etc.
998   ///
999   /// If \p NewLine is true, we format the nested block on separate lines, i.e.
1000   /// break after the "{", format all lines with correct indentation and the put
1001   /// the closing "}" on yet another new line.
1002   ///
1003   /// This enables us to keep the simple structure of the
1004   /// \c UnwrappedLineFormatter, where we only have two options for each token:
1005   /// break or don't break.
1006   bool formatChildren(LineState &State, bool NewLine, bool DryRun,
1007                       unsigned &Penalty) {
1008     FormatToken &Previous = *State.NextToken->Previous;
1009     const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
1010     if (!LBrace || LBrace->isNot(tok::l_brace) ||
1011         LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
1012       // The previous token does not open a block. Nothing to do. We don't
1013       // assert so that we can simply call this function for all tokens.
1014       return true;
1015 
1016     if (NewLine) {
1017       int AdditionalIndent = State.Stack.back().Indent -
1018                              Previous.Children[0]->Level * Style.IndentWidth;
1019       Penalty += format(Previous.Children, DryRun, AdditionalIndent,
1020                         /*FixBadIndentation=*/true);
1021       return true;
1022     }
1023 
1024     // Cannot merge multiple statements into a single line.
1025     if (Previous.Children.size() > 1)
1026       return false;
1027 
1028     // We can't put the closing "}" on a line with a trailing comment.
1029     if (Previous.Children[0]->Last->isTrailingComment())
1030       return false;
1031 
1032     if (!DryRun) {
1033       Whitespaces->replaceWhitespace(
1034           *Previous.Children[0]->First,
1035           /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
1036           /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
1037     }
1038     Penalty += format(*Previous.Children[0], State.Column + 1, DryRun);
1039 
1040     State.Column += 1 + Previous.Children[0]->Last->TotalLength;
1041     return true;
1042   }
1043 
1044   ContinuationIndenter *Indenter;
1045   WhitespaceManager *Whitespaces;
1046   FormatStyle Style;
1047   LineJoiner Joiner;
1048 
1049   llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
1050 };
1051 
1052 class FormatTokenLexer {
1053 public:
1054   FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
1055                    encoding::Encoding Encoding)
1056       : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0),
1057         TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
1058         IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
1059     Lex.SetKeepWhitespaceMode(true);
1060   }
1061 
1062   ArrayRef<FormatToken *> lex() {
1063     assert(Tokens.empty());
1064     do {
1065       Tokens.push_back(getNextToken());
1066       tryMergePreviousTokens();
1067     } while (Tokens.back()->Tok.isNot(tok::eof));
1068     return Tokens;
1069   }
1070 
1071   IdentifierTable &getIdentTable() { return IdentTable; }
1072 
1073 private:
1074   void tryMergePreviousTokens() {
1075     if (tryMerge_TMacro())
1076       return;
1077 
1078     if (Style.Language == FormatStyle::LK_JavaScript) {
1079       static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
1080       static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
1081       static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater,
1082                                                tok::greaterequal };
1083       // FIXME: We probably need to change token type to mimic operator with the
1084       // correct priority.
1085       if (tryMergeTokens(JSIdentity))
1086         return;
1087       if (tryMergeTokens(JSNotIdentity))
1088         return;
1089       if (tryMergeTokens(JSShiftEqual))
1090         return;
1091     }
1092   }
1093 
1094   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) {
1095     if (Tokens.size() < Kinds.size())
1096       return false;
1097 
1098     SmallVectorImpl<FormatToken *>::const_iterator First =
1099         Tokens.end() - Kinds.size();
1100     if (!First[0]->is(Kinds[0]))
1101       return false;
1102     unsigned AddLength = 0;
1103     for (unsigned i = 1; i < Kinds.size(); ++i) {
1104       if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
1105                                          First[i]->WhitespaceRange.getEnd())
1106         return false;
1107       AddLength += First[i]->TokenText.size();
1108     }
1109     Tokens.resize(Tokens.size() - Kinds.size() + 1);
1110     First[0]->TokenText = StringRef(First[0]->TokenText.data(),
1111                                     First[0]->TokenText.size() + AddLength);
1112     First[0]->ColumnWidth += AddLength;
1113     return true;
1114   }
1115 
1116   bool tryMerge_TMacro() {
1117     if (Tokens.size() < 4)
1118       return false;
1119     FormatToken *Last = Tokens.back();
1120     if (!Last->is(tok::r_paren))
1121       return false;
1122 
1123     FormatToken *String = Tokens[Tokens.size() - 2];
1124     if (!String->is(tok::string_literal) || String->IsMultiline)
1125       return false;
1126 
1127     if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
1128       return false;
1129 
1130     FormatToken *Macro = Tokens[Tokens.size() - 4];
1131     if (Macro->TokenText != "_T")
1132       return false;
1133 
1134     const char *Start = Macro->TokenText.data();
1135     const char *End = Last->TokenText.data() + Last->TokenText.size();
1136     String->TokenText = StringRef(Start, End - Start);
1137     String->IsFirst = Macro->IsFirst;
1138     String->LastNewlineOffset = Macro->LastNewlineOffset;
1139     String->WhitespaceRange = Macro->WhitespaceRange;
1140     String->OriginalColumn = Macro->OriginalColumn;
1141     String->ColumnWidth = encoding::columnWidthWithTabs(
1142         String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
1143 
1144     Tokens.pop_back();
1145     Tokens.pop_back();
1146     Tokens.pop_back();
1147     Tokens.back() = String;
1148     return true;
1149   }
1150 
1151   FormatToken *getNextToken() {
1152     if (GreaterStashed) {
1153       // Create a synthesized second '>' token.
1154       // FIXME: Increment Column and set OriginalColumn.
1155       Token Greater = FormatTok->Tok;
1156       FormatTok = new (Allocator.Allocate()) FormatToken;
1157       FormatTok->Tok = Greater;
1158       SourceLocation GreaterLocation =
1159           FormatTok->Tok.getLocation().getLocWithOffset(1);
1160       FormatTok->WhitespaceRange =
1161           SourceRange(GreaterLocation, GreaterLocation);
1162       FormatTok->TokenText = ">";
1163       FormatTok->ColumnWidth = 1;
1164       GreaterStashed = false;
1165       return FormatTok;
1166     }
1167 
1168     FormatTok = new (Allocator.Allocate()) FormatToken;
1169     readRawToken(*FormatTok);
1170     SourceLocation WhitespaceStart =
1171         FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
1172     FormatTok->IsFirst = IsFirstToken;
1173     IsFirstToken = false;
1174 
1175     // Consume and record whitespace until we find a significant token.
1176     unsigned WhitespaceLength = TrailingWhitespace;
1177     while (FormatTok->Tok.is(tok::unknown)) {
1178       for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
1179         switch (FormatTok->TokenText[i]) {
1180         case '\n':
1181           ++FormatTok->NewlinesBefore;
1182           // FIXME: This is technically incorrect, as it could also
1183           // be a literal backslash at the end of the line.
1184           if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
1185                          (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
1186                           FormatTok->TokenText[i - 2] != '\\')))
1187             FormatTok->HasUnescapedNewline = true;
1188           FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1189           Column = 0;
1190           break;
1191         case '\r':
1192         case '\f':
1193         case '\v':
1194           Column = 0;
1195           break;
1196         case ' ':
1197           ++Column;
1198           break;
1199         case '\t':
1200           Column += Style.TabWidth - Column % Style.TabWidth;
1201           break;
1202         case '\\':
1203           ++Column;
1204           if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
1205                              FormatTok->TokenText[i + 1] != '\n'))
1206             FormatTok->Type = TT_ImplicitStringLiteral;
1207           break;
1208         default:
1209           FormatTok->Type = TT_ImplicitStringLiteral;
1210           ++Column;
1211           break;
1212         }
1213       }
1214 
1215       if (FormatTok->Type == TT_ImplicitStringLiteral)
1216         break;
1217       WhitespaceLength += FormatTok->Tok.getLength();
1218 
1219       readRawToken(*FormatTok);
1220     }
1221 
1222     // In case the token starts with escaped newlines, we want to
1223     // take them into account as whitespace - this pattern is quite frequent
1224     // in macro definitions.
1225     // FIXME: Add a more explicit test.
1226     while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
1227            FormatTok->TokenText[1] == '\n') {
1228       // FIXME: ++FormatTok->NewlinesBefore is missing...
1229       WhitespaceLength += 2;
1230       Column = 0;
1231       FormatTok->TokenText = FormatTok->TokenText.substr(2);
1232     }
1233 
1234     FormatTok->WhitespaceRange = SourceRange(
1235         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1236 
1237     FormatTok->OriginalColumn = Column;
1238 
1239     TrailingWhitespace = 0;
1240     if (FormatTok->Tok.is(tok::comment)) {
1241       // FIXME: Add the trimmed whitespace to Column.
1242       StringRef UntrimmedText = FormatTok->TokenText;
1243       FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
1244       TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
1245     } else if (FormatTok->Tok.is(tok::raw_identifier)) {
1246       IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
1247       FormatTok->Tok.setIdentifierInfo(&Info);
1248       FormatTok->Tok.setKind(Info.getTokenID());
1249     } else if (FormatTok->Tok.is(tok::greatergreater)) {
1250       FormatTok->Tok.setKind(tok::greater);
1251       FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1252       GreaterStashed = true;
1253     }
1254 
1255     // Now FormatTok is the next non-whitespace token.
1256 
1257     StringRef Text = FormatTok->TokenText;
1258     size_t FirstNewlinePos = Text.find('\n');
1259     if (FirstNewlinePos == StringRef::npos) {
1260       // FIXME: ColumnWidth actually depends on the start column, we need to
1261       // take this into account when the token is moved.
1262       FormatTok->ColumnWidth =
1263           encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
1264       Column += FormatTok->ColumnWidth;
1265     } else {
1266       FormatTok->IsMultiline = true;
1267       // FIXME: ColumnWidth actually depends on the start column, we need to
1268       // take this into account when the token is moved.
1269       FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
1270           Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
1271 
1272       // The last line of the token always starts in column 0.
1273       // Thus, the length can be precomputed even in the presence of tabs.
1274       FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
1275           Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
1276           Encoding);
1277       Column = FormatTok->LastLineColumnWidth;
1278     }
1279 
1280     return FormatTok;
1281   }
1282 
1283   FormatToken *FormatTok;
1284   bool IsFirstToken;
1285   bool GreaterStashed;
1286   unsigned Column;
1287   unsigned TrailingWhitespace;
1288   Lexer &Lex;
1289   SourceManager &SourceMgr;
1290   FormatStyle &Style;
1291   IdentifierTable IdentTable;
1292   encoding::Encoding Encoding;
1293   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
1294   SmallVector<FormatToken *, 16> Tokens;
1295 
1296   void readRawToken(FormatToken &Tok) {
1297     Lex.LexFromRawLexer(Tok.Tok);
1298     Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1299                               Tok.Tok.getLength());
1300     // For formatting, treat unterminated string literals like normal string
1301     // literals.
1302     if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
1303         Tok.TokenText[0] == '"') {
1304       Tok.Tok.setKind(tok::string_literal);
1305       Tok.IsUnterminatedLiteral = true;
1306     }
1307   }
1308 };
1309 
1310 static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1311   switch (Language) {
1312   case FormatStyle::LK_Cpp:
1313     return "C++";
1314   case FormatStyle::LK_JavaScript:
1315     return "JavaScript";
1316   default:
1317     return "Unknown";
1318   }
1319 }
1320 
1321 class Formatter : public UnwrappedLineConsumer {
1322 public:
1323   Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
1324             const std::vector<CharSourceRange> &Ranges)
1325       : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1326         Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
1327         Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
1328         Encoding(encoding::detectEncoding(Lex.getBuffer())) {
1329     DEBUG(llvm::dbgs() << "File encoding: "
1330                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1331                                                                : "unknown")
1332                        << "\n");
1333     DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
1334                        << "\n");
1335   }
1336 
1337   tooling::Replacements format() {
1338     tooling::Replacements Result;
1339     FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
1340 
1341     UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
1342     bool StructuralError = Parser.parse();
1343     assert(UnwrappedLines.rbegin()->empty());
1344     for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1345          ++Run) {
1346       DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1347       SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1348       for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1349         AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1350       }
1351       tooling::Replacements RunResult =
1352           format(AnnotatedLines, StructuralError, Tokens);
1353       DEBUG({
1354         llvm::dbgs() << "Replacements for run " << Run << ":\n";
1355         for (tooling::Replacements::iterator I = RunResult.begin(),
1356                                              E = RunResult.end();
1357              I != E; ++I) {
1358           llvm::dbgs() << I->toString() << "\n";
1359         }
1360       });
1361       for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1362         delete AnnotatedLines[i];
1363       }
1364       Result.insert(RunResult.begin(), RunResult.end());
1365       Whitespaces.reset();
1366     }
1367     return Result;
1368   }
1369 
1370   tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1371                                bool StructuralError, FormatTokenLexer &Tokens) {
1372     TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
1373     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1374       Annotator.annotate(*AnnotatedLines[i]);
1375     }
1376     deriveLocalStyle(AnnotatedLines);
1377     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1378       Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1379     }
1380     computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
1381 
1382     Annotator.setCommentLineLevels(AnnotatedLines);
1383     ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding,
1384                                   BinPackInconclusiveFunctions);
1385     UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style);
1386     Formatter.format(AnnotatedLines, /*DryRun=*/false);
1387     return Whitespaces.generateReplacements();
1388   }
1389 
1390 private:
1391   // Determines which lines are affected by the SourceRanges given as input.
1392   // Returns \c true if at least one line between I and E or one of their
1393   // children is affected.
1394   bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
1395                             SmallVectorImpl<AnnotatedLine *>::iterator E) {
1396     bool SomeLineAffected = false;
1397     const AnnotatedLine *PreviousLine = NULL;
1398     while (I != E) {
1399       AnnotatedLine *Line = *I;
1400       Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
1401 
1402       // If a line is part of a preprocessor directive, it needs to be formatted
1403       // if any token within the directive is affected.
1404       if (Line->InPPDirective) {
1405         FormatToken *Last = Line->Last;
1406         SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
1407         while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
1408           Last = (*PPEnd)->Last;
1409           ++PPEnd;
1410         }
1411 
1412         if (affectsTokenRange(*Line->First, *Last,
1413                               /*IncludeLeadingNewlines=*/false)) {
1414           SomeLineAffected = true;
1415           markAllAsAffected(I, PPEnd);
1416         }
1417         I = PPEnd;
1418         continue;
1419       }
1420 
1421       if (nonPPLineAffected(Line, PreviousLine))
1422         SomeLineAffected = true;
1423 
1424       PreviousLine = Line;
1425       ++I;
1426     }
1427     return SomeLineAffected;
1428   }
1429 
1430   // Determines whether 'Line' is affected by the SourceRanges given as input.
1431   // Returns \c true if line or one if its children is affected.
1432   bool nonPPLineAffected(AnnotatedLine *Line,
1433                          const AnnotatedLine *PreviousLine) {
1434     bool SomeLineAffected = false;
1435     Line->ChildrenAffected =
1436         computeAffectedLines(Line->Children.begin(), Line->Children.end());
1437     if (Line->ChildrenAffected)
1438       SomeLineAffected = true;
1439 
1440     // Stores whether one of the line's tokens is directly affected.
1441     bool SomeTokenAffected = false;
1442     // Stores whether we need to look at the leading newlines of the next token
1443     // in order to determine whether it was affected.
1444     bool IncludeLeadingNewlines = false;
1445 
1446     // Stores whether the first child line of any of this line's tokens is
1447     // affected.
1448     bool SomeFirstChildAffected = false;
1449 
1450     for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
1451       // Determine whether 'Tok' was affected.
1452       if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
1453         SomeTokenAffected = true;
1454 
1455       // Determine whether the first child of 'Tok' was affected.
1456       if (!Tok->Children.empty() && Tok->Children.front()->Affected)
1457         SomeFirstChildAffected = true;
1458 
1459       IncludeLeadingNewlines = Tok->Children.empty();
1460     }
1461 
1462     // Was this line moved, i.e. has it previously been on the same line as an
1463     // affected line?
1464     bool LineMoved = PreviousLine && PreviousLine->Affected &&
1465                      Line->First->NewlinesBefore == 0;
1466 
1467     bool IsContinuedComment = Line->First->is(tok::comment) &&
1468                               Line->First->Next == NULL &&
1469                               Line->First->NewlinesBefore < 2 && PreviousLine &&
1470                               PreviousLine->Affected &&
1471                               PreviousLine->Last->is(tok::comment);
1472 
1473     if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
1474         IsContinuedComment) {
1475       Line->Affected = true;
1476       SomeLineAffected = true;
1477     }
1478     return SomeLineAffected;
1479   }
1480 
1481   // Marks all lines between I and E as well as all their children as affected.
1482   void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
1483                          SmallVectorImpl<AnnotatedLine *>::iterator E) {
1484     while (I != E) {
1485       (*I)->Affected = true;
1486       markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
1487       ++I;
1488     }
1489   }
1490 
1491   // Returns true if the range from 'First' to 'Last' intersects with one of the
1492   // input ranges.
1493   bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
1494                          bool IncludeLeadingNewlines) {
1495     SourceLocation Start = First.WhitespaceRange.getBegin();
1496     if (!IncludeLeadingNewlines)
1497       Start = Start.getLocWithOffset(First.LastNewlineOffset);
1498     SourceLocation End = Last.getStartOfNonWhitespace();
1499     if (Last.TokenText.size() > 0)
1500       End = End.getLocWithOffset(Last.TokenText.size() - 1);
1501     CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
1502     return affectsCharSourceRange(Range);
1503   }
1504 
1505   // Returns true if one of the input ranges intersect the leading empty lines
1506   // before 'Tok'.
1507   bool affectsLeadingEmptyLines(const FormatToken &Tok) {
1508     CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
1509         Tok.WhitespaceRange.getBegin(),
1510         Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
1511     return affectsCharSourceRange(EmptyLineRange);
1512   }
1513 
1514   // Returns true if 'Range' intersects with one of the input ranges.
1515   bool affectsCharSourceRange(const CharSourceRange &Range) {
1516     for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
1517                                                           E = Ranges.end();
1518          I != E; ++I) {
1519       if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
1520           !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
1521         return true;
1522     }
1523     return false;
1524   }
1525 
1526   static bool inputUsesCRLF(StringRef Text) {
1527     return Text.count('\r') * 2 > Text.count('\n');
1528   }
1529 
1530   void
1531   deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1532     unsigned CountBoundToVariable = 0;
1533     unsigned CountBoundToType = 0;
1534     bool HasCpp03IncompatibleFormat = false;
1535     bool HasBinPackedFunction = false;
1536     bool HasOnePerLineFunction = false;
1537     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1538       if (!AnnotatedLines[i]->First->Next)
1539         continue;
1540       FormatToken *Tok = AnnotatedLines[i]->First->Next;
1541       while (Tok->Next) {
1542         if (Tok->Type == TT_PointerOrReference) {
1543           bool SpacesBefore =
1544               Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1545           bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1546                              Tok->Next->WhitespaceRange.getEnd();
1547           if (SpacesBefore && !SpacesAfter)
1548             ++CountBoundToVariable;
1549           else if (!SpacesBefore && SpacesAfter)
1550             ++CountBoundToType;
1551         }
1552 
1553         if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1554           if (Tok->is(tok::coloncolon) &&
1555               Tok->Previous->Type == TT_TemplateOpener)
1556             HasCpp03IncompatibleFormat = true;
1557           if (Tok->Type == TT_TemplateCloser &&
1558               Tok->Previous->Type == TT_TemplateCloser)
1559             HasCpp03IncompatibleFormat = true;
1560         }
1561 
1562         if (Tok->PackingKind == PPK_BinPacked)
1563           HasBinPackedFunction = true;
1564         if (Tok->PackingKind == PPK_OnePerLine)
1565           HasOnePerLineFunction = true;
1566 
1567         Tok = Tok->Next;
1568       }
1569     }
1570     if (Style.DerivePointerBinding) {
1571       if (CountBoundToType > CountBoundToVariable)
1572         Style.PointerBindsToType = true;
1573       else if (CountBoundToType < CountBoundToVariable)
1574         Style.PointerBindsToType = false;
1575     }
1576     if (Style.Standard == FormatStyle::LS_Auto) {
1577       Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1578                                                   : FormatStyle::LS_Cpp03;
1579     }
1580     BinPackInconclusiveFunctions =
1581         HasBinPackedFunction || !HasOnePerLineFunction;
1582   }
1583 
1584   virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1585     assert(!UnwrappedLines.empty());
1586     UnwrappedLines.back().push_back(TheLine);
1587   }
1588 
1589   virtual void finishRun() {
1590     UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1591   }
1592 
1593   FormatStyle Style;
1594   Lexer &Lex;
1595   SourceManager &SourceMgr;
1596   WhitespaceManager Whitespaces;
1597   SmallVector<CharSourceRange, 8> Ranges;
1598   SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
1599 
1600   encoding::Encoding Encoding;
1601   bool BinPackInconclusiveFunctions;
1602 };
1603 
1604 } // end anonymous namespace
1605 
1606 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1607                                SourceManager &SourceMgr,
1608                                std::vector<CharSourceRange> Ranges) {
1609   Formatter formatter(Style, Lex, SourceMgr, Ranges);
1610   return formatter.format();
1611 }
1612 
1613 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1614                                std::vector<tooling::Range> Ranges,
1615                                StringRef FileName) {
1616   FileManager Files((FileSystemOptions()));
1617   DiagnosticsEngine Diagnostics(
1618       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1619       new DiagnosticOptions);
1620   SourceManager SourceMgr(Diagnostics, Files);
1621   llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1622   const clang::FileEntry *Entry =
1623       Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1624   SourceMgr.overrideFileContents(Entry, Buf);
1625   FileID ID =
1626       SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1627   Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
1628             getFormattingLangOpts(Style.Standard));
1629   SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1630   std::vector<CharSourceRange> CharRanges;
1631   for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1632     SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset());
1633     SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
1634     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1635   }
1636   return reformat(Style, Lex, SourceMgr, CharRanges);
1637 }
1638 
1639 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) {
1640   LangOptions LangOpts;
1641   LangOpts.CPlusPlus = 1;
1642   LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1643   LangOpts.LineComment = 1;
1644   LangOpts.Bool = 1;
1645   LangOpts.ObjC1 = 1;
1646   LangOpts.ObjC2 = 1;
1647   return LangOpts;
1648 }
1649 
1650 const char *StyleOptionHelpDescription =
1651     "Coding style, currently supports:\n"
1652     "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
1653     "Use -style=file to load style configuration from\n"
1654     ".clang-format file located in one of the parent\n"
1655     "directories of the source file (or current\n"
1656     "directory for stdin).\n"
1657     "Use -style=\"{key: value, ...}\" to set specific\n"
1658     "parameters, e.g.:\n"
1659     "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
1660 
1661 static void fillLanguageByFileName(StringRef FileName, FormatStyle *Style) {
1662   if (FileName.endswith_lower(".c") || FileName.endswith_lower(".h") ||
1663       FileName.endswith_lower(".cpp") || FileName.endswith_lower(".hpp") ||
1664       FileName.endswith_lower(".cc") || FileName.endswith_lower(".hh") ||
1665       FileName.endswith_lower(".cxx") || FileName.endswith_lower(".hxx") ||
1666       FileName.endswith_lower(".m") || FileName.endswith_lower(".mm")) {
1667     Style->Language = FormatStyle::LK_Cpp;
1668   }
1669   if (FileName.endswith_lower(".js")) {
1670     Style->Language = FormatStyle::LK_JavaScript;
1671   }
1672 }
1673 
1674 FormatStyle getStyle(StringRef StyleName, StringRef FileName,
1675                      StringRef FallbackStyle) {
1676   FormatStyle Style;
1677   if (!getPredefinedStyle(FallbackStyle, &Style)) {
1678     llvm::errs() << "Invalid fallback style \"" << FallbackStyle
1679                  << "\" using LLVM style\n";
1680     return getLLVMStyle();
1681   }
1682   fillLanguageByFileName(FileName, &Style);
1683 
1684   if (StyleName.startswith("{")) {
1685     // Parse YAML/JSON style from the command line.
1686     if (llvm::error_code ec = parseConfiguration(StyleName, &Style)) {
1687       llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
1688                    << FallbackStyle << " style\n";
1689     }
1690     return Style;
1691   }
1692 
1693   if (!StyleName.equals_lower("file")) {
1694     if (!getPredefinedStyle(StyleName, &Style))
1695       llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1696                    << " style\n";
1697     fillLanguageByFileName(FileName, &Style);
1698     return Style;
1699   }
1700 
1701   SmallString<128> UnsuitableConfigFiles;
1702   SmallString<128> Path(FileName);
1703   llvm::sys::fs::make_absolute(Path);
1704   for (StringRef Directory = Path; !Directory.empty();
1705        Directory = llvm::sys::path::parent_path(Directory)) {
1706     if (!llvm::sys::fs::is_directory(Directory))
1707       continue;
1708     SmallString<128> ConfigFile(Directory);
1709 
1710     llvm::sys::path::append(ConfigFile, ".clang-format");
1711     DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1712     bool IsFile = false;
1713     // Ignore errors from is_regular_file: we only need to know if we can read
1714     // the file or not.
1715     llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1716 
1717     if (!IsFile) {
1718       // Try _clang-format too, since dotfiles are not commonly used on Windows.
1719       ConfigFile = Directory;
1720       llvm::sys::path::append(ConfigFile, "_clang-format");
1721       DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1722       llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1723     }
1724 
1725     if (IsFile) {
1726       OwningPtr<llvm::MemoryBuffer> Text;
1727       if (llvm::error_code ec =
1728               llvm::MemoryBuffer::getFile(ConfigFile.c_str(), Text)) {
1729         llvm::errs() << ec.message() << "\n";
1730         break;
1731       }
1732       if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
1733         if (ec == llvm::errc::not_supported) {
1734           if (!UnsuitableConfigFiles.empty())
1735             UnsuitableConfigFiles.append(", ");
1736           UnsuitableConfigFiles.append(ConfigFile);
1737           continue;
1738         }
1739         llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
1740                      << "\n";
1741         break;
1742       }
1743       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
1744       return Style;
1745     }
1746   }
1747   llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
1748                << " style\n";
1749   if (!UnsuitableConfigFiles.empty()) {
1750     llvm::errs() << "Configuration file(s) do(es) not support "
1751                  << getLanguageName(Style.Language) << ": "
1752                  << UnsuitableConfigFiles << "\n";
1753   }
1754   return Style;
1755 }
1756 
1757 } // namespace format
1758 } // namespace clang
1759