1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "ContinuationIndenter.h"
17 #include "TokenAnnotator.h"
18 #include "UnwrappedLineParser.h"
19 #include "WhitespaceManager.h"
20 #include "clang/Basic/Diagnostic.h"
21 #include "clang/Basic/DiagnosticOptions.h"
22 #include "clang/Basic/SourceManager.h"
23 #include "clang/Format/Format.h"
24 #include "clang/Lex/Lexer.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/Support/Allocator.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/Path.h"
29 #include "llvm/Support/YAMLTraits.h"
30 #include <queue>
31 #include <string>
32 
33 #define DEBUG_TYPE "format-formatter"
34 
35 using clang::format::FormatStyle;
36 
37 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
38 
39 namespace llvm {
40 namespace yaml {
41 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
42   static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
43     IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
44     IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
45     IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
46   }
47 };
48 
49 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
50   static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
51     IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
52     IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
53     IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
54     IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
55     IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
56   }
57 };
58 
59 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
60   static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
61     IO.enumCase(Value, "Never", FormatStyle::UT_Never);
62     IO.enumCase(Value, "false", FormatStyle::UT_Never);
63     IO.enumCase(Value, "Always", FormatStyle::UT_Always);
64     IO.enumCase(Value, "true", FormatStyle::UT_Always);
65     IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
66   }
67 };
68 
69 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
70   static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
71     IO.enumCase(Value, "None", FormatStyle::SFS_None);
72     IO.enumCase(Value, "false", FormatStyle::SFS_None);
73     IO.enumCase(Value, "All", FormatStyle::SFS_All);
74     IO.enumCase(Value, "true", FormatStyle::SFS_All);
75     IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline);
76   }
77 };
78 
79 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
80   static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
81     IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
82     IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
83     IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
84     IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
85     IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
86   }
87 };
88 
89 template <>
90 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
91   static void enumeration(IO &IO,
92                           FormatStyle::NamespaceIndentationKind &Value) {
93     IO.enumCase(Value, "None", FormatStyle::NI_None);
94     IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
95     IO.enumCase(Value, "All", FormatStyle::NI_All);
96   }
97 };
98 
99 template <>
100 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
101   static void enumeration(IO &IO,
102                           FormatStyle::SpaceBeforeParensOptions &Value) {
103     IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
104     IO.enumCase(Value, "ControlStatements",
105                 FormatStyle::SBPO_ControlStatements);
106     IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
107 
108     // For backward compatibility.
109     IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
110     IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
111   }
112 };
113 
114 template <> struct MappingTraits<FormatStyle> {
115   static void mapping(IO &IO, FormatStyle &Style) {
116     // When reading, read the language first, we need it for getPredefinedStyle.
117     IO.mapOptional("Language", Style.Language);
118 
119     if (IO.outputting()) {
120       StringRef StylesArray[] = { "LLVM",    "Google", "Chromium",
121                                   "Mozilla", "WebKit", "GNU" };
122       ArrayRef<StringRef> Styles(StylesArray);
123       for (size_t i = 0, e = Styles.size(); i < e; ++i) {
124         StringRef StyleName(Styles[i]);
125         FormatStyle PredefinedStyle;
126         if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
127             Style == PredefinedStyle) {
128           IO.mapOptional("# BasedOnStyle", StyleName);
129           break;
130         }
131       }
132     } else {
133       StringRef BasedOnStyle;
134       IO.mapOptional("BasedOnStyle", BasedOnStyle);
135       if (!BasedOnStyle.empty()) {
136         FormatStyle::LanguageKind OldLanguage = Style.Language;
137         FormatStyle::LanguageKind Language =
138             ((FormatStyle *)IO.getContext())->Language;
139         if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
140           IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
141           return;
142         }
143         Style.Language = OldLanguage;
144       }
145     }
146 
147     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
148     IO.mapOptional("ConstructorInitializerIndentWidth",
149                    Style.ConstructorInitializerIndentWidth);
150     IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
151     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
152     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
153                    Style.AllowAllParametersOfDeclarationOnNextLine);
154     IO.mapOptional("AllowShortBlocksOnASingleLine",
155                    Style.AllowShortBlocksOnASingleLine);
156     IO.mapOptional("AllowShortIfStatementsOnASingleLine",
157                    Style.AllowShortIfStatementsOnASingleLine);
158     IO.mapOptional("AllowShortLoopsOnASingleLine",
159                    Style.AllowShortLoopsOnASingleLine);
160     IO.mapOptional("AllowShortFunctionsOnASingleLine",
161                    Style.AllowShortFunctionsOnASingleLine);
162     IO.mapOptional("AlwaysBreakTemplateDeclarations",
163                    Style.AlwaysBreakTemplateDeclarations);
164     IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
165                    Style.AlwaysBreakBeforeMultilineStrings);
166     IO.mapOptional("BreakBeforeBinaryOperators",
167                    Style.BreakBeforeBinaryOperators);
168     IO.mapOptional("BreakBeforeTernaryOperators",
169                    Style.BreakBeforeTernaryOperators);
170     IO.mapOptional("BreakConstructorInitializersBeforeComma",
171                    Style.BreakConstructorInitializersBeforeComma);
172     IO.mapOptional("BinPackParameters", Style.BinPackParameters);
173     IO.mapOptional("ColumnLimit", Style.ColumnLimit);
174     IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
175                    Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
176     IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding);
177     IO.mapOptional("ExperimentalAutoDetectBinPacking",
178                    Style.ExperimentalAutoDetectBinPacking);
179     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
180     IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
181     IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
182                    Style.KeepEmptyLinesAtTheStartOfBlocks);
183     IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
184     IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
185     IO.mapOptional("ObjCSpaceBeforeProtocolList",
186                    Style.ObjCSpaceBeforeProtocolList);
187     IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
188                    Style.PenaltyBreakBeforeFirstCallParameter);
189     IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
190     IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
191     IO.mapOptional("PenaltyBreakFirstLessLess",
192                    Style.PenaltyBreakFirstLessLess);
193     IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
194     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
195                    Style.PenaltyReturnTypeOnItsOwnLine);
196     IO.mapOptional("PointerBindsToType", Style.PointerBindsToType);
197     IO.mapOptional("SpacesBeforeTrailingComments",
198                    Style.SpacesBeforeTrailingComments);
199     IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
200     IO.mapOptional("Standard", Style.Standard);
201     IO.mapOptional("IndentWidth", Style.IndentWidth);
202     IO.mapOptional("TabWidth", Style.TabWidth);
203     IO.mapOptional("UseTab", Style.UseTab);
204     IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
205     IO.mapOptional("IndentFunctionDeclarationAfterType",
206                    Style.IndentFunctionDeclarationAfterType);
207     IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
208     IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
209     IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
210     IO.mapOptional("SpacesInCStyleCastParentheses",
211                    Style.SpacesInCStyleCastParentheses);
212     IO.mapOptional("SpacesInContainerLiterals",
213                    Style.SpacesInContainerLiterals);
214     IO.mapOptional("SpaceBeforeAssignmentOperators",
215                    Style.SpaceBeforeAssignmentOperators);
216     IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
217     IO.mapOptional("CommentPragmas", Style.CommentPragmas);
218     IO.mapOptional("ForEachMacros", Style.ForEachMacros);
219 
220     // For backward compatibility.
221     if (!IO.outputting()) {
222       IO.mapOptional("SpaceAfterControlStatementKeyword",
223                      Style.SpaceBeforeParens);
224     }
225     IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
226   }
227 };
228 
229 // Allows to read vector<FormatStyle> while keeping default values.
230 // IO.getContext() should contain a pointer to the FormatStyle structure, that
231 // will be used to get default values for missing keys.
232 // If the first element has no Language specified, it will be treated as the
233 // default one for the following elements.
234 template <> struct DocumentListTraits<std::vector<FormatStyle> > {
235   static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
236     return Seq.size();
237   }
238   static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
239                               size_t Index) {
240     if (Index >= Seq.size()) {
241       assert(Index == Seq.size());
242       FormatStyle Template;
243       if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
244         Template = Seq[0];
245       } else {
246         Template = *((const FormatStyle *)IO.getContext());
247         Template.Language = FormatStyle::LK_None;
248       }
249       Seq.resize(Index + 1, Template);
250     }
251     return Seq[Index];
252   }
253 };
254 }
255 }
256 
257 namespace clang {
258 namespace format {
259 
260 FormatStyle getLLVMStyle() {
261   FormatStyle LLVMStyle;
262   LLVMStyle.Language = FormatStyle::LK_Cpp;
263   LLVMStyle.AccessModifierOffset = -2;
264   LLVMStyle.AlignEscapedNewlinesLeft = false;
265   LLVMStyle.AlignTrailingComments = true;
266   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
267   LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
268   LLVMStyle.AllowShortBlocksOnASingleLine = false;
269   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
270   LLVMStyle.AllowShortLoopsOnASingleLine = false;
271   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
272   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
273   LLVMStyle.BinPackParameters = true;
274   LLVMStyle.BreakBeforeBinaryOperators = false;
275   LLVMStyle.BreakBeforeTernaryOperators = true;
276   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
277   LLVMStyle.BreakConstructorInitializersBeforeComma = false;
278   LLVMStyle.ColumnLimit = 80;
279   LLVMStyle.CommentPragmas = "^ IWYU pragma:";
280   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
281   LLVMStyle.ConstructorInitializerIndentWidth = 4;
282   LLVMStyle.ContinuationIndentWidth = 4;
283   LLVMStyle.Cpp11BracedListStyle = true;
284   LLVMStyle.DerivePointerBinding = false;
285   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
286   LLVMStyle.ForEachMacros.push_back("foreach");
287   LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
288   LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH");
289   LLVMStyle.IndentCaseLabels = false;
290   LLVMStyle.IndentFunctionDeclarationAfterType = false;
291   LLVMStyle.IndentWidth = 2;
292   LLVMStyle.TabWidth = 8;
293   LLVMStyle.MaxEmptyLinesToKeep = 1;
294   LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
295   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
296   LLVMStyle.ObjCSpaceAfterProperty = false;
297   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
298   LLVMStyle.PointerBindsToType = false;
299   LLVMStyle.SpacesBeforeTrailingComments = 1;
300   LLVMStyle.Standard = FormatStyle::LS_Cpp11;
301   LLVMStyle.UseTab = FormatStyle::UT_Never;
302   LLVMStyle.SpacesInParentheses = false;
303   LLVMStyle.SpaceInEmptyParentheses = false;
304   LLVMStyle.SpacesInContainerLiterals = true;
305   LLVMStyle.SpacesInCStyleCastParentheses = false;
306   LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
307   LLVMStyle.SpaceBeforeAssignmentOperators = true;
308   LLVMStyle.SpacesInAngles = false;
309 
310   LLVMStyle.PenaltyBreakComment = 300;
311   LLVMStyle.PenaltyBreakFirstLessLess = 120;
312   LLVMStyle.PenaltyBreakString = 1000;
313   LLVMStyle.PenaltyExcessCharacter = 1000000;
314   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
315   LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
316 
317   return LLVMStyle;
318 }
319 
320 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
321   FormatStyle GoogleStyle = getLLVMStyle();
322   GoogleStyle.Language = Language;
323 
324   GoogleStyle.AccessModifierOffset = -1;
325   GoogleStyle.AlignEscapedNewlinesLeft = true;
326   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
327   GoogleStyle.AllowShortLoopsOnASingleLine = true;
328   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
329   GoogleStyle.AlwaysBreakTemplateDeclarations = true;
330   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
331   GoogleStyle.DerivePointerBinding = true;
332   GoogleStyle.IndentCaseLabels = true;
333   GoogleStyle.IndentFunctionDeclarationAfterType = true;
334   GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
335   GoogleStyle.ObjCSpaceAfterProperty = false;
336   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
337   GoogleStyle.PointerBindsToType = true;
338   GoogleStyle.SpacesBeforeTrailingComments = 2;
339   GoogleStyle.Standard = FormatStyle::LS_Auto;
340 
341   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
342   GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
343 
344   if (Language == FormatStyle::LK_JavaScript) {
345     GoogleStyle.BreakBeforeTernaryOperators = false;
346     GoogleStyle.MaxEmptyLinesToKeep = 3;
347     GoogleStyle.SpacesInContainerLiterals = false;
348   } else if (Language == FormatStyle::LK_Proto) {
349     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
350     GoogleStyle.SpacesInContainerLiterals = false;
351   }
352 
353   return GoogleStyle;
354 }
355 
356 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
357   FormatStyle ChromiumStyle = getGoogleStyle(Language);
358   ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
359   ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
360   ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
361   ChromiumStyle.AllowShortLoopsOnASingleLine = false;
362   ChromiumStyle.BinPackParameters = false;
363   ChromiumStyle.DerivePointerBinding = false;
364   ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
365   return ChromiumStyle;
366 }
367 
368 FormatStyle getMozillaStyle() {
369   FormatStyle MozillaStyle = getLLVMStyle();
370   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
371   MozillaStyle.Cpp11BracedListStyle = false;
372   MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
373   MozillaStyle.DerivePointerBinding = true;
374   MozillaStyle.IndentCaseLabels = true;
375   MozillaStyle.ObjCSpaceAfterProperty = true;
376   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
377   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
378   MozillaStyle.PointerBindsToType = true;
379   MozillaStyle.Standard = FormatStyle::LS_Cpp03;
380   return MozillaStyle;
381 }
382 
383 FormatStyle getWebKitStyle() {
384   FormatStyle Style = getLLVMStyle();
385   Style.AccessModifierOffset = -4;
386   Style.AlignTrailingComments = false;
387   Style.BreakBeforeBinaryOperators = true;
388   Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
389   Style.BreakConstructorInitializersBeforeComma = true;
390   Style.Cpp11BracedListStyle = false;
391   Style.ColumnLimit = 0;
392   Style.IndentWidth = 4;
393   Style.NamespaceIndentation = FormatStyle::NI_Inner;
394   Style.ObjCSpaceAfterProperty = true;
395   Style.PointerBindsToType = true;
396   Style.Standard = FormatStyle::LS_Cpp03;
397   return Style;
398 }
399 
400 FormatStyle getGNUStyle() {
401   FormatStyle Style = getLLVMStyle();
402   Style.BreakBeforeBinaryOperators = true;
403   Style.BreakBeforeBraces = FormatStyle::BS_GNU;
404   Style.BreakBeforeTernaryOperators = true;
405   Style.Cpp11BracedListStyle = false;
406   Style.ColumnLimit = 79;
407   Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
408   Style.Standard = FormatStyle::LS_Cpp03;
409   return Style;
410 }
411 
412 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
413                         FormatStyle *Style) {
414   if (Name.equals_lower("llvm")) {
415     *Style = getLLVMStyle();
416   } else if (Name.equals_lower("chromium")) {
417     *Style = getChromiumStyle(Language);
418   } else if (Name.equals_lower("mozilla")) {
419     *Style = getMozillaStyle();
420   } else if (Name.equals_lower("google")) {
421     *Style = getGoogleStyle(Language);
422   } else if (Name.equals_lower("webkit")) {
423     *Style = getWebKitStyle();
424   } else if (Name.equals_lower("gnu")) {
425     *Style = getGNUStyle();
426   } else {
427     return false;
428   }
429 
430   Style->Language = Language;
431   return true;
432 }
433 
434 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
435   assert(Style);
436   FormatStyle::LanguageKind Language = Style->Language;
437   assert(Language != FormatStyle::LK_None);
438   if (Text.trim().empty())
439     return llvm::make_error_code(llvm::errc::invalid_argument);
440 
441   std::vector<FormatStyle> Styles;
442   llvm::yaml::Input Input(Text);
443   // DocumentListTraits<vector<FormatStyle>> uses the context to get default
444   // values for the fields, keys for which are missing from the configuration.
445   // Mapping also uses the context to get the language to find the correct
446   // base style.
447   Input.setContext(Style);
448   Input >> Styles;
449   if (Input.error())
450     return Input.error();
451 
452   for (unsigned i = 0; i < Styles.size(); ++i) {
453     // Ensures that only the first configuration can skip the Language option.
454     if (Styles[i].Language == FormatStyle::LK_None && i != 0)
455       return llvm::make_error_code(llvm::errc::invalid_argument);
456     // Ensure that each language is configured at most once.
457     for (unsigned j = 0; j < i; ++j) {
458       if (Styles[i].Language == Styles[j].Language) {
459         DEBUG(llvm::dbgs()
460               << "Duplicate languages in the config file on positions " << j
461               << " and " << i << "\n");
462         return llvm::make_error_code(llvm::errc::invalid_argument);
463       }
464     }
465   }
466   // Look for a suitable configuration starting from the end, so we can
467   // find the configuration for the specific language first, and the default
468   // configuration (which can only be at slot 0) after it.
469   for (int i = Styles.size() - 1; i >= 0; --i) {
470     if (Styles[i].Language == Language ||
471         Styles[i].Language == FormatStyle::LK_None) {
472       *Style = Styles[i];
473       Style->Language = Language;
474       return llvm::make_error_code(llvm::errc::success);
475     }
476   }
477   return llvm::make_error_code(llvm::errc::not_supported);
478 }
479 
480 std::string configurationAsText(const FormatStyle &Style) {
481   std::string Text;
482   llvm::raw_string_ostream Stream(Text);
483   llvm::yaml::Output Output(Stream);
484   // We use the same mapping method for input and output, so we need a non-const
485   // reference here.
486   FormatStyle NonConstStyle = Style;
487   Output << NonConstStyle;
488   return Stream.str();
489 }
490 
491 namespace {
492 
493 class NoColumnLimitFormatter {
494 public:
495   NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {}
496 
497   /// \brief Formats the line starting at \p State, simply keeping all of the
498   /// input's line breaking decisions.
499   void format(unsigned FirstIndent, const AnnotatedLine *Line) {
500     LineState State =
501         Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false);
502     while (State.NextToken) {
503       bool Newline =
504           Indenter->mustBreak(State) ||
505           (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
506       Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
507     }
508   }
509 
510 private:
511   ContinuationIndenter *Indenter;
512 };
513 
514 class LineJoiner {
515 public:
516   LineJoiner(const FormatStyle &Style) : Style(Style) {}
517 
518   /// \brief Calculates how many lines can be merged into 1 starting at \p I.
519   unsigned
520   tryFitMultipleLinesInOne(unsigned Indent,
521                            SmallVectorImpl<AnnotatedLine *>::const_iterator I,
522                            SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
523     // We can never merge stuff if there are trailing line comments.
524     const AnnotatedLine *TheLine = *I;
525     if (TheLine->Last->Type == TT_LineComment)
526       return 0;
527 
528     if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
529       return 0;
530 
531     unsigned Limit =
532         Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
533     // If we already exceed the column limit, we set 'Limit' to 0. The different
534     // tryMerge..() functions can then decide whether to still do merging.
535     Limit = TheLine->Last->TotalLength > Limit
536                 ? 0
537                 : Limit - TheLine->Last->TotalLength;
538 
539     if (I + 1 == E || I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore)
540       return 0;
541 
542     // FIXME: TheLine->Level != 0 might or might not be the right check to do.
543     // If necessary, change to something smarter.
544     bool MergeShortFunctions =
545         Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All ||
546         (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline &&
547          TheLine->Level != 0);
548 
549     if (TheLine->Last->Type == TT_FunctionLBrace &&
550         TheLine->First != TheLine->Last) {
551       return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0;
552     }
553     if (TheLine->Last->is(tok::l_brace)) {
554       return Style.BreakBeforeBraces == FormatStyle::BS_Attach
555                  ? tryMergeSimpleBlock(I, E, Limit)
556                  : 0;
557     }
558     if (I[1]->First->Type == TT_FunctionLBrace &&
559         Style.BreakBeforeBraces != FormatStyle::BS_Attach) {
560       // Check for Limit <= 2 to account for the " {".
561       if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine)))
562         return 0;
563       Limit -= 2;
564 
565       unsigned MergedLines = 0;
566       if (MergeShortFunctions) {
567         MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
568         // If we managed to merge the block, count the function header, which is
569         // on a separate line.
570         if (MergedLines > 0)
571           ++MergedLines;
572       }
573       return MergedLines;
574     }
575     if (TheLine->First->is(tok::kw_if)) {
576       return Style.AllowShortIfStatementsOnASingleLine
577                  ? tryMergeSimpleControlStatement(I, E, Limit)
578                  : 0;
579     }
580     if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
581       return Style.AllowShortLoopsOnASingleLine
582                  ? tryMergeSimpleControlStatement(I, E, Limit)
583                  : 0;
584     }
585     if (TheLine->InPPDirective &&
586         (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
587       return tryMergeSimplePPDirective(I, E, Limit);
588     }
589     return 0;
590   }
591 
592 private:
593   unsigned
594   tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
595                             SmallVectorImpl<AnnotatedLine *>::const_iterator E,
596                             unsigned Limit) {
597     if (Limit == 0)
598       return 0;
599     if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)
600       return 0;
601     if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
602       return 0;
603     if (1 + I[1]->Last->TotalLength > Limit)
604       return 0;
605     return 1;
606   }
607 
608   unsigned tryMergeSimpleControlStatement(
609       SmallVectorImpl<AnnotatedLine *>::const_iterator I,
610       SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
611     if (Limit == 0)
612       return 0;
613     if ((Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
614          Style.BreakBeforeBraces == FormatStyle::BS_GNU) &&
615         (I[1]->First->is(tok::l_brace) && !Style.AllowShortBlocksOnASingleLine))
616       return 0;
617     if (I[1]->InPPDirective != (*I)->InPPDirective ||
618         (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline))
619       return 0;
620     Limit = limitConsideringMacros(I + 1, E, Limit);
621     AnnotatedLine &Line = **I;
622     if (Line.Last->isNot(tok::r_paren))
623       return 0;
624     if (1 + I[1]->Last->TotalLength > Limit)
625       return 0;
626     if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
627                              tok::kw_while) ||
628         I[1]->First->Type == TT_LineComment)
629       return 0;
630     // Only inline simple if's (no nested if or else).
631     if (I + 2 != E && Line.First->is(tok::kw_if) &&
632         I[2]->First->is(tok::kw_else))
633       return 0;
634     return 1;
635   }
636 
637   unsigned
638   tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
639                       SmallVectorImpl<AnnotatedLine *>::const_iterator E,
640                       unsigned Limit) {
641     AnnotatedLine &Line = **I;
642 
643     // Don't merge ObjC @ keywords and methods.
644     if (Line.First->isOneOf(tok::at, tok::minus, tok::plus))
645       return 0;
646 
647     // Check that the current line allows merging. This depends on whether we
648     // are in a control flow statements as well as several style flags.
649     if (Line.First->isOneOf(tok::kw_else, tok::kw_case))
650       return 0;
651     if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try,
652                             tok::kw_catch, tok::kw_for, tok::r_brace)) {
653       if (!Style.AllowShortBlocksOnASingleLine)
654         return 0;
655       if (!Style.AllowShortIfStatementsOnASingleLine &&
656           Line.First->is(tok::kw_if))
657         return 0;
658       if (!Style.AllowShortLoopsOnASingleLine &&
659           Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for))
660         return 0;
661       // FIXME: Consider an option to allow short exception handling clauses on
662       // a single line.
663       if (Line.First->isOneOf(tok::kw_try, tok::kw_catch))
664         return 0;
665     }
666 
667     FormatToken *Tok = I[1]->First;
668     if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
669         (Tok->getNextNonComment() == nullptr ||
670          Tok->getNextNonComment()->is(tok::semi))) {
671       // We merge empty blocks even if the line exceeds the column limit.
672       Tok->SpacesRequiredBefore = 0;
673       Tok->CanBreakBefore = true;
674       return 1;
675     } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
676       // We don't merge short records.
677       if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct))
678         return 0;
679 
680       // Check that we still have three lines and they fit into the limit.
681       if (I + 2 == E || I[2]->Type == LT_Invalid)
682         return 0;
683       Limit = limitConsideringMacros(I + 2, E, Limit);
684 
685       if (!nextTwoLinesFitInto(I, Limit))
686         return 0;
687 
688       // Second, check that the next line does not contain any braces - if it
689       // does, readability declines when putting it into a single line.
690       if (I[1]->Last->Type == TT_LineComment)
691         return 0;
692       do {
693         if (Tok->isOneOf(tok::l_brace, tok::r_brace) &&
694             !Style.AllowShortBlocksOnASingleLine)
695           return 0;
696         Tok = Tok->Next;
697       } while (Tok);
698 
699       // Last, check that the third line starts with a closing brace.
700       Tok = I[2]->First;
701       if (Tok->isNot(tok::r_brace))
702         return 0;
703 
704       return 2;
705     }
706     return 0;
707   }
708 
709   /// Returns the modified column limit for \p I if it is inside a macro and
710   /// needs a trailing '\'.
711   unsigned
712   limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
713                          SmallVectorImpl<AnnotatedLine *>::const_iterator E,
714                          unsigned Limit) {
715     if (I[0]->InPPDirective && I + 1 != E &&
716         !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) {
717       return Limit < 2 ? 0 : Limit - 2;
718     }
719     return Limit;
720   }
721 
722   bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
723                            unsigned Limit) {
724     if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore)
725       return false;
726     return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
727   }
728 
729   bool containsMustBreak(const AnnotatedLine *Line) {
730     for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
731       if (Tok->MustBreakBefore)
732         return true;
733     }
734     return false;
735   }
736 
737   const FormatStyle &Style;
738 };
739 
740 class UnwrappedLineFormatter {
741 public:
742   UnwrappedLineFormatter(ContinuationIndenter *Indenter,
743                          WhitespaceManager *Whitespaces,
744                          const FormatStyle &Style)
745       : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
746         Joiner(Style) {}
747 
748   unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
749                   int AdditionalIndent = 0, bool FixBadIndentation = false) {
750     // Try to look up already computed penalty in DryRun-mode.
751     std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey(
752         &Lines, AdditionalIndent);
753     auto CacheIt = PenaltyCache.find(CacheKey);
754     if (DryRun && CacheIt != PenaltyCache.end())
755       return CacheIt->second;
756 
757     assert(!Lines.empty());
758     unsigned Penalty = 0;
759     std::vector<int> IndentForLevel;
760     for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i)
761       IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
762     const AnnotatedLine *PreviousLine = nullptr;
763     for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(),
764                                                           E = Lines.end();
765          I != E; ++I) {
766       const AnnotatedLine &TheLine = **I;
767       const FormatToken *FirstTok = TheLine.First;
768       int Offset = getIndentOffset(*FirstTok);
769 
770       // Determine indent and try to merge multiple unwrapped lines.
771       unsigned Indent;
772       if (TheLine.InPPDirective) {
773         Indent = TheLine.Level * Style.IndentWidth;
774       } else {
775         while (IndentForLevel.size() <= TheLine.Level)
776           IndentForLevel.push_back(-1);
777         IndentForLevel.resize(TheLine.Level + 1);
778         Indent = getIndent(IndentForLevel, TheLine.Level);
779       }
780       unsigned LevelIndent = Indent;
781       if (static_cast<int>(Indent) + Offset >= 0)
782         Indent += Offset;
783 
784       // Merge multiple lines if possible.
785       unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E);
786       if (MergedLines > 0 && Style.ColumnLimit == 0) {
787         // Disallow line merging if there is a break at the start of one of the
788         // input lines.
789         for (unsigned i = 0; i < MergedLines; ++i) {
790           if (I[i + 1]->First->NewlinesBefore > 0)
791             MergedLines = 0;
792         }
793       }
794       if (!DryRun) {
795         for (unsigned i = 0; i < MergedLines; ++i) {
796           join(*I[i], *I[i + 1]);
797         }
798       }
799       I += MergedLines;
800 
801       bool FixIndentation =
802           FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn);
803       if (TheLine.First->is(tok::eof)) {
804         if (PreviousLine && PreviousLine->Affected && !DryRun) {
805           // Remove the file's trailing whitespace.
806           unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u);
807           Whitespaces->replaceWhitespace(*TheLine.First, Newlines,
808                                          /*IndentLevel=*/0, /*Spaces=*/0,
809                                          /*TargetColumn=*/0);
810         }
811       } else if (TheLine.Type != LT_Invalid &&
812                  (TheLine.Affected || FixIndentation)) {
813         if (FirstTok->WhitespaceRange.isValid()) {
814           if (!DryRun)
815             formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
816                              Indent, TheLine.InPPDirective);
817         } else {
818           Indent = LevelIndent = FirstTok->OriginalColumn;
819         }
820 
821         // If everything fits on a single line, just put it there.
822         unsigned ColumnLimit = Style.ColumnLimit;
823         if (I + 1 != E) {
824           AnnotatedLine *NextLine = I[1];
825           if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline)
826             ColumnLimit = getColumnLimit(TheLine.InPPDirective);
827         }
828 
829         if (TheLine.Last->TotalLength + Indent <= ColumnLimit) {
830           LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun);
831           while (State.NextToken)
832             Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
833         } else if (Style.ColumnLimit == 0) {
834           // FIXME: Implement nested blocks for ColumnLimit = 0.
835           NoColumnLimitFormatter Formatter(Indenter);
836           if (!DryRun)
837             Formatter.format(Indent, &TheLine);
838         } else {
839           Penalty += format(TheLine, Indent, DryRun);
840         }
841 
842         if (!TheLine.InPPDirective)
843           IndentForLevel[TheLine.Level] = LevelIndent;
844       } else if (TheLine.ChildrenAffected) {
845         format(TheLine.Children, DryRun);
846       } else {
847         // Format the first token if necessary, and notify the WhitespaceManager
848         // about the unchanged whitespace.
849         for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) {
850           if (Tok == TheLine.First &&
851               (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
852             unsigned LevelIndent = Tok->OriginalColumn;
853             if (!DryRun) {
854               // Remove trailing whitespace of the previous line.
855               if ((PreviousLine && PreviousLine->Affected) ||
856                   TheLine.LeadingEmptyLinesAffected) {
857                 formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent,
858                                  TheLine.InPPDirective);
859               } else {
860                 Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
861               }
862             }
863 
864             if (static_cast<int>(LevelIndent) - Offset >= 0)
865               LevelIndent -= Offset;
866             if (Tok->isNot(tok::comment) && !TheLine.InPPDirective)
867               IndentForLevel[TheLine.Level] = LevelIndent;
868           } else if (!DryRun) {
869             Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
870           }
871         }
872       }
873       if (!DryRun) {
874         for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) {
875           Tok->Finalized = true;
876         }
877       }
878       PreviousLine = *I;
879     }
880     PenaltyCache[CacheKey] = Penalty;
881     return Penalty;
882   }
883 
884 private:
885   /// \brief Formats an \c AnnotatedLine and returns the penalty.
886   ///
887   /// If \p DryRun is \c false, directly applies the changes.
888   unsigned format(const AnnotatedLine &Line, unsigned FirstIndent,
889                   bool DryRun) {
890     LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
891 
892     // If the ObjC method declaration does not fit on a line, we should format
893     // it with one arg per line.
894     if (State.Line->Type == LT_ObjCMethodDecl)
895       State.Stack.back().BreakBeforeParameter = true;
896 
897     // Find best solution in solution space.
898     return analyzeSolutionSpace(State, DryRun);
899   }
900 
901   /// \brief An edge in the solution space from \c Previous->State to \c State,
902   /// inserting a newline dependent on the \c NewLine.
903   struct StateNode {
904     StateNode(const LineState &State, bool NewLine, StateNode *Previous)
905         : State(State), NewLine(NewLine), Previous(Previous) {}
906     LineState State;
907     bool NewLine;
908     StateNode *Previous;
909   };
910 
911   /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
912   ///
913   /// In case of equal penalties, we want to prefer states that were inserted
914   /// first. During state generation we make sure that we insert states first
915   /// that break the line as late as possible.
916   typedef std::pair<unsigned, unsigned> OrderedPenalty;
917 
918   /// \brief An item in the prioritized BFS search queue. The \c StateNode's
919   /// \c State has the given \c OrderedPenalty.
920   typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
921 
922   /// \brief The BFS queue type.
923   typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
924                               std::greater<QueueItem> > QueueType;
925 
926   /// \brief Get the offset of the line relatively to the level.
927   ///
928   /// For example, 'public:' labels in classes are offset by 1 or 2
929   /// characters to the left from their level.
930   int getIndentOffset(const FormatToken &RootToken) {
931     if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
932       return Style.AccessModifierOffset;
933     return 0;
934   }
935 
936   /// \brief Add a new line and the required indent before the first Token
937   /// of the \c UnwrappedLine if there was no structural parsing error.
938   void formatFirstToken(FormatToken &RootToken,
939                         const AnnotatedLine *PreviousLine, unsigned IndentLevel,
940                         unsigned Indent, bool InPPDirective) {
941     unsigned Newlines =
942         std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
943     // Remove empty lines before "}" where applicable.
944     if (RootToken.is(tok::r_brace) &&
945         (!RootToken.Next ||
946          (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
947       Newlines = std::min(Newlines, 1u);
948     if (Newlines == 0 && !RootToken.IsFirst)
949       Newlines = 1;
950     if (RootToken.IsFirst && !RootToken.HasUnescapedNewline)
951       Newlines = 0;
952 
953     // Remove empty lines after "{".
954     if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine &&
955         PreviousLine->Last->is(tok::l_brace) &&
956         PreviousLine->First->isNot(tok::kw_namespace))
957       Newlines = 1;
958 
959     // Insert extra new line before access specifiers.
960     if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
961         RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
962       ++Newlines;
963 
964     // Remove empty lines after access specifiers.
965     if (PreviousLine && PreviousLine->First->isAccessSpecifier())
966       Newlines = std::min(1u, Newlines);
967 
968     Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent,
969                                    Indent, InPPDirective &&
970                                                !RootToken.HasUnescapedNewline);
971   }
972 
973   /// \brief Get the indent of \p Level from \p IndentForLevel.
974   ///
975   /// \p IndentForLevel must contain the indent for the level \c l
976   /// at \p IndentForLevel[l], or a value < 0 if the indent for
977   /// that level is unknown.
978   unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
979     if (IndentForLevel[Level] != -1)
980       return IndentForLevel[Level];
981     if (Level == 0)
982       return 0;
983     return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
984   }
985 
986   void join(AnnotatedLine &A, const AnnotatedLine &B) {
987     assert(!A.Last->Next);
988     assert(!B.First->Previous);
989     if (B.Affected)
990       A.Affected = true;
991     A.Last->Next = B.First;
992     B.First->Previous = A.Last;
993     B.First->CanBreakBefore = true;
994     unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
995     for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
996       Tok->TotalLength += LengthA;
997       A.Last = Tok;
998     }
999   }
1000 
1001   unsigned getColumnLimit(bool InPPDirective) const {
1002     // In preprocessor directives reserve two chars for trailing " \"
1003     return Style.ColumnLimit - (InPPDirective ? 2 : 0);
1004   }
1005 
1006   /// \brief Analyze the entire solution space starting from \p InitialState.
1007   ///
1008   /// This implements a variant of Dijkstra's algorithm on the graph that spans
1009   /// the solution space (\c LineStates are the nodes). The algorithm tries to
1010   /// find the shortest path (the one with lowest penalty) from \p InitialState
1011   /// to a state where all tokens are placed. Returns the penalty.
1012   ///
1013   /// If \p DryRun is \c false, directly applies the changes.
1014   unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) {
1015     std::set<LineState> Seen;
1016 
1017     // Increasing count of \c StateNode items we have created. This is used to
1018     // create a deterministic order independent of the container.
1019     unsigned Count = 0;
1020     QueueType Queue;
1021 
1022     // Insert start element into queue.
1023     StateNode *Node =
1024         new (Allocator.Allocate()) StateNode(InitialState, false, nullptr);
1025     Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
1026     ++Count;
1027 
1028     unsigned Penalty = 0;
1029 
1030     // While not empty, take first element and follow edges.
1031     while (!Queue.empty()) {
1032       Penalty = Queue.top().first.first;
1033       StateNode *Node = Queue.top().second;
1034       if (!Node->State.NextToken) {
1035         DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
1036         break;
1037       }
1038       Queue.pop();
1039 
1040       // Cut off the analysis of certain solutions if the analysis gets too
1041       // complex. See description of IgnoreStackForComparison.
1042       if (Count > 10000)
1043         Node->State.IgnoreStackForComparison = true;
1044 
1045       if (!Seen.insert(Node->State).second)
1046         // State already examined with lower penalty.
1047         continue;
1048 
1049       FormatDecision LastFormat = Node->State.NextToken->Decision;
1050       if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
1051         addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
1052       if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
1053         addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
1054     }
1055 
1056     if (Queue.empty()) {
1057       // We were unable to find a solution, do nothing.
1058       // FIXME: Add diagnostic?
1059       DEBUG(llvm::dbgs() << "Could not find a solution.\n");
1060       return 0;
1061     }
1062 
1063     // Reconstruct the solution.
1064     if (!DryRun)
1065       reconstructPath(InitialState, Queue.top().second);
1066 
1067     DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
1068     DEBUG(llvm::dbgs() << "---\n");
1069 
1070     return Penalty;
1071   }
1072 
1073   void reconstructPath(LineState &State, StateNode *Current) {
1074     std::deque<StateNode *> Path;
1075     // We do not need a break before the initial token.
1076     while (Current->Previous) {
1077       Path.push_front(Current);
1078       Current = Current->Previous;
1079     }
1080     for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
1081          I != E; ++I) {
1082       unsigned Penalty = 0;
1083       formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
1084       Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
1085 
1086       DEBUG({
1087         if ((*I)->NewLine) {
1088           llvm::dbgs() << "Penalty for placing "
1089                        << (*I)->Previous->State.NextToken->Tok.getName() << ": "
1090                        << Penalty << "\n";
1091         }
1092       });
1093     }
1094   }
1095 
1096   /// \brief Add the following state to the analysis queue \c Queue.
1097   ///
1098   /// Assume the current state is \p PreviousNode and has been reached with a
1099   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
1100   void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
1101                            bool NewLine, unsigned *Count, QueueType *Queue) {
1102     if (NewLine && !Indenter->canBreak(PreviousNode->State))
1103       return;
1104     if (!NewLine && Indenter->mustBreak(PreviousNode->State))
1105       return;
1106 
1107     StateNode *Node = new (Allocator.Allocate())
1108         StateNode(PreviousNode->State, NewLine, PreviousNode);
1109     if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
1110       return;
1111 
1112     Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
1113 
1114     Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
1115     ++(*Count);
1116   }
1117 
1118   /// \brief If the \p State's next token is an r_brace closing a nested block,
1119   /// format the nested block before it.
1120   ///
1121   /// Returns \c true if all children could be placed successfully and adapts
1122   /// \p Penalty as well as \p State. If \p DryRun is false, also directly
1123   /// creates changes using \c Whitespaces.
1124   ///
1125   /// The crucial idea here is that children always get formatted upon
1126   /// encountering the closing brace right after the nested block. Now, if we
1127   /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
1128   /// \c false), the entire block has to be kept on the same line (which is only
1129   /// possible if it fits on the line, only contains a single statement, etc.
1130   ///
1131   /// If \p NewLine is true, we format the nested block on separate lines, i.e.
1132   /// break after the "{", format all lines with correct indentation and the put
1133   /// the closing "}" on yet another new line.
1134   ///
1135   /// This enables us to keep the simple structure of the
1136   /// \c UnwrappedLineFormatter, where we only have two options for each token:
1137   /// break or don't break.
1138   bool formatChildren(LineState &State, bool NewLine, bool DryRun,
1139                       unsigned &Penalty) {
1140     FormatToken &Previous = *State.NextToken->Previous;
1141     const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
1142     if (!LBrace || LBrace->isNot(tok::l_brace) ||
1143         LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
1144       // The previous token does not open a block. Nothing to do. We don't
1145       // assert so that we can simply call this function for all tokens.
1146       return true;
1147 
1148     if (NewLine) {
1149       int AdditionalIndent = State.Stack.back().Indent -
1150                              Previous.Children[0]->Level * Style.IndentWidth;
1151       Penalty += format(Previous.Children, DryRun, AdditionalIndent,
1152                         /*FixBadIndentation=*/true);
1153       return true;
1154     }
1155 
1156     // Cannot merge multiple statements into a single line.
1157     if (Previous.Children.size() > 1)
1158       return false;
1159 
1160     // Cannot merge into one line if this line ends on a comment.
1161     if (Previous.is(tok::comment))
1162       return false;
1163 
1164     // We can't put the closing "}" on a line with a trailing comment.
1165     if (Previous.Children[0]->Last->isTrailingComment())
1166       return false;
1167 
1168     // If the child line exceeds the column limit, we wouldn't want to merge it.
1169     // We add +2 for the trailing " }".
1170     if (Style.ColumnLimit > 0 &&
1171         Previous.Children[0]->Last->TotalLength + State.Column + 2 >
1172             Style.ColumnLimit)
1173       return false;
1174 
1175     if (!DryRun) {
1176       Whitespaces->replaceWhitespace(
1177           *Previous.Children[0]->First,
1178           /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
1179           /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
1180     }
1181     Penalty += format(*Previous.Children[0], State.Column + 1, DryRun);
1182 
1183     State.Column += 1 + Previous.Children[0]->Last->TotalLength;
1184     return true;
1185   }
1186 
1187   ContinuationIndenter *Indenter;
1188   WhitespaceManager *Whitespaces;
1189   FormatStyle Style;
1190   LineJoiner Joiner;
1191 
1192   llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
1193 
1194   // Cache to store the penalty of formatting a vector of AnnotatedLines
1195   // starting from a specific additional offset. Improves performance if there
1196   // are many nested blocks.
1197   std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>,
1198            unsigned> PenaltyCache;
1199 };
1200 
1201 class FormatTokenLexer {
1202 public:
1203   FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
1204                    encoding::Encoding Encoding)
1205       : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
1206         Column(0), TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr),
1207         Style(Style), IdentTable(getFormattingLangOpts()), Encoding(Encoding),
1208         FirstInLineIndex(0) {
1209     Lex.SetKeepWhitespaceMode(true);
1210 
1211     for (const std::string &ForEachMacro : Style.ForEachMacros)
1212       ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
1213     std::sort(ForEachMacros.begin(), ForEachMacros.end());
1214   }
1215 
1216   ArrayRef<FormatToken *> lex() {
1217     assert(Tokens.empty());
1218     assert(FirstInLineIndex == 0);
1219     do {
1220       Tokens.push_back(getNextToken());
1221       tryMergePreviousTokens();
1222       if (Tokens.back()->NewlinesBefore > 0)
1223         FirstInLineIndex = Tokens.size() - 1;
1224     } while (Tokens.back()->Tok.isNot(tok::eof));
1225     return Tokens;
1226   }
1227 
1228   IdentifierTable &getIdentTable() { return IdentTable; }
1229 
1230 private:
1231   void tryMergePreviousTokens() {
1232     if (tryMerge_TMacro())
1233       return;
1234     if (tryMergeConflictMarkers())
1235       return;
1236 
1237     if (Style.Language == FormatStyle::LK_JavaScript) {
1238       if (tryMergeEscapeSequence())
1239         return;
1240       if (tryMergeJSRegexLiteral())
1241         return;
1242 
1243       static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
1244       static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
1245       static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater,
1246                                                tok::greaterequal };
1247       static tok::TokenKind JSRightArrow[] = { tok::equal, tok::greater };
1248       // FIXME: We probably need to change token type to mimic operator with the
1249       // correct priority.
1250       if (tryMergeTokens(JSIdentity))
1251         return;
1252       if (tryMergeTokens(JSNotIdentity))
1253         return;
1254       if (tryMergeTokens(JSShiftEqual))
1255         return;
1256       if (tryMergeTokens(JSRightArrow))
1257         return;
1258     }
1259   }
1260 
1261   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) {
1262     if (Tokens.size() < Kinds.size())
1263       return false;
1264 
1265     SmallVectorImpl<FormatToken *>::const_iterator First =
1266         Tokens.end() - Kinds.size();
1267     if (!First[0]->is(Kinds[0]))
1268       return false;
1269     unsigned AddLength = 0;
1270     for (unsigned i = 1; i < Kinds.size(); ++i) {
1271       if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
1272                                          First[i]->WhitespaceRange.getEnd())
1273         return false;
1274       AddLength += First[i]->TokenText.size();
1275     }
1276     Tokens.resize(Tokens.size() - Kinds.size() + 1);
1277     First[0]->TokenText = StringRef(First[0]->TokenText.data(),
1278                                     First[0]->TokenText.size() + AddLength);
1279     First[0]->ColumnWidth += AddLength;
1280     return true;
1281   }
1282 
1283   // Tries to merge an escape sequence, i.e. a "\\" and the following
1284   // character. Use e.g. inside JavaScript regex literals.
1285   bool tryMergeEscapeSequence() {
1286     if (Tokens.size() < 2)
1287       return false;
1288     FormatToken *Previous = Tokens[Tokens.size() - 2];
1289     if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\" ||
1290         Tokens.back()->NewlinesBefore != 0)
1291       return false;
1292     Previous->ColumnWidth += Tokens.back()->ColumnWidth;
1293     StringRef Text = Previous->TokenText;
1294     Previous->TokenText =
1295         StringRef(Text.data(), Text.size() + Tokens.back()->TokenText.size());
1296     Tokens.resize(Tokens.size() - 1);
1297     return true;
1298   }
1299 
1300   // Try to determine whether the current token ends a JavaScript regex literal.
1301   // We heuristically assume that this is a regex literal if we find two
1302   // unescaped slashes on a line and the token before the first slash is one of
1303   // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by
1304   // a division.
1305   bool tryMergeJSRegexLiteral() {
1306     if (Tokens.size() < 2 || Tokens.back()->isNot(tok::slash) ||
1307         (Tokens[Tokens.size() - 2]->is(tok::unknown) &&
1308          Tokens[Tokens.size() - 2]->TokenText == "\\"))
1309       return false;
1310     unsigned TokenCount = 0;
1311     unsigned LastColumn = Tokens.back()->OriginalColumn;
1312     for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
1313       ++TokenCount;
1314       if (I[0]->is(tok::slash) && I + 1 != E &&
1315           (I[1]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, tok::r_brace,
1316                          tok::exclaim, tok::l_square, tok::colon, tok::comma,
1317                          tok::question, tok::kw_return) ||
1318            I[1]->isBinaryOperator())) {
1319         Tokens.resize(Tokens.size() - TokenCount);
1320         Tokens.back()->Tok.setKind(tok::unknown);
1321         Tokens.back()->Type = TT_RegexLiteral;
1322         Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn;
1323         return true;
1324       }
1325 
1326       // There can't be a newline inside a regex literal.
1327       if (I[0]->NewlinesBefore > 0)
1328         return false;
1329     }
1330     return false;
1331   }
1332 
1333   bool tryMerge_TMacro() {
1334     if (Tokens.size() < 4)
1335       return false;
1336     FormatToken *Last = Tokens.back();
1337     if (!Last->is(tok::r_paren))
1338       return false;
1339 
1340     FormatToken *String = Tokens[Tokens.size() - 2];
1341     if (!String->is(tok::string_literal) || String->IsMultiline)
1342       return false;
1343 
1344     if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
1345       return false;
1346 
1347     FormatToken *Macro = Tokens[Tokens.size() - 4];
1348     if (Macro->TokenText != "_T")
1349       return false;
1350 
1351     const char *Start = Macro->TokenText.data();
1352     const char *End = Last->TokenText.data() + Last->TokenText.size();
1353     String->TokenText = StringRef(Start, End - Start);
1354     String->IsFirst = Macro->IsFirst;
1355     String->LastNewlineOffset = Macro->LastNewlineOffset;
1356     String->WhitespaceRange = Macro->WhitespaceRange;
1357     String->OriginalColumn = Macro->OriginalColumn;
1358     String->ColumnWidth = encoding::columnWidthWithTabs(
1359         String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
1360 
1361     Tokens.pop_back();
1362     Tokens.pop_back();
1363     Tokens.pop_back();
1364     Tokens.back() = String;
1365     return true;
1366   }
1367 
1368   bool tryMergeConflictMarkers() {
1369     if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
1370       return false;
1371 
1372     // Conflict lines look like:
1373     // <marker> <text from the vcs>
1374     // For example:
1375     // >>>>>>> /file/in/file/system at revision 1234
1376     //
1377     // We merge all tokens in a line that starts with a conflict marker
1378     // into a single token with a special token type that the unwrapped line
1379     // parser will use to correctly rebuild the underlying code.
1380 
1381     FileID ID;
1382     // Get the position of the first token in the line.
1383     unsigned FirstInLineOffset;
1384     std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
1385         Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
1386     StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
1387     // Calculate the offset of the start of the current line.
1388     auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
1389     if (LineOffset == StringRef::npos) {
1390       LineOffset = 0;
1391     } else {
1392       ++LineOffset;
1393     }
1394 
1395     auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
1396     StringRef LineStart;
1397     if (FirstSpace == StringRef::npos) {
1398       LineStart = Buffer.substr(LineOffset);
1399     } else {
1400       LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
1401     }
1402 
1403     TokenType Type = TT_Unknown;
1404     if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
1405       Type = TT_ConflictStart;
1406     } else if (LineStart == "|||||||" || LineStart == "=======" ||
1407                LineStart == "====") {
1408       Type = TT_ConflictAlternative;
1409     } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
1410       Type = TT_ConflictEnd;
1411     }
1412 
1413     if (Type != TT_Unknown) {
1414       FormatToken *Next = Tokens.back();
1415 
1416       Tokens.resize(FirstInLineIndex + 1);
1417       // We do not need to build a complete token here, as we will skip it
1418       // during parsing anyway (as we must not touch whitespace around conflict
1419       // markers).
1420       Tokens.back()->Type = Type;
1421       Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
1422 
1423       Tokens.push_back(Next);
1424       return true;
1425     }
1426 
1427     return false;
1428   }
1429 
1430   FormatToken *getNextToken() {
1431     if (GreaterStashed) {
1432       // Create a synthesized second '>' token.
1433       // FIXME: Increment Column and set OriginalColumn.
1434       Token Greater = FormatTok->Tok;
1435       FormatTok = new (Allocator.Allocate()) FormatToken;
1436       FormatTok->Tok = Greater;
1437       SourceLocation GreaterLocation =
1438           FormatTok->Tok.getLocation().getLocWithOffset(1);
1439       FormatTok->WhitespaceRange =
1440           SourceRange(GreaterLocation, GreaterLocation);
1441       FormatTok->TokenText = ">";
1442       FormatTok->ColumnWidth = 1;
1443       GreaterStashed = false;
1444       return FormatTok;
1445     }
1446 
1447     FormatTok = new (Allocator.Allocate()) FormatToken;
1448     readRawToken(*FormatTok);
1449     SourceLocation WhitespaceStart =
1450         FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
1451     FormatTok->IsFirst = IsFirstToken;
1452     IsFirstToken = false;
1453 
1454     // Consume and record whitespace until we find a significant token.
1455     unsigned WhitespaceLength = TrailingWhitespace;
1456     while (FormatTok->Tok.is(tok::unknown)) {
1457       for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
1458         switch (FormatTok->TokenText[i]) {
1459         case '\n':
1460           ++FormatTok->NewlinesBefore;
1461           // FIXME: This is technically incorrect, as it could also
1462           // be a literal backslash at the end of the line.
1463           if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
1464                          (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
1465                           FormatTok->TokenText[i - 2] != '\\')))
1466             FormatTok->HasUnescapedNewline = true;
1467           FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1468           Column = 0;
1469           break;
1470         case '\r':
1471         case '\f':
1472         case '\v':
1473           Column = 0;
1474           break;
1475         case ' ':
1476           ++Column;
1477           break;
1478         case '\t':
1479           Column += Style.TabWidth - Column % Style.TabWidth;
1480           break;
1481         case '\\':
1482           ++Column;
1483           if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
1484                              FormatTok->TokenText[i + 1] != '\n'))
1485             FormatTok->Type = TT_ImplicitStringLiteral;
1486           break;
1487         default:
1488           FormatTok->Type = TT_ImplicitStringLiteral;
1489           ++Column;
1490           break;
1491         }
1492       }
1493 
1494       if (FormatTok->Type == TT_ImplicitStringLiteral)
1495         break;
1496       WhitespaceLength += FormatTok->Tok.getLength();
1497 
1498       readRawToken(*FormatTok);
1499     }
1500 
1501     // In case the token starts with escaped newlines, we want to
1502     // take them into account as whitespace - this pattern is quite frequent
1503     // in macro definitions.
1504     // FIXME: Add a more explicit test.
1505     while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
1506            FormatTok->TokenText[1] == '\n') {
1507       ++FormatTok->NewlinesBefore;
1508       WhitespaceLength += 2;
1509       Column = 0;
1510       FormatTok->TokenText = FormatTok->TokenText.substr(2);
1511     }
1512 
1513     FormatTok->WhitespaceRange = SourceRange(
1514         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1515 
1516     FormatTok->OriginalColumn = Column;
1517 
1518     TrailingWhitespace = 0;
1519     if (FormatTok->Tok.is(tok::comment)) {
1520       // FIXME: Add the trimmed whitespace to Column.
1521       StringRef UntrimmedText = FormatTok->TokenText;
1522       FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
1523       TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
1524     } else if (FormatTok->Tok.is(tok::raw_identifier)) {
1525       IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
1526       FormatTok->Tok.setIdentifierInfo(&Info);
1527       FormatTok->Tok.setKind(Info.getTokenID());
1528     } else if (FormatTok->Tok.is(tok::greatergreater)) {
1529       FormatTok->Tok.setKind(tok::greater);
1530       FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1531       GreaterStashed = true;
1532     }
1533 
1534     // Now FormatTok is the next non-whitespace token.
1535 
1536     StringRef Text = FormatTok->TokenText;
1537     size_t FirstNewlinePos = Text.find('\n');
1538     if (FirstNewlinePos == StringRef::npos) {
1539       // FIXME: ColumnWidth actually depends on the start column, we need to
1540       // take this into account when the token is moved.
1541       FormatTok->ColumnWidth =
1542           encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
1543       Column += FormatTok->ColumnWidth;
1544     } else {
1545       FormatTok->IsMultiline = true;
1546       // FIXME: ColumnWidth actually depends on the start column, we need to
1547       // take this into account when the token is moved.
1548       FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
1549           Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
1550 
1551       // The last line of the token always starts in column 0.
1552       // Thus, the length can be precomputed even in the presence of tabs.
1553       FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
1554           Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
1555           Encoding);
1556       Column = FormatTok->LastLineColumnWidth;
1557     }
1558 
1559     FormatTok->IsForEachMacro =
1560         std::binary_search(ForEachMacros.begin(), ForEachMacros.end(),
1561                            FormatTok->Tok.getIdentifierInfo());
1562 
1563     return FormatTok;
1564   }
1565 
  // The token currently being produced: every new token is allocated into
  // this pointer, filled in, and then returned to the caller.
  FormatToken *FormatTok;
  // Copied into the IsFirst flag of the next token read, then cleared.
  bool IsFirstToken;
  // Set when a '>>' token has been shortened to a single '>'; cleared again
  // once the separate token for the second '>' has been created.
  bool GreaterStashed;
  // Running column in the input: reset to 0 at line breaks and advanced past
  // whitespace and token text as they are consumed.
  unsigned Column;
  // Number of trailing whitespace characters trimmed off the preceding comment
  // token (0 for any other token). It is counted as part of the whitespace in
  // front of the next token.
  unsigned TrailingWhitespace;
  // Raw lexer the tokens are read from.
  Lexer &Lex;
  // Used to fetch the source text of each lexed token.
  SourceManager &SourceMgr;
  // Read for TabWidth and Language while lexing.
  FormatStyle &Style;
  // Maps raw identifiers to their IdentifierInfo and token kind.
  IdentifierTable IdentTable;
  // Passed on to the column-width computation for token text.
  encoding::Encoding Encoding;
  // Backing storage for the FormatTokens, which are created in it via
  // placement new.
  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
  // Index (in 'Tokens') of the last token that starts a new line.
  unsigned FirstInLineIndex;
  SmallVector<FormatToken *, 16> Tokens;
  // Queried with std::binary_search on the token's IdentifierInfo pointer, so
  // it has to be kept sorted by pointer value.
  SmallVector<IdentifierInfo *, 8> ForEachMacros;
1581 
1582   void readRawToken(FormatToken &Tok) {
1583     Lex.LexFromRawLexer(Tok.Tok);
1584     Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1585                               Tok.Tok.getLength());
1586     // For formatting, treat unterminated string literals like normal string
1587     // literals.
1588     if (Tok.is(tok::unknown)) {
1589       if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
1590         Tok.Tok.setKind(tok::string_literal);
1591         Tok.IsUnterminatedLiteral = true;
1592       } else if (Style.Language == FormatStyle::LK_JavaScript &&
1593                  Tok.TokenText == "''") {
1594         Tok.Tok.setKind(tok::char_constant);
1595       }
1596     }
1597   }
1598 };
1599 
1600 static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1601   switch (Language) {
1602   case FormatStyle::LK_Cpp:
1603     return "C++";
1604   case FormatStyle::LK_JavaScript:
1605     return "JavaScript";
1606   case FormatStyle::LK_Proto:
1607     return "Proto";
1608   default:
1609     return "Unknown";
1610   }
1611 }
1612 
1613 class Formatter : public UnwrappedLineConsumer {
1614 public:
1615   Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
1616             const std::vector<CharSourceRange> &Ranges)
1617       : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1618         Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
1619         Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
1620         Encoding(encoding::detectEncoding(Lex.getBuffer())) {
1621     DEBUG(llvm::dbgs() << "File encoding: "
1622                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1623                                                                : "unknown")
1624                        << "\n");
1625     DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
1626                        << "\n");
1627   }
1628 
1629   tooling::Replacements format() {
1630     tooling::Replacements Result;
1631     FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
1632 
1633     UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
1634     bool StructuralError = Parser.parse();
1635     assert(UnwrappedLines.rbegin()->empty());
1636     for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1637          ++Run) {
1638       DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1639       SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1640       for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1641         AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1642       }
1643       tooling::Replacements RunResult =
1644           format(AnnotatedLines, StructuralError, Tokens);
1645       DEBUG({
1646         llvm::dbgs() << "Replacements for run " << Run << ":\n";
1647         for (tooling::Replacements::iterator I = RunResult.begin(),
1648                                              E = RunResult.end();
1649              I != E; ++I) {
1650           llvm::dbgs() << I->toString() << "\n";
1651         }
1652       });
1653       for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1654         delete AnnotatedLines[i];
1655       }
1656       Result.insert(RunResult.begin(), RunResult.end());
1657       Whitespaces.reset();
1658     }
1659     return Result;
1660   }
1661 
1662   tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1663                                bool StructuralError, FormatTokenLexer &Tokens) {
1664     TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
1665     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1666       Annotator.annotate(*AnnotatedLines[i]);
1667     }
1668     deriveLocalStyle(AnnotatedLines);
1669     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1670       Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1671     }
1672     computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
1673 
1674     Annotator.setCommentLineLevels(AnnotatedLines);
1675     ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding,
1676                                   BinPackInconclusiveFunctions);
1677     UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style);
1678     Formatter.format(AnnotatedLines, /*DryRun=*/false);
1679     return Whitespaces.generateReplacements();
1680   }
1681 
1682 private:
1683   // Determines which lines are affected by the SourceRanges given as input.
1684   // Returns \c true if at least one line between I and E or one of their
1685   // children is affected.
1686   bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
1687                             SmallVectorImpl<AnnotatedLine *>::iterator E) {
1688     bool SomeLineAffected = false;
1689     const AnnotatedLine *PreviousLine = nullptr;
1690     while (I != E) {
1691       AnnotatedLine *Line = *I;
1692       Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
1693 
1694       // If a line is part of a preprocessor directive, it needs to be formatted
1695       // if any token within the directive is affected.
1696       if (Line->InPPDirective) {
1697         FormatToken *Last = Line->Last;
1698         SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
1699         while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
1700           Last = (*PPEnd)->Last;
1701           ++PPEnd;
1702         }
1703 
1704         if (affectsTokenRange(*Line->First, *Last,
1705                               /*IncludeLeadingNewlines=*/false)) {
1706           SomeLineAffected = true;
1707           markAllAsAffected(I, PPEnd);
1708         }
1709         I = PPEnd;
1710         continue;
1711       }
1712 
1713       if (nonPPLineAffected(Line, PreviousLine))
1714         SomeLineAffected = true;
1715 
1716       PreviousLine = Line;
1717       ++I;
1718     }
1719     return SomeLineAffected;
1720   }
1721 
1722   // Determines whether 'Line' is affected by the SourceRanges given as input.
1723   // Returns \c true if line or one if its children is affected.
1724   bool nonPPLineAffected(AnnotatedLine *Line,
1725                          const AnnotatedLine *PreviousLine) {
1726     bool SomeLineAffected = false;
1727     Line->ChildrenAffected =
1728         computeAffectedLines(Line->Children.begin(), Line->Children.end());
1729     if (Line->ChildrenAffected)
1730       SomeLineAffected = true;
1731 
1732     // Stores whether one of the line's tokens is directly affected.
1733     bool SomeTokenAffected = false;
1734     // Stores whether we need to look at the leading newlines of the next token
1735     // in order to determine whether it was affected.
1736     bool IncludeLeadingNewlines = false;
1737 
1738     // Stores whether the first child line of any of this line's tokens is
1739     // affected.
1740     bool SomeFirstChildAffected = false;
1741 
1742     for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
1743       // Determine whether 'Tok' was affected.
1744       if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
1745         SomeTokenAffected = true;
1746 
1747       // Determine whether the first child of 'Tok' was affected.
1748       if (!Tok->Children.empty() && Tok->Children.front()->Affected)
1749         SomeFirstChildAffected = true;
1750 
1751       IncludeLeadingNewlines = Tok->Children.empty();
1752     }
1753 
1754     // Was this line moved, i.e. has it previously been on the same line as an
1755     // affected line?
1756     bool LineMoved = PreviousLine && PreviousLine->Affected &&
1757                      Line->First->NewlinesBefore == 0;
1758 
1759     bool IsContinuedComment =
1760         Line->First->is(tok::comment) && Line->First->Next == nullptr &&
1761         Line->First->NewlinesBefore < 2 && PreviousLine &&
1762         PreviousLine->Affected && PreviousLine->Last->is(tok::comment);
1763 
1764     if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
1765         IsContinuedComment) {
1766       Line->Affected = true;
1767       SomeLineAffected = true;
1768     }
1769     return SomeLineAffected;
1770   }
1771 
1772   // Marks all lines between I and E as well as all their children as affected.
1773   void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
1774                          SmallVectorImpl<AnnotatedLine *>::iterator E) {
1775     while (I != E) {
1776       (*I)->Affected = true;
1777       markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
1778       ++I;
1779     }
1780   }
1781 
1782   // Returns true if the range from 'First' to 'Last' intersects with one of the
1783   // input ranges.
1784   bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
1785                          bool IncludeLeadingNewlines) {
1786     SourceLocation Start = First.WhitespaceRange.getBegin();
1787     if (!IncludeLeadingNewlines)
1788       Start = Start.getLocWithOffset(First.LastNewlineOffset);
1789     SourceLocation End = Last.getStartOfNonWhitespace();
1790     if (Last.TokenText.size() > 0)
1791       End = End.getLocWithOffset(Last.TokenText.size() - 1);
1792     CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
1793     return affectsCharSourceRange(Range);
1794   }
1795 
1796   // Returns true if one of the input ranges intersect the leading empty lines
1797   // before 'Tok'.
1798   bool affectsLeadingEmptyLines(const FormatToken &Tok) {
1799     CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
1800         Tok.WhitespaceRange.getBegin(),
1801         Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
1802     return affectsCharSourceRange(EmptyLineRange);
1803   }
1804 
1805   // Returns true if 'Range' intersects with one of the input ranges.
1806   bool affectsCharSourceRange(const CharSourceRange &Range) {
1807     for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
1808                                                           E = Ranges.end();
1809          I != E; ++I) {
1810       if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
1811           !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
1812         return true;
1813     }
1814     return false;
1815   }
1816 
1817   static bool inputUsesCRLF(StringRef Text) {
1818     return Text.count('\r') * 2 > Text.count('\n');
1819   }
1820 
1821   void
1822   deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1823     unsigned CountBoundToVariable = 0;
1824     unsigned CountBoundToType = 0;
1825     bool HasCpp03IncompatibleFormat = false;
1826     bool HasBinPackedFunction = false;
1827     bool HasOnePerLineFunction = false;
1828     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1829       if (!AnnotatedLines[i]->First->Next)
1830         continue;
1831       FormatToken *Tok = AnnotatedLines[i]->First->Next;
1832       while (Tok->Next) {
1833         if (Tok->Type == TT_PointerOrReference) {
1834           bool SpacesBefore =
1835               Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1836           bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1837                              Tok->Next->WhitespaceRange.getEnd();
1838           if (SpacesBefore && !SpacesAfter)
1839             ++CountBoundToVariable;
1840           else if (!SpacesBefore && SpacesAfter)
1841             ++CountBoundToType;
1842         }
1843 
1844         if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1845           if (Tok->is(tok::coloncolon) &&
1846               Tok->Previous->Type == TT_TemplateOpener)
1847             HasCpp03IncompatibleFormat = true;
1848           if (Tok->Type == TT_TemplateCloser &&
1849               Tok->Previous->Type == TT_TemplateCloser)
1850             HasCpp03IncompatibleFormat = true;
1851         }
1852 
1853         if (Tok->PackingKind == PPK_BinPacked)
1854           HasBinPackedFunction = true;
1855         if (Tok->PackingKind == PPK_OnePerLine)
1856           HasOnePerLineFunction = true;
1857 
1858         Tok = Tok->Next;
1859       }
1860     }
1861     if (Style.DerivePointerBinding) {
1862       if (CountBoundToType > CountBoundToVariable)
1863         Style.PointerBindsToType = true;
1864       else if (CountBoundToType < CountBoundToVariable)
1865         Style.PointerBindsToType = false;
1866     }
1867     if (Style.Standard == FormatStyle::LS_Auto) {
1868       Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1869                                                   : FormatStyle::LS_Cpp03;
1870     }
1871     BinPackInconclusiveFunctions =
1872         HasBinPackedFunction || !HasOnePerLineFunction;
1873   }
1874 
1875   void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
1876     assert(!UnwrappedLines.empty());
1877     UnwrappedLines.back().push_back(TheLine);
1878   }
1879 
1880   void finishRun() override {
1881     UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1882   }
1883 
1884   FormatStyle Style;
1885   Lexer &Lex;
1886   SourceManager &SourceMgr;
1887   WhitespaceManager Whitespaces;
1888   SmallVector<CharSourceRange, 8> Ranges;
1889   SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
1890 
1891   encoding::Encoding Encoding;
1892   bool BinPackInconclusiveFunctions;
1893 };
1894 
1895 } // end anonymous namespace
1896 
1897 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1898                                SourceManager &SourceMgr,
1899                                std::vector<CharSourceRange> Ranges) {
1900   Formatter formatter(Style, Lex, SourceMgr, Ranges);
1901   return formatter.format();
1902 }
1903 
1904 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1905                                std::vector<tooling::Range> Ranges,
1906                                StringRef FileName) {
1907   FileManager Files((FileSystemOptions()));
1908   DiagnosticsEngine Diagnostics(
1909       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1910       new DiagnosticOptions);
1911   SourceManager SourceMgr(Diagnostics, Files);
1912   llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1913   const clang::FileEntry *Entry =
1914       Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1915   SourceMgr.overrideFileContents(Entry, Buf);
1916   FileID ID =
1917       SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1918   Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
1919             getFormattingLangOpts(Style.Standard));
1920   SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1921   std::vector<CharSourceRange> CharRanges;
1922   for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1923     SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset());
1924     SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
1925     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1926   }
1927   return reformat(Style, Lex, SourceMgr, CharRanges);
1928 }
1929 
1930 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) {
1931   LangOptions LangOpts;
1932   LangOpts.CPlusPlus = 1;
1933   LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1934   LangOpts.CPlusPlus1y = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1935   LangOpts.LineComment = 1;
1936   LangOpts.CXXOperatorNames = 1;
1937   LangOpts.Bool = 1;
1938   LangOpts.ObjC1 = 1;
1939   LangOpts.ObjC2 = 1;
1940   return LangOpts;
1941 }
1942 
// Help text for the -style option: lists the predefined style names and
// explains the "file" and inline "{key: value, ...}" forms.
const char *StyleOptionHelpDescription =
    "Coding style, currently supports:\n"
    "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
    "Use -style=file to load style configuration from\n"
    ".clang-format file located in one of the parent\n"
    "directories of the source file (or current\n"
    "directory for stdin).\n"
    "Use -style=\"{key: value, ...}\" to set specific\n"
    "parameters, e.g.:\n"
    "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
1953 
1954 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
1955   if (FileName.endswith_lower(".js")) {
1956     return FormatStyle::LK_JavaScript;
1957   } else if (FileName.endswith_lower(".proto") ||
1958              FileName.endswith_lower(".protodevel")) {
1959     return FormatStyle::LK_Proto;
1960   }
1961   return FormatStyle::LK_Cpp;
1962 }
1963 
1964 FormatStyle getStyle(StringRef StyleName, StringRef FileName,
1965                      StringRef FallbackStyle) {
1966   FormatStyle Style = getLLVMStyle();
1967   Style.Language = getLanguageByFileName(FileName);
1968   if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
1969     llvm::errs() << "Invalid fallback style \"" << FallbackStyle
1970                  << "\" using LLVM style\n";
1971     return Style;
1972   }
1973 
1974   if (StyleName.startswith("{")) {
1975     // Parse YAML/JSON style from the command line.
1976     if (llvm::error_code ec = parseConfiguration(StyleName, &Style)) {
1977       llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
1978                    << FallbackStyle << " style\n";
1979     }
1980     return Style;
1981   }
1982 
1983   if (!StyleName.equals_lower("file")) {
1984     if (!getPredefinedStyle(StyleName, Style.Language, &Style))
1985       llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1986                    << " style\n";
1987     return Style;
1988   }
1989 
1990   // Look for .clang-format/_clang-format file in the file's parent directories.
1991   SmallString<128> UnsuitableConfigFiles;
1992   SmallString<128> Path(FileName);
1993   llvm::sys::fs::make_absolute(Path);
1994   for (StringRef Directory = Path; !Directory.empty();
1995        Directory = llvm::sys::path::parent_path(Directory)) {
1996     if (!llvm::sys::fs::is_directory(Directory))
1997       continue;
1998     SmallString<128> ConfigFile(Directory);
1999 
2000     llvm::sys::path::append(ConfigFile, ".clang-format");
2001     DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
2002     bool IsFile = false;
2003     // Ignore errors from is_regular_file: we only need to know if we can read
2004     // the file or not.
2005     llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
2006 
2007     if (!IsFile) {
2008       // Try _clang-format too, since dotfiles are not commonly used on Windows.
2009       ConfigFile = Directory;
2010       llvm::sys::path::append(ConfigFile, "_clang-format");
2011       DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
2012       llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
2013     }
2014 
2015     if (IsFile) {
2016       std::unique_ptr<llvm::MemoryBuffer> Text;
2017       if (llvm::error_code ec =
2018               llvm::MemoryBuffer::getFile(ConfigFile.c_str(), Text)) {
2019         llvm::errs() << ec.message() << "\n";
2020         break;
2021       }
2022       if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
2023         if (ec == llvm::errc::not_supported) {
2024           if (!UnsuitableConfigFiles.empty())
2025             UnsuitableConfigFiles.append(", ");
2026           UnsuitableConfigFiles.append(ConfigFile);
2027           continue;
2028         }
2029         llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
2030                      << "\n";
2031         break;
2032       }
2033       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
2034       return Style;
2035     }
2036   }
2037   llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
2038                << " style\n";
2039   if (!UnsuitableConfigFiles.empty()) {
2040     llvm::errs() << "Configuration file(s) do(es) not support "
2041                  << getLanguageName(Style.Language) << ": "
2042                  << UnsuitableConfigFiles << "\n";
2043   }
2044   return Style;
2045 }
2046 
2047 } // namespace format
2048 } // namespace clang
2049