1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "ContinuationIndenter.h"
17 #include "TokenAnnotator.h"
18 #include "UnwrappedLineParser.h"
19 #include "WhitespaceManager.h"
20 #include "clang/Basic/Diagnostic.h"
21 #include "clang/Basic/DiagnosticOptions.h"
22 #include "clang/Basic/SourceManager.h"
23 #include "clang/Format/Format.h"
24 #include "clang/Lex/Lexer.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/Support/Allocator.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/Path.h"
29 #include "llvm/Support/YAMLTraits.h"
30 #include <queue>
31 #include <string>
32 
// Category name for the DEBUG(...) output emitted from this file.
#define DEBUG_TYPE "format-formatter"

// The YAML trait specializations below are written inside namespace
// llvm::yaml; this lets them refer to FormatStyle without qualification.
using clang::format::FormatStyle;

// NOTE(review): presumably required so that std::vector<std::string> options
// (ForEachMacros looks like one) are read/written as YAML flow sequences --
// confirm against YAMLTraits.h.
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
38 
39 namespace llvm {
40 namespace yaml {
41 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
42   static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
43     IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
44     IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
45     IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
46   }
47 };
48 
49 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
50   static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
51     IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
52     IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
53     IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
54     IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
55     IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
56   }
57 };
58 
59 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
60   static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
61     IO.enumCase(Value, "Never", FormatStyle::UT_Never);
62     IO.enumCase(Value, "false", FormatStyle::UT_Never);
63     IO.enumCase(Value, "Always", FormatStyle::UT_Always);
64     IO.enumCase(Value, "true", FormatStyle::UT_Always);
65     IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
66   }
67 };
68 
69 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
70   static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
71     IO.enumCase(Value, "None", FormatStyle::SFS_None);
72     IO.enumCase(Value, "false", FormatStyle::SFS_None);
73     IO.enumCase(Value, "All", FormatStyle::SFS_All);
74     IO.enumCase(Value, "true", FormatStyle::SFS_All);
75     IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline);
76   }
77 };
78 
79 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
80   static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
81     IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
82     IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
83     IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
84     IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
85     IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
86   }
87 };
88 
89 template <>
90 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
91   static void enumeration(IO &IO,
92                           FormatStyle::NamespaceIndentationKind &Value) {
93     IO.enumCase(Value, "None", FormatStyle::NI_None);
94     IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
95     IO.enumCase(Value, "All", FormatStyle::NI_All);
96   }
97 };
98 
99 template <>
100 struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> {
101   static void enumeration(IO &IO,
102                           FormatStyle::PointerAlignmentStyle &Value) {
103     IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle);
104     IO.enumCase(Value, "Left", FormatStyle::PAS_Left);
105     IO.enumCase(Value, "Right", FormatStyle::PAS_Right);
106 
107     // For backward compability.
108     IO.enumCase(Value, "true", FormatStyle::PAS_Left);
109     IO.enumCase(Value, "false", FormatStyle::PAS_Right);
110   }
111 };
112 
113 template <>
114 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
115   static void enumeration(IO &IO,
116                           FormatStyle::SpaceBeforeParensOptions &Value) {
117     IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
118     IO.enumCase(Value, "ControlStatements",
119                 FormatStyle::SBPO_ControlStatements);
120     IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
121 
122     // For backward compatibility.
123     IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
124     IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
125   }
126 };
127 
/// Maps every FormatStyle option to its YAML key.
///
/// The same routine is used for reading and for writing a configuration;
/// IO.outputting() tells the two directions apart where they differ.
template <> struct MappingTraits<FormatStyle> {
  /// \param IO    The YAML reader or writer. When reading, its context is
  ///              expected to point to a FormatStyle whose Language selects
  ///              the flavour of a "BasedOnStyle" predefined style.
  /// \param Style The style being read into or written from.
  static void mapping(IO &IO, FormatStyle &Style) {
    // When reading, read the language first, we need it for getPredefinedStyle.
    IO.mapOptional("Language", Style.Language);

    if (IO.outputting()) {
      // Writing: if Style is identical to one of the predefined styles (for
      // its language), emit that style's name under the key "# BasedOnStyle".
      // Only the first matching name is written.
      // NOTE(review): the leading "# " presumably turns the entry into a YAML
      // comment in the generated text -- confirm.
      StringRef StylesArray[] = { "LLVM",    "Google", "Chromium",
                                  "Mozilla", "WebKit", "GNU" };
      ArrayRef<StringRef> Styles(StylesArray);
      for (size_t i = 0, e = Styles.size(); i < e; ++i) {
        StringRef StyleName(Styles[i]);
        FormatStyle PredefinedStyle;
        if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
            Style == PredefinedStyle) {
          IO.mapOptional("# BasedOnStyle", StyleName);
          break;
        }
      }
    } else {
      // Reading: "BasedOnStyle" replaces the whole of Style with the named
      // predefined style before the individual keys below are applied.
      StringRef BasedOnStyle;
      IO.mapOptional("BasedOnStyle", BasedOnStyle);
      if (!BasedOnStyle.empty()) {
        // getPredefinedStyle() overwrites Style.Language with the language it
        // is given, so remember the value read from the "Language" key above
        // and put it back afterwards.
        FormatStyle::LanguageKind OldLanguage = Style.Language;
        // The language used to pick the predefined style comes from the
        // FormatStyle stored as the IO context, not from this document.
        FormatStyle::LanguageKind Language =
            ((FormatStyle *)IO.getContext())->Language;
        if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
          IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
          return;
        }
        Style.Language = OldLanguage;
      }
    }

    // One optional key per style option. The calls are issued in this fixed
    // order for both reading and writing.
    IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
    IO.mapOptional("ConstructorInitializerIndentWidth",
                   Style.ConstructorInitializerIndentWidth);
    IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
    IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
    IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
                   Style.AllowAllParametersOfDeclarationOnNextLine);
    IO.mapOptional("AllowShortBlocksOnASingleLine",
                   Style.AllowShortBlocksOnASingleLine);
    IO.mapOptional("AllowShortIfStatementsOnASingleLine",
                   Style.AllowShortIfStatementsOnASingleLine);
    IO.mapOptional("AllowShortLoopsOnASingleLine",
                   Style.AllowShortLoopsOnASingleLine);
    IO.mapOptional("AllowShortFunctionsOnASingleLine",
                   Style.AllowShortFunctionsOnASingleLine);
    IO.mapOptional("AlwaysBreakTemplateDeclarations",
                   Style.AlwaysBreakTemplateDeclarations);
    IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
                   Style.AlwaysBreakBeforeMultilineStrings);
    IO.mapOptional("BreakBeforeBinaryOperators",
                   Style.BreakBeforeBinaryOperators);
    IO.mapOptional("BreakBeforeTernaryOperators",
                   Style.BreakBeforeTernaryOperators);
    IO.mapOptional("BreakConstructorInitializersBeforeComma",
                   Style.BreakConstructorInitializersBeforeComma);
    IO.mapOptional("BinPackParameters", Style.BinPackParameters);
    IO.mapOptional("ColumnLimit", Style.ColumnLimit);
    IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
                   Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
    IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment);
    IO.mapOptional("ExperimentalAutoDetectBinPacking",
                   Style.ExperimentalAutoDetectBinPacking);
    IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
    IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
    IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
                   Style.KeepEmptyLinesAtTheStartOfBlocks);
    IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
    IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
    IO.mapOptional("ObjCSpaceBeforeProtocolList",
                   Style.ObjCSpaceBeforeProtocolList);
    IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
                   Style.PenaltyBreakBeforeFirstCallParameter);
    IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
    IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
    IO.mapOptional("PenaltyBreakFirstLessLess",
                   Style.PenaltyBreakFirstLessLess);
    IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
    IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
                   Style.PenaltyReturnTypeOnItsOwnLine);
    IO.mapOptional("PointerAlignment", Style.PointerAlignment);
    IO.mapOptional("SpacesBeforeTrailingComments",
                   Style.SpacesBeforeTrailingComments);
    IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
    IO.mapOptional("Standard", Style.Standard);
    IO.mapOptional("IndentWidth", Style.IndentWidth);
    IO.mapOptional("TabWidth", Style.TabWidth);
    IO.mapOptional("UseTab", Style.UseTab);
    IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
    IO.mapOptional("IndentFunctionDeclarationAfterType",
                   Style.IndentFunctionDeclarationAfterType);
    IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
    IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
    IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
    IO.mapOptional("SpacesInCStyleCastParentheses",
                   Style.SpacesInCStyleCastParentheses);
    IO.mapOptional("SpacesInContainerLiterals",
                   Style.SpacesInContainerLiterals);
    IO.mapOptional("SpaceBeforeAssignmentOperators",
                   Style.SpaceBeforeAssignmentOperators);
    IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
    IO.mapOptional("CommentPragmas", Style.CommentPragmas);
    IO.mapOptional("ForEachMacros", Style.ForEachMacros);

    // For backward compatibility.
    // These legacy key names are accepted only when reading and are stored in
    // the fields of the options that replaced them; they are never written.
    if (!IO.outputting()) {
      IO.mapOptional("SpaceAfterControlStatementKeyword",
                     Style.SpaceBeforeParens);
      IO.mapOptional("PointerBindsToType", Style.PointerAlignment);
      IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment);
    }
    // Mapped after its legacy alias above.
    // NOTE(review): presumably so that "SpaceBeforeParens" takes precedence
    // when a file contains both keys -- confirm.
    IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
    IO.mapOptional("DisableFormat", Style.DisableFormat);
  }
};
245 
246 // Allows to read vector<FormatStyle> while keeping default values.
247 // IO.getContext() should contain a pointer to the FormatStyle structure, that
248 // will be used to get default values for missing keys.
249 // If the first element has no Language specified, it will be treated as the
250 // default one for the following elements.
251 template <> struct DocumentListTraits<std::vector<FormatStyle> > {
252   static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
253     return Seq.size();
254   }
255   static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
256                               size_t Index) {
257     if (Index >= Seq.size()) {
258       assert(Index == Seq.size());
259       FormatStyle Template;
260       if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
261         Template = Seq[0];
262       } else {
263         Template = *((const FormatStyle *)IO.getContext());
264         Template.Language = FormatStyle::LK_None;
265       }
266       Seq.resize(Index + 1, Template);
267     }
268     return Seq[Index];
269   }
270 };
271 }
272 }
273 
274 namespace clang {
275 namespace format {
276 
277 const std::error_category &getParseCategory() {
278   static ParseErrorCategory C;
279   return C;
280 }
281 std::error_code make_error_code(ParseError e) {
282   return std::error_code(static_cast<int>(e), getParseCategory());
283 }
284 
285 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT {
286   return "clang-format.parse_error";
287 }
288 
289 std::string ParseErrorCategory::message(int EV) const {
290   switch (static_cast<ParseError>(EV)) {
291   case ParseError::Success:
292     return "Success";
293   case ParseError::Error:
294     return "Invalid argument";
295   case ParseError::Unsuitable:
296     return "Unsuitable";
297   }
298   llvm_unreachable("unexpected parse error");
299 }
300 
301 FormatStyle getLLVMStyle() {
302   FormatStyle LLVMStyle;
303   LLVMStyle.Language = FormatStyle::LK_Cpp;
304   LLVMStyle.AccessModifierOffset = -2;
305   LLVMStyle.AlignEscapedNewlinesLeft = false;
306   LLVMStyle.AlignTrailingComments = true;
307   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
308   LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
309   LLVMStyle.AllowShortBlocksOnASingleLine = false;
310   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
311   LLVMStyle.AllowShortLoopsOnASingleLine = false;
312   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
313   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
314   LLVMStyle.BinPackParameters = true;
315   LLVMStyle.BreakBeforeBinaryOperators = false;
316   LLVMStyle.BreakBeforeTernaryOperators = true;
317   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
318   LLVMStyle.BreakConstructorInitializersBeforeComma = false;
319   LLVMStyle.ColumnLimit = 80;
320   LLVMStyle.CommentPragmas = "^ IWYU pragma:";
321   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
322   LLVMStyle.ConstructorInitializerIndentWidth = 4;
323   LLVMStyle.ContinuationIndentWidth = 4;
324   LLVMStyle.Cpp11BracedListStyle = true;
325   LLVMStyle.DerivePointerAlignment = false;
326   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
327   LLVMStyle.ForEachMacros.push_back("foreach");
328   LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
329   LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH");
330   LLVMStyle.IndentCaseLabels = false;
331   LLVMStyle.IndentFunctionDeclarationAfterType = false;
332   LLVMStyle.IndentWidth = 2;
333   LLVMStyle.TabWidth = 8;
334   LLVMStyle.MaxEmptyLinesToKeep = 1;
335   LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
336   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
337   LLVMStyle.ObjCSpaceAfterProperty = false;
338   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
339   LLVMStyle.PointerAlignment = FormatStyle::PAS_Right;
340   LLVMStyle.SpacesBeforeTrailingComments = 1;
341   LLVMStyle.Standard = FormatStyle::LS_Cpp11;
342   LLVMStyle.UseTab = FormatStyle::UT_Never;
343   LLVMStyle.SpacesInParentheses = false;
344   LLVMStyle.SpaceInEmptyParentheses = false;
345   LLVMStyle.SpacesInContainerLiterals = true;
346   LLVMStyle.SpacesInCStyleCastParentheses = false;
347   LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
348   LLVMStyle.SpaceBeforeAssignmentOperators = true;
349   LLVMStyle.SpacesInAngles = false;
350 
351   LLVMStyle.PenaltyBreakComment = 300;
352   LLVMStyle.PenaltyBreakFirstLessLess = 120;
353   LLVMStyle.PenaltyBreakString = 1000;
354   LLVMStyle.PenaltyExcessCharacter = 1000000;
355   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
356   LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
357 
358   LLVMStyle.DisableFormat = false;
359 
360   return LLVMStyle;
361 }
362 
363 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
364   FormatStyle GoogleStyle = getLLVMStyle();
365   GoogleStyle.Language = Language;
366 
367   GoogleStyle.AccessModifierOffset = -1;
368   GoogleStyle.AlignEscapedNewlinesLeft = true;
369   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
370   GoogleStyle.AllowShortLoopsOnASingleLine = true;
371   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
372   GoogleStyle.AlwaysBreakTemplateDeclarations = true;
373   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
374   GoogleStyle.DerivePointerAlignment = true;
375   GoogleStyle.IndentCaseLabels = true;
376   GoogleStyle.IndentFunctionDeclarationAfterType = true;
377   GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
378   GoogleStyle.ObjCSpaceAfterProperty = false;
379   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
380   GoogleStyle.PointerAlignment = FormatStyle::PAS_Left;
381   GoogleStyle.SpacesBeforeTrailingComments = 2;
382   GoogleStyle.Standard = FormatStyle::LS_Auto;
383 
384   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
385   GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
386 
387   if (Language == FormatStyle::LK_JavaScript) {
388     GoogleStyle.BreakBeforeTernaryOperators = false;
389     GoogleStyle.MaxEmptyLinesToKeep = 3;
390     GoogleStyle.SpacesInContainerLiterals = false;
391   } else if (Language == FormatStyle::LK_Proto) {
392     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
393     GoogleStyle.SpacesInContainerLiterals = false;
394   }
395 
396   return GoogleStyle;
397 }
398 
399 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
400   FormatStyle ChromiumStyle = getGoogleStyle(Language);
401   ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
402   ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
403   ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
404   ChromiumStyle.AllowShortLoopsOnASingleLine = false;
405   ChromiumStyle.BinPackParameters = false;
406   ChromiumStyle.DerivePointerAlignment = false;
407   ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
408   return ChromiumStyle;
409 }
410 
411 FormatStyle getMozillaStyle() {
412   FormatStyle MozillaStyle = getLLVMStyle();
413   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
414   MozillaStyle.Cpp11BracedListStyle = false;
415   MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
416   MozillaStyle.DerivePointerAlignment = true;
417   MozillaStyle.IndentCaseLabels = true;
418   MozillaStyle.ObjCSpaceAfterProperty = true;
419   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
420   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
421   MozillaStyle.PointerAlignment = FormatStyle::PAS_Left;
422   MozillaStyle.Standard = FormatStyle::LS_Cpp03;
423   return MozillaStyle;
424 }
425 
426 FormatStyle getWebKitStyle() {
427   FormatStyle Style = getLLVMStyle();
428   Style.AccessModifierOffset = -4;
429   Style.AlignTrailingComments = false;
430   Style.BreakBeforeBinaryOperators = true;
431   Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
432   Style.BreakConstructorInitializersBeforeComma = true;
433   Style.Cpp11BracedListStyle = false;
434   Style.ColumnLimit = 0;
435   Style.IndentWidth = 4;
436   Style.NamespaceIndentation = FormatStyle::NI_Inner;
437   Style.ObjCSpaceAfterProperty = true;
438   Style.PointerAlignment = FormatStyle::PAS_Left;
439   Style.Standard = FormatStyle::LS_Cpp03;
440   return Style;
441 }
442 
443 FormatStyle getGNUStyle() {
444   FormatStyle Style = getLLVMStyle();
445   Style.BreakBeforeBinaryOperators = true;
446   Style.BreakBeforeBraces = FormatStyle::BS_GNU;
447   Style.BreakBeforeTernaryOperators = true;
448   Style.Cpp11BracedListStyle = false;
449   Style.ColumnLimit = 79;
450   Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
451   Style.Standard = FormatStyle::LS_Cpp03;
452   return Style;
453 }
454 
455 FormatStyle getNoStyle() {
456   FormatStyle NoStyle = getLLVMStyle();
457   NoStyle.DisableFormat = true;
458   return NoStyle;
459 }
460 
461 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
462                         FormatStyle *Style) {
463   if (Name.equals_lower("llvm")) {
464     *Style = getLLVMStyle();
465   } else if (Name.equals_lower("chromium")) {
466     *Style = getChromiumStyle(Language);
467   } else if (Name.equals_lower("mozilla")) {
468     *Style = getMozillaStyle();
469   } else if (Name.equals_lower("google")) {
470     *Style = getGoogleStyle(Language);
471   } else if (Name.equals_lower("webkit")) {
472     *Style = getWebKitStyle();
473   } else if (Name.equals_lower("gnu")) {
474     *Style = getGNUStyle();
475   } else if (Name.equals_lower("none")) {
476     *Style = getNoStyle();
477   } else {
478     return false;
479   }
480 
481   Style->Language = Language;
482   return true;
483 }
484 
485 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
486   assert(Style);
487   FormatStyle::LanguageKind Language = Style->Language;
488   assert(Language != FormatStyle::LK_None);
489   if (Text.trim().empty())
490     return make_error_code(ParseError::Error);
491 
492   std::vector<FormatStyle> Styles;
493   llvm::yaml::Input Input(Text);
494   // DocumentListTraits<vector<FormatStyle>> uses the context to get default
495   // values for the fields, keys for which are missing from the configuration.
496   // Mapping also uses the context to get the language to find the correct
497   // base style.
498   Input.setContext(Style);
499   Input >> Styles;
500   if (Input.error())
501     return Input.error();
502 
503   for (unsigned i = 0; i < Styles.size(); ++i) {
504     // Ensures that only the first configuration can skip the Language option.
505     if (Styles[i].Language == FormatStyle::LK_None && i != 0)
506       return make_error_code(ParseError::Error);
507     // Ensure that each language is configured at most once.
508     for (unsigned j = 0; j < i; ++j) {
509       if (Styles[i].Language == Styles[j].Language) {
510         DEBUG(llvm::dbgs()
511               << "Duplicate languages in the config file on positions " << j
512               << " and " << i << "\n");
513         return make_error_code(ParseError::Error);
514       }
515     }
516   }
517   // Look for a suitable configuration starting from the end, so we can
518   // find the configuration for the specific language first, and the default
519   // configuration (which can only be at slot 0) after it.
520   for (int i = Styles.size() - 1; i >= 0; --i) {
521     if (Styles[i].Language == Language ||
522         Styles[i].Language == FormatStyle::LK_None) {
523       *Style = Styles[i];
524       Style->Language = Language;
525       return make_error_code(ParseError::Success);
526     }
527   }
528   return make_error_code(ParseError::Unsuitable);
529 }
530 
531 std::string configurationAsText(const FormatStyle &Style) {
532   std::string Text;
533   llvm::raw_string_ostream Stream(Text);
534   llvm::yaml::Output Output(Stream);
535   // We use the same mapping method for input and output, so we need a non-const
536   // reference here.
537   FormatStyle NonConstStyle = Style;
538   Output << NonConstStyle;
539   return Stream.str();
540 }
541 
542 namespace {
543 
544 class NoColumnLimitFormatter {
545 public:
546   NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {}
547 
548   /// \brief Formats the line starting at \p State, simply keeping all of the
549   /// input's line breaking decisions.
550   void format(unsigned FirstIndent, const AnnotatedLine *Line) {
551     LineState State =
552         Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false);
553     while (State.NextToken) {
554       bool Newline =
555           Indenter->mustBreak(State) ||
556           (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
557       Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
558     }
559   }
560 
561 private:
562   ContinuationIndenter *Indenter;
563 };
564 
/// Decides, according to the style, how many of the lines following a given
/// line can be joined onto it (short functions, short blocks, short control
/// statements and preprocessor directives).
class LineJoiner {
public:
  LineJoiner(const FormatStyle &Style) : Style(Style) {}

  /// \brief Calculates how many lines can be merged into 1 starting at \p I.
  ///
  /// \param Indent Indentation of the line at \p I, in columns.
  /// \param I      The line to merge following lines into.
  /// \param E      End of the line sequence.
  /// \returns the number of lines after \p I that can be joined onto it, or 0
  /// if nothing can be merged.
  unsigned
  tryFitMultipleLinesInOne(unsigned Indent,
                           SmallVectorImpl<AnnotatedLine *>::const_iterator I,
                           SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
    // We can never merge stuff if there are trailing line comments.
    const AnnotatedLine *TheLine = *I;
    if (TheLine->Last->Type == TT_LineComment)
      return 0;

    // The indent alone already exceeds the column limit; this also keeps the
    // subtraction below from wrapping around.
    if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
      return 0;

    // Columns available after the indent; a ColumnLimit of 0 means there is no
    // limit.
    unsigned Limit =
        Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
    // If we already exceed the column limit, we set 'Limit' to 0. The different
    // tryMerge..() functions can then decide whether to still do merging.
    Limit = TheLine->Last->TotalLength > Limit
                ? 0
                : Limit - TheLine->Last->TotalLength;

    // There must be a next line, it must be valid and it must not be forced
    // onto its own line.
    if (I + 1 == E || I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore)
      return 0;

    // FIXME: TheLine->Level != 0 might or might not be the right check to do.
    // If necessary, change to something smarter.
    bool MergeShortFunctions =
        Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All ||
        (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline &&
         TheLine->Level != 0);

    // Function header that ends in its opening brace.
    if (TheLine->Last->Type == TT_FunctionLBrace &&
        TheLine->First != TheLine->Last) {
      return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0;
    }
    // Any other line ending in '{': only merged for attached braces.
    if (TheLine->Last->is(tok::l_brace)) {
      return Style.BreakBeforeBraces == FormatStyle::BS_Attach
                 ? tryMergeSimpleBlock(I, E, Limit)
                 : 0;
    }
    // Function header whose opening brace is on the next line.
    if (I[1]->First->Type == TT_FunctionLBrace &&
        Style.BreakBeforeBraces != FormatStyle::BS_Attach) {
      // Check for Limit <= 2 to account for the " {".
      if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine)))
        return 0;
      Limit -= 2;

      unsigned MergedLines = 0;
      if (MergeShortFunctions) {
        MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
        // If we managed to merge the block, count the function header, which is
        // on a separate line.
        if (MergedLines > 0)
          ++MergedLines;
      }
      return MergedLines;
    }
    if (TheLine->First->is(tok::kw_if)) {
      return Style.AllowShortIfStatementsOnASingleLine
                 ? tryMergeSimpleControlStatement(I, E, Limit)
                 : 0;
    }
    if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
      return Style.AllowShortLoopsOnASingleLine
                 ? tryMergeSimpleControlStatement(I, E, Limit)
                 : 0;
    }
    if (TheLine->InPPDirective &&
        (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
      return tryMergeSimplePPDirective(I, E, Limit);
    }
    return 0;
  }

private:
  /// Checks whether the line after \p I can be joined onto the preprocessor
  /// directive line at \p I.
  ///
  /// \returns 1 if the next line belongs to the same directive (it is in a
  /// directive and does not start after an unescaped newline), is the last
  /// line of that directive and fits into \p Limit together with a separating
  /// space; 0 otherwise.
  unsigned
  tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
                            SmallVectorImpl<AnnotatedLine *>::const_iterator E,
                            unsigned Limit) {
    if (Limit == 0)
      return 0;
    if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)
      return 0;
    // The directive continues past the next line: do not merge.
    if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
      return 0;
    if (1 + I[1]->Last->TotalLength > Limit)
      return 0;
    return 1;
  }

  /// Checks whether the line after the control statement header at \p I can
  /// be put on the same line as the header.
  ///
  /// \returns 1 if the next line can be joined onto the header, 0 otherwise.
  unsigned tryMergeSimpleControlStatement(
      SmallVectorImpl<AnnotatedLine *>::const_iterator I,
      SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
    if (Limit == 0)
      return 0;
    // With Allman/GNU braces, a following '{' line is only merged when short
    // blocks are allowed on a single line.
    if ((Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
         Style.BreakBeforeBraces == FormatStyle::BS_GNU) &&
        (I[1]->First->is(tok::l_brace) && !Style.AllowShortBlocksOnASingleLine))
      return 0;
    // Both lines must agree on being inside a preprocessor directive, and a
    // directive line must not start after an unescaped newline.
    if (I[1]->InPPDirective != (*I)->InPPDirective ||
        (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline))
      return 0;
    Limit = limitConsideringMacros(I + 1, E, Limit);
    AnnotatedLine &Line = **I;
    // The header has to end with the closing parenthesis of its condition.
    if (Line.Last->isNot(tok::r_paren))
      return 0;
    if (1 + I[1]->Last->TotalLength > Limit)
      return 0;
    // Do not merge an empty statement, a nested control statement or a line
    // comment.
    if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
                             tok::kw_while) ||
        I[1]->First->Type == TT_LineComment)
      return 0;
    // Only inline simple if's (no nested if or else).
    if (I + 2 != E && Line.First->is(tok::kw_if) &&
        I[2]->First->is(tok::kw_else))
      return 0;
    return 1;
  }

  /// Checks whether the block opened on the line at \p I can be put on that
  /// line.
  ///
  /// \returns 1 if the next line is the closing brace of an empty block (the
  /// brace token is adjusted so that it can be joined), 2 if the block holds
  /// a single line that fits together with the closing brace, 0 otherwise.
  unsigned
  tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
                      SmallVectorImpl<AnnotatedLine *>::const_iterator E,
                      unsigned Limit) {
    AnnotatedLine &Line = **I;

    // Don't merge ObjC @ keywords and methods.
    if (Line.First->isOneOf(tok::at, tok::minus, tok::plus))
      return 0;

    // Check that the current line allows merging. This depends on whether we
    // are in a control flow statements as well as several style flags.
    if (Line.First->isOneOf(tok::kw_else, tok::kw_case))
      return 0;
    if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try,
                            tok::kw_catch, tok::kw_for, tok::r_brace)) {
      if (!Style.AllowShortBlocksOnASingleLine)
        return 0;
      if (!Style.AllowShortIfStatementsOnASingleLine &&
          Line.First->is(tok::kw_if))
        return 0;
      if (!Style.AllowShortLoopsOnASingleLine &&
          Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for))
        return 0;
      // FIXME: Consider an option to allow short exception handling clauses on
      // a single line.
      if (Line.First->isOneOf(tok::kw_try, tok::kw_catch))
        return 0;
    }

    FormatToken *Tok = I[1]->First;
    // Empty block: the next line is just '}', optionally followed by ';'.
    if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
        (Tok->getNextNonComment() == nullptr ||
         Tok->getNextNonComment()->is(tok::semi))) {
      // We merge empty blocks even if the line exceeds the column limit.
      Tok->SpacesRequiredBefore = 0;
      Tok->CanBreakBefore = true;
      return 1;
    } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
      // We don't merge short records.
      if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct))
        return 0;

      // Check that we still have three lines and they fit into the limit.
      if (I + 2 == E || I[2]->Type == LT_Invalid)
        return 0;
      Limit = limitConsideringMacros(I + 2, E, Limit);

      if (!nextTwoLinesFitInto(I, Limit))
        return 0;

      // Second, check that the next line does not contain any braces - if it
      // does, readability declines when putting it into a single line.
      if (I[1]->Last->Type == TT_LineComment)
        return 0;
      // Braces of a braced initializer list are the one exception.
      do {
        if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit)
          return 0;
        Tok = Tok->Next;
      } while (Tok);

      // Last, check that the third line starts with a closing brace.
      Tok = I[2]->First;
      if (Tok->isNot(tok::r_brace))
        return 0;

      return 2;
    }
    return 0;
  }

  /// Returns the modified column limit for \p I if it is inside a macro and
  /// needs a trailing '\'.
  ///
  /// That is the case when the line at \p I is in a preprocessor directive and
  /// is followed by a line that neither starts after an unescaped newline nor
  /// is the end of file; \p Limit is then reduced by 2, but not below 0.
  unsigned
  limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
                         SmallVectorImpl<AnnotatedLine *>::const_iterator E,
                         unsigned Limit) {
    if (I[0]->InPPDirective && I + 1 != E &&
        !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) {
      return Limit < 2 ? 0 : Limit - 2;
    }
    return Limit;
  }

  /// Returns true if the two lines following \p I carry no forced break and,
  /// each preceded by one space, fit into \p Limit columns.
  bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
                           unsigned Limit) {
    if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore)
      return false;
    return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
  }

  /// Returns true if any token of \p Line must be preceded by a line break.
  bool containsMustBreak(const AnnotatedLine *Line) {
    for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
      if (Tok->MustBreakBefore)
        return true;
    }
    return false;
  }

  const FormatStyle &Style;
};
789 
790 class UnwrappedLineFormatter {
791 public:
  /// \brief Creates a formatter that lays out tokens with \p Indenter and
  /// reports whitespace changes to \p Whitespaces.
  ///
  /// Both pointers are stored as plain pointers. \p Style is copied into the
  /// formatter and is also used to construct the embedded \c LineJoiner.
  UnwrappedLineFormatter(ContinuationIndenter *Indenter,
                         WhitespaceManager *Whitespaces,
                         const FormatStyle &Style)
      : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
        Joiner(Style) {}
797 
798   unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
799                   int AdditionalIndent = 0, bool FixBadIndentation = false) {
800     // Try to look up already computed penalty in DryRun-mode.
801     std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey(
802         &Lines, AdditionalIndent);
803     auto CacheIt = PenaltyCache.find(CacheKey);
804     if (DryRun && CacheIt != PenaltyCache.end())
805       return CacheIt->second;
806 
807     assert(!Lines.empty());
808     unsigned Penalty = 0;
809     std::vector<int> IndentForLevel;
810     for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i)
811       IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
812     const AnnotatedLine *PreviousLine = nullptr;
813     for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(),
814                                                           E = Lines.end();
815          I != E; ++I) {
816       const AnnotatedLine &TheLine = **I;
817       const FormatToken *FirstTok = TheLine.First;
818       int Offset = getIndentOffset(*FirstTok);
819 
820       // Determine indent and try to merge multiple unwrapped lines.
821       unsigned Indent;
822       if (TheLine.InPPDirective) {
823         Indent = TheLine.Level * Style.IndentWidth;
824       } else {
825         while (IndentForLevel.size() <= TheLine.Level)
826           IndentForLevel.push_back(-1);
827         IndentForLevel.resize(TheLine.Level + 1);
828         Indent = getIndent(IndentForLevel, TheLine.Level);
829       }
830       unsigned LevelIndent = Indent;
831       if (static_cast<int>(Indent) + Offset >= 0)
832         Indent += Offset;
833 
834       // Merge multiple lines if possible.
835       unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E);
836       if (MergedLines > 0 && Style.ColumnLimit == 0) {
837         // Disallow line merging if there is a break at the start of one of the
838         // input lines.
839         for (unsigned i = 0; i < MergedLines; ++i) {
840           if (I[i + 1]->First->NewlinesBefore > 0)
841             MergedLines = 0;
842         }
843       }
844       if (!DryRun) {
845         for (unsigned i = 0; i < MergedLines; ++i) {
846           join(*I[i], *I[i + 1]);
847         }
848       }
849       I += MergedLines;
850 
851       bool FixIndentation =
852           FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn);
853       if (TheLine.First->is(tok::eof)) {
854         if (PreviousLine && PreviousLine->Affected && !DryRun) {
855           // Remove the file's trailing whitespace.
856           unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u);
857           Whitespaces->replaceWhitespace(*TheLine.First, Newlines,
858                                          /*IndentLevel=*/0, /*Spaces=*/0,
859                                          /*TargetColumn=*/0);
860         }
861       } else if (TheLine.Type != LT_Invalid &&
862                  (TheLine.Affected || FixIndentation)) {
863         if (FirstTok->WhitespaceRange.isValid()) {
864           if (!DryRun)
865             formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
866                              Indent, TheLine.InPPDirective);
867         } else {
868           Indent = LevelIndent = FirstTok->OriginalColumn;
869         }
870 
871         // If everything fits on a single line, just put it there.
872         unsigned ColumnLimit = Style.ColumnLimit;
873         if (I + 1 != E) {
874           AnnotatedLine *NextLine = I[1];
875           if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline)
876             ColumnLimit = getColumnLimit(TheLine.InPPDirective);
877         }
878 
879         if (TheLine.Last->TotalLength + Indent <= ColumnLimit) {
880           LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun);
881           while (State.NextToken) {
882             formatChildren(State, /*Newline=*/false, /*DryRun=*/false, Penalty);
883             Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
884           }
885         } else if (Style.ColumnLimit == 0) {
886           // FIXME: Implement nested blocks for ColumnLimit = 0.
887           NoColumnLimitFormatter Formatter(Indenter);
888           if (!DryRun)
889             Formatter.format(Indent, &TheLine);
890         } else {
891           Penalty += format(TheLine, Indent, DryRun);
892         }
893 
894         if (!TheLine.InPPDirective)
895           IndentForLevel[TheLine.Level] = LevelIndent;
896       } else if (TheLine.ChildrenAffected) {
897         format(TheLine.Children, DryRun);
898       } else {
899         // Format the first token if necessary, and notify the WhitespaceManager
900         // about the unchanged whitespace.
901         for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) {
902           if (Tok == TheLine.First &&
903               (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
904             unsigned LevelIndent = Tok->OriginalColumn;
905             if (!DryRun) {
906               // Remove trailing whitespace of the previous line.
907               if ((PreviousLine && PreviousLine->Affected) ||
908                   TheLine.LeadingEmptyLinesAffected) {
909                 formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent,
910                                  TheLine.InPPDirective);
911               } else {
912                 Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
913               }
914             }
915 
916             if (static_cast<int>(LevelIndent) - Offset >= 0)
917               LevelIndent -= Offset;
918             if (Tok->isNot(tok::comment) && !TheLine.InPPDirective)
919               IndentForLevel[TheLine.Level] = LevelIndent;
920           } else if (!DryRun) {
921             Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
922           }
923         }
924       }
925       if (!DryRun) {
926         for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) {
927           Tok->Finalized = true;
928         }
929       }
930       PreviousLine = *I;
931     }
932     PenaltyCache[CacheKey] = Penalty;
933     return Penalty;
934   }
935 
936 private:
937   /// \brief Formats an \c AnnotatedLine and returns the penalty.
938   ///
939   /// If \p DryRun is \c false, directly applies the changes.
940   unsigned format(const AnnotatedLine &Line, unsigned FirstIndent,
941                   bool DryRun) {
942     LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
943 
944     // If the ObjC method declaration does not fit on a line, we should format
945     // it with one arg per line.
946     if (State.Line->Type == LT_ObjCMethodDecl)
947       State.Stack.back().BreakBeforeParameter = true;
948 
949     // Find best solution in solution space.
950     return analyzeSolutionSpace(State, DryRun);
951   }
952 
  /// \brief An edge in the solution space from \c Previous->State to \c State,
  /// inserting a newline dependent on the \c NewLine.
  ///
  /// Nodes form a singly linked chain through \c Previous back to the start
  /// node, whose \c Previous is null.
  struct StateNode {
    StateNode(const LineState &State, bool NewLine, StateNode *Previous)
        : State(State), NewLine(NewLine), Previous(Previous) {}
    // The line state of this node (a copy of the state passed in).
    LineState State;
    // Whether a line break was inserted on the edge leading to this node.
    bool NewLine;
    // The node this one was derived from; null for the start node.
    StateNode *Previous;
  };
962 
  /// \brief A pair of <penalty, count> that is used to prioritize the search
  /// queue.
  ///
  /// In case of equal penalties, we want to prefer states that were inserted
  /// first. During state generation we make sure that we insert states first
  /// that break the line as late as possible.
  typedef std::pair<unsigned, unsigned> OrderedPenalty;

  /// \brief An item in the prioritized search queue. The \c StateNode's
  /// \c State has the given \c OrderedPenalty.
  typedef std::pair<OrderedPenalty, StateNode *> QueueItem;

  /// \brief The search queue type.
  ///
  /// Uses \c std::greater so that the item with the smallest
  /// \c OrderedPenalty is at the top.
  typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
                              std::greater<QueueItem> > QueueType;
977 
978   /// \brief Get the offset of the line relatively to the level.
979   ///
980   /// For example, 'public:' labels in classes are offset by 1 or 2
981   /// characters to the left from their level.
982   int getIndentOffset(const FormatToken &RootToken) {
983     if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
984       return Style.AccessModifierOffset;
985     return 0;
986   }
987 
988   /// \brief Add a new line and the required indent before the first Token
989   /// of the \c UnwrappedLine if there was no structural parsing error.
990   void formatFirstToken(FormatToken &RootToken,
991                         const AnnotatedLine *PreviousLine, unsigned IndentLevel,
992                         unsigned Indent, bool InPPDirective) {
993     unsigned Newlines =
994         std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
995     // Remove empty lines before "}" where applicable.
996     if (RootToken.is(tok::r_brace) &&
997         (!RootToken.Next ||
998          (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
999       Newlines = std::min(Newlines, 1u);
1000     if (Newlines == 0 && !RootToken.IsFirst)
1001       Newlines = 1;
1002     if (RootToken.IsFirst && !RootToken.HasUnescapedNewline)
1003       Newlines = 0;
1004 
1005     // Remove empty lines after "{".
1006     if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine &&
1007         PreviousLine->Last->is(tok::l_brace) &&
1008         PreviousLine->First->isNot(tok::kw_namespace))
1009       Newlines = 1;
1010 
1011     // Insert extra new line before access specifiers.
1012     if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
1013         RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
1014       ++Newlines;
1015 
1016     // Remove empty lines after access specifiers.
1017     if (PreviousLine && PreviousLine->First->isAccessSpecifier())
1018       Newlines = std::min(1u, Newlines);
1019 
1020     Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent,
1021                                    Indent, InPPDirective &&
1022                                                !RootToken.HasUnescapedNewline);
1023   }
1024 
1025   /// \brief Get the indent of \p Level from \p IndentForLevel.
1026   ///
1027   /// \p IndentForLevel must contain the indent for the level \c l
1028   /// at \p IndentForLevel[l], or a value < 0 if the indent for
1029   /// that level is unknown.
1030   unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
1031     if (IndentForLevel[Level] != -1)
1032       return IndentForLevel[Level];
1033     if (Level == 0)
1034       return 0;
1035     return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
1036   }
1037 
1038   void join(AnnotatedLine &A, const AnnotatedLine &B) {
1039     assert(!A.Last->Next);
1040     assert(!B.First->Previous);
1041     if (B.Affected)
1042       A.Affected = true;
1043     A.Last->Next = B.First;
1044     B.First->Previous = A.Last;
1045     B.First->CanBreakBefore = true;
1046     unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
1047     for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
1048       Tok->TotalLength += LengthA;
1049       A.Last = Tok;
1050     }
1051   }
1052 
1053   unsigned getColumnLimit(bool InPPDirective) const {
1054     // In preprocessor directives reserve two chars for trailing " \"
1055     return Style.ColumnLimit - (InPPDirective ? 2 : 0);
1056   }
1057 
1058   struct CompareLineStatePointers {
1059     bool operator()(LineState *obj1, LineState *obj2) const {
1060       return *obj1 < *obj2;
1061     }
1062   };
1063 
  /// \brief Analyze the entire solution space starting from \p InitialState.
  ///
  /// This implements a variant of Dijkstra's algorithm on the graph that spans
  /// the solution space (\c LineStates are the nodes). The algorithm tries to
  /// find the shortest path (the one with lowest penalty) from \p InitialState
  /// to a state where all tokens are placed. Returns the penalty, or 0 if the
  /// queue runs empty before such a state is reached.
  ///
  /// If \p DryRun is \c false, directly applies the changes.
  unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) {
    // States that have already been expanded, compared by value.
    std::set<LineState *, CompareLineStatePointers> Seen;

    // Increasing count of \c StateNode items we have created. This is used to
    // create a deterministic order independent of the container.
    unsigned Count = 0;
    QueueType Queue;

    // Insert start element into queue.
    StateNode *Node =
        new (Allocator.Allocate()) StateNode(InitialState, false, nullptr);
    Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
    ++Count;

    unsigned Penalty = 0;

    // While not empty, take first element and follow edges.
    while (!Queue.empty()) {
      Penalty = Queue.top().first.first;
      // Note: shadows the start node declared above.
      StateNode *Node = Queue.top().second;
      if (!Node->State.NextToken) {
        // All tokens are placed. The node is deliberately left on the queue;
        // it is read again below to reconstruct the solution.
        DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
        break;
      }
      Queue.pop();

      // Cut off the analysis of certain solutions if the analysis gets too
      // complex. See description of IgnoreStackForComparison.
      if (Count > 10000)
        Node->State.IgnoreStackForComparison = true;

      if (!Seen.insert(&Node->State).second)
        // State already examined with lower penalty.
        continue;

      // Follow the edges permitted by an earlier decision for this token: an
      // unformatted token may take both, otherwise only the recorded one.
      FormatDecision LastFormat = Node->State.NextToken->Decision;
      if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
        addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
      if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
        addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
    }

    if (Queue.empty()) {
      // We were unable to find a solution, do nothing.
      // FIXME: Add diagnostic?
      DEBUG(llvm::dbgs() << "Could not find a solution.\n");
      return 0;
    }

    // Reconstruct the solution.
    if (!DryRun)
      reconstructPath(InitialState, Queue.top().second);

    DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
    DEBUG(llvm::dbgs() << "---\n");

    return Penalty;
  }
1130 
1131   void reconstructPath(LineState &State, StateNode *Current) {
1132     std::deque<StateNode *> Path;
1133     // We do not need a break before the initial token.
1134     while (Current->Previous) {
1135       Path.push_front(Current);
1136       Current = Current->Previous;
1137     }
1138     for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
1139          I != E; ++I) {
1140       unsigned Penalty = 0;
1141       formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
1142       Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
1143 
1144       DEBUG({
1145         if ((*I)->NewLine) {
1146           llvm::dbgs() << "Penalty for placing "
1147                        << (*I)->Previous->State.NextToken->Tok.getName() << ": "
1148                        << Penalty << "\n";
1149         }
1150       });
1151     }
1152   }
1153 
1154   /// \brief Add the following state to the analysis queue \c Queue.
1155   ///
1156   /// Assume the current state is \p PreviousNode and has been reached with a
1157   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
1158   void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
1159                            bool NewLine, unsigned *Count, QueueType *Queue) {
1160     if (NewLine && !Indenter->canBreak(PreviousNode->State))
1161       return;
1162     if (!NewLine && Indenter->mustBreak(PreviousNode->State))
1163       return;
1164 
1165     StateNode *Node = new (Allocator.Allocate())
1166         StateNode(PreviousNode->State, NewLine, PreviousNode);
1167     if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
1168       return;
1169 
1170     Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
1171 
1172     Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
1173     ++(*Count);
1174   }
1175 
  /// \brief If the \p State's next token is an r_brace closing a nested block,
  /// format the nested block before it.
  ///
  /// Returns \c true if all children could be placed successfully and adapts
  /// \p Penalty as well as \p State. If \p DryRun is false, also directly
  /// creates changes using \c Whitespaces.
  ///
  /// The crucial idea here is that children always get formatted upon
  /// encountering the closing brace right after the nested block. Now, if we
  /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
  /// \c false), the entire block has to be kept on the same line (which is only
  /// possible if it fits on the line, only contains a single statement, etc.).
  ///
  /// If \p NewLine is true, we format the nested block on separate lines, i.e.
  /// break after the "{", format all lines with correct indentation and then
  /// put the closing "}" on yet another new line.
  ///
  /// This enables us to keep the simple structure of the
  /// \c UnwrappedLineFormatter, where we only have two options for each token:
  /// break or don't break.
  bool formatChildren(LineState &State, bool NewLine, bool DryRun,
                      unsigned &Penalty) {
    // The child lines hang off the token directly before the next token; the
    // block check below looks at the previous non-comment token instead.
    FormatToken &Previous = *State.NextToken->Previous;
    const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
    if (!LBrace || LBrace->isNot(tok::l_brace) ||
        LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
      // The previous token does not open a block. Nothing to do. We don't
      // assert so that we can simply call this function for all tokens.
      return true;

    if (NewLine) {
      // Compute how far the children are shifted relative to the indent that
      // their level alone would give them.
      int AdditionalIndent =
          State.FirstIndent - State.Line->Level * Style.IndentWidth;
      if (State.Stack.size() < 2 ||
          !State.Stack[State.Stack.size() - 2].JSFunctionInlined) {
        AdditionalIndent = State.Stack.back().Indent -
                           Previous.Children[0]->Level * Style.IndentWidth;
      }

      Penalty += format(Previous.Children, DryRun, AdditionalIndent,
                        /*FixBadIndentation=*/true);
      return true;
    }

    // Cannot merge multiple statements into a single line.
    if (Previous.Children.size() > 1)
      return false;

    // Cannot merge into one line if this line ends on a comment.
    if (Previous.is(tok::comment))
      return false;

    // We can't put the closing "}" on a line with a trailing comment.
    if (Previous.Children[0]->Last->isTrailingComment())
      return false;

    // If the child line exceeds the column limit, we wouldn't want to merge it.
    // We add +2 for the trailing " }".
    if (Style.ColumnLimit > 0 &&
        Previous.Children[0]->Last->TotalLength + State.Column + 2 >
            Style.ColumnLimit)
      return false;

    if (!DryRun) {
      // Put the single child directly behind the "{", separated by one space.
      Whitespaces->replaceWhitespace(
          *Previous.Children[0]->First,
          /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
          /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
    }
    Penalty += format(*Previous.Children[0], State.Column + 1, DryRun);

    // Account for the space and the child line in the current column.
    State.Column += 1 + Previous.Children[0]->Last->TotalLength;
    return true;
  }
1250 
  // Computes the line states; stored as passed to the constructor.
  ContinuationIndenter *Indenter;
  // Receives all whitespace changes; stored as passed to the constructor.
  WhitespaceManager *Whitespaces;
  // A copy of the style passed to the constructor.
  FormatStyle Style;
  // Decides how many following lines can be merged into the current one.
  LineJoiner Joiner;

  // Backing storage for all StateNodes created during the solution search.
  llvm::SpecificBumpPtrAllocator<StateNode> Allocator;

  // Cache to store the penalty of formatting a vector of AnnotatedLines
  // starting from a specific additional offset. Improves performance if there
  // are many nested blocks.
  std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>,
           unsigned> PenaltyCache;
1263 };
1264 
1265 class FormatTokenLexer {
1266 public:
  /// \brief Creates a token lexer on top of \p Lex.
  ///
  /// Switches \p Lex into keep-whitespace mode and resolves every name in
  /// \c Style.ForEachMacros to its entry in the identifier table.
  FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
                   encoding::Encoding Encoding)
      : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
        Column(0), TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr),
        Style(Style), IdentTable(getFormattingLangOpts()), Encoding(Encoding),
        FirstInLineIndex(0) {
    Lex.SetKeepWhitespaceMode(true);

    for (const std::string &ForEachMacro : Style.ForEachMacros)
      ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
    // Sorted by pointer value - presumably to allow a binary search later;
    // the lookup is not visible here.
    std::sort(ForEachMacros.begin(), ForEachMacros.end());
  }
1279 
1280   ArrayRef<FormatToken *> lex() {
1281     assert(Tokens.empty());
1282     assert(FirstInLineIndex == 0);
1283     do {
1284       Tokens.push_back(getNextToken());
1285       tryMergePreviousTokens();
1286       if (Tokens.back()->NewlinesBefore > 0)
1287         FirstInLineIndex = Tokens.size() - 1;
1288     } while (Tokens.back()->Tok.isNot(tok::eof));
1289     return Tokens;
1290   }
1291 
  /// \brief Returns the identifier table owned by this lexer.
  IdentifierTable &getIdentTable() { return IdentTable; }
1293 
1294 private:
1295   void tryMergePreviousTokens() {
1296     if (tryMerge_TMacro())
1297       return;
1298     if (tryMergeConflictMarkers())
1299       return;
1300 
1301     if (Style.Language == FormatStyle::LK_JavaScript) {
1302       if (tryMergeEscapeSequence())
1303         return;
1304       if (tryMergeJSRegexLiteral())
1305         return;
1306 
1307       static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
1308       static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
1309       static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater,
1310                                                tok::greaterequal };
1311       static tok::TokenKind JSRightArrow[] = { tok::equal, tok::greater };
1312       // FIXME: We probably need to change token type to mimic operator with the
1313       // correct priority.
1314       if (tryMergeTokens(JSIdentity))
1315         return;
1316       if (tryMergeTokens(JSNotIdentity))
1317         return;
1318       if (tryMergeTokens(JSShiftEqual))
1319         return;
1320       if (tryMergeTokens(JSRightArrow))
1321         return;
1322     }
1323   }
1324 
1325   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) {
1326     if (Tokens.size() < Kinds.size())
1327       return false;
1328 
1329     SmallVectorImpl<FormatToken *>::const_iterator First =
1330         Tokens.end() - Kinds.size();
1331     if (!First[0]->is(Kinds[0]))
1332       return false;
1333     unsigned AddLength = 0;
1334     for (unsigned i = 1; i < Kinds.size(); ++i) {
1335       if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
1336                                          First[i]->WhitespaceRange.getEnd())
1337         return false;
1338       AddLength += First[i]->TokenText.size();
1339     }
1340     Tokens.resize(Tokens.size() - Kinds.size() + 1);
1341     First[0]->TokenText = StringRef(First[0]->TokenText.data(),
1342                                     First[0]->TokenText.size() + AddLength);
1343     First[0]->ColumnWidth += AddLength;
1344     return true;
1345   }
1346 
1347   // Tries to merge an escape sequence, i.e. a "\\" and the following
1348   // character. Use e.g. inside JavaScript regex literals.
1349   bool tryMergeEscapeSequence() {
1350     if (Tokens.size() < 2)
1351       return false;
1352     FormatToken *Previous = Tokens[Tokens.size() - 2];
1353     if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\" ||
1354         Tokens.back()->NewlinesBefore != 0)
1355       return false;
1356     Previous->ColumnWidth += Tokens.back()->ColumnWidth;
1357     StringRef Text = Previous->TokenText;
1358     Previous->TokenText =
1359         StringRef(Text.data(), Text.size() + Tokens.back()->TokenText.size());
1360     Tokens.resize(Tokens.size() - 1);
1361     return true;
1362   }
1363 
  // Try to determine whether the current token ends a JavaScript regex literal.
  // We heuristically assume that this is a regex literal if we find two
  // unescaped slashes on a line and the token before the first slash is one of
  // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by
  // a division.
  //
  // On success, all tokens after the opening slash are dropped and the opening
  // slash becomes a TT_RegexLiteral token whose column width spans up to the
  // closing slash. Returns whether a merge happened.
  bool tryMergeJSRegexLiteral() {
    // The last token has to be a slash that is not escaped by a preceding
    // backslash token.
    if (Tokens.size() < 2 || Tokens.back()->isNot(tok::slash) ||
        (Tokens[Tokens.size() - 2]->is(tok::unknown) &&
         Tokens[Tokens.size() - 2]->TokenText == "\\"))
      return false;
    // Number of tokens between the candidate opening slash (exclusive) and the
    // end of the token list (inclusive).
    unsigned TokenCount = 0;
    unsigned LastColumn = Tokens.back()->OriginalColumn;
    // Walk backwards from the token before the closing slash.
    for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
      ++TokenCount;
      if (I[0]->is(tok::slash) && I + 1 != E &&
          (I[1]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, tok::r_brace,
                         tok::exclaim, tok::l_square, tok::colon, tok::comma,
                         tok::question, tok::kw_return) ||
           I[1]->isBinaryOperator())) {
        // Shrinking leaves the opening slash as the last token; I still refers
        // to it.
        Tokens.resize(Tokens.size() - TokenCount);
        Tokens.back()->Tok.setKind(tok::unknown);
        Tokens.back()->Type = TT_RegexLiteral;
        Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn;
        return true;
      }

      // There can't be a newline inside a regex literal.
      if (I[0]->NewlinesBefore > 0)
        return false;
    }
    return false;
  }
1396 
1397   bool tryMerge_TMacro() {
1398     if (Tokens.size() < 4)
1399       return false;
1400     FormatToken *Last = Tokens.back();
1401     if (!Last->is(tok::r_paren))
1402       return false;
1403 
1404     FormatToken *String = Tokens[Tokens.size() - 2];
1405     if (!String->is(tok::string_literal) || String->IsMultiline)
1406       return false;
1407 
1408     if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
1409       return false;
1410 
1411     FormatToken *Macro = Tokens[Tokens.size() - 4];
1412     if (Macro->TokenText != "_T")
1413       return false;
1414 
1415     const char *Start = Macro->TokenText.data();
1416     const char *End = Last->TokenText.data() + Last->TokenText.size();
1417     String->TokenText = StringRef(Start, End - Start);
1418     String->IsFirst = Macro->IsFirst;
1419     String->LastNewlineOffset = Macro->LastNewlineOffset;
1420     String->WhitespaceRange = Macro->WhitespaceRange;
1421     String->OriginalColumn = Macro->OriginalColumn;
1422     String->ColumnWidth = encoding::columnWidthWithTabs(
1423         String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
1424 
1425     Tokens.pop_back();
1426     Tokens.pop_back();
1427     Tokens.pop_back();
1428     Tokens.back() = String;
1429     return true;
1430   }
1431 
1432   bool tryMergeConflictMarkers() {
1433     if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
1434       return false;
1435 
1436     // Conflict lines look like:
1437     // <marker> <text from the vcs>
1438     // For example:
1439     // >>>>>>> /file/in/file/system at revision 1234
1440     //
1441     // We merge all tokens in a line that starts with a conflict marker
1442     // into a single token with a special token type that the unwrapped line
1443     // parser will use to correctly rebuild the underlying code.
1444 
1445     FileID ID;
1446     // Get the position of the first token in the line.
1447     unsigned FirstInLineOffset;
1448     std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
1449         Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
1450     StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
1451     // Calculate the offset of the start of the current line.
1452     auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
1453     if (LineOffset == StringRef::npos) {
1454       LineOffset = 0;
1455     } else {
1456       ++LineOffset;
1457     }
1458 
1459     auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
1460     StringRef LineStart;
1461     if (FirstSpace == StringRef::npos) {
1462       LineStart = Buffer.substr(LineOffset);
1463     } else {
1464       LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
1465     }
1466 
1467     TokenType Type = TT_Unknown;
1468     if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
1469       Type = TT_ConflictStart;
1470     } else if (LineStart == "|||||||" || LineStart == "=======" ||
1471                LineStart == "====") {
1472       Type = TT_ConflictAlternative;
1473     } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
1474       Type = TT_ConflictEnd;
1475     }
1476 
1477     if (Type != TT_Unknown) {
1478       FormatToken *Next = Tokens.back();
1479 
1480       Tokens.resize(FirstInLineIndex + 1);
1481       // We do not need to build a complete token here, as we will skip it
1482       // during parsing anyway (as we must not touch whitespace around conflict
1483       // markers).
1484       Tokens.back()->Type = Type;
1485       Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
1486 
1487       Tokens.push_back(Next);
1488       return true;
1489     }
1490 
1491     return false;
1492   }
1493 
  /// \brief Produces the next token of the input, with the whitespace that
  /// precedes it folded into the token.
  ///
  /// If the previous call split a '>>' (GreaterStashed is set), this returns a
  /// synthesized second '>' instead of lexing. Otherwise it reads raw tokens,
  /// absorbing whitespace-only tok::unknown tokens into WhitespaceRange while
  /// tracking Column, NewlinesBefore, HasUnescapedNewline and
  /// LastNewlineOffset, and then post-processes the first significant token:
  /// trailing whitespace is trimmed from comments, raw identifiers are
  /// resolved through IdentTable, '>>' is split into two '>' tokens, and the
  /// column widths and the IsMultiline / IsForEachMacro flags are computed.
  ///
  /// \returns The new token (allocated from Allocator), which is also stored
  /// in FormatTok.
  FormatToken *getNextToken() {
    if (GreaterStashed) {
      // Create a synthesized second '>' token.
      // FIXME: Increment Column and set OriginalColumn.
      // The copy keeps the lexer data of the first '>' token; it gets an empty
      // whitespace range located one character after that token's location.
      Token Greater = FormatTok->Tok;
      FormatTok = new (Allocator.Allocate()) FormatToken;
      FormatTok->Tok = Greater;
      SourceLocation GreaterLocation =
          FormatTok->Tok.getLocation().getLocWithOffset(1);
      FormatTok->WhitespaceRange =
          SourceRange(GreaterLocation, GreaterLocation);
      FormatTok->TokenText = ">";
      FormatTok->ColumnWidth = 1;
      GreaterStashed = false;
      return FormatTok;
    }

    FormatTok = new (Allocator.Allocate()) FormatToken;
    readRawToken(*FormatTok);
    // Whitespace trimmed off the end of the previous comment token
    // (TrailingWhitespace) is counted as leading whitespace of this token, so
    // the whitespace range starts that many characters earlier.
    SourceLocation WhitespaceStart =
        FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
    FormatTok->IsFirst = IsFirstToken;
    IsFirstToken = false;

    // Consume and record whitespace until we find a significant token.
    unsigned WhitespaceLength = TrailingWhitespace;
    while (FormatTok->Tok.is(tok::unknown)) {
      for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
        switch (FormatTok->TokenText[i]) {
        case '\n':
          ++FormatTok->NewlinesBefore;
          // The newline counts as unescaped unless it is directly preceded by
          // a backslash, or by "\\\r" (an escaped CRLF line ending).
          // FIXME: This is technically incorrect, as it could also
          // be a literal backslash at the end of the line.
          if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
                         (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
                          FormatTok->TokenText[i - 2] != '\\')))
            FormatTok->HasUnescapedNewline = true;
          // Offset, from the start of the whitespace, of the first character
          // after this newline.
          FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
          Column = 0;
          break;
        case '\r':
        case '\f':
        case '\v':
          // These characters restart the column count.
          Column = 0;
          break;
        case ' ':
          ++Column;
          break;
        case '\t':
          // Advance to the next multiple of the tab width.
          Column += Style.TabWidth - Column % Style.TabWidth;
          break;
        case '\\':
          ++Column;
          // A backslash that is not directly followed by a line break is real
          // content rather than a line continuation.
          if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
                             FormatTok->TokenText[i + 1] != '\n'))
            FormatTok->Type = TT_ImplicitStringLiteral;
          break;
        default:
          // Any other character means this unknown token is not whitespace.
          FormatTok->Type = TT_ImplicitStringLiteral;
          ++Column;
          break;
        }
      }

      // The token contained non-whitespace content: stop here and treat it as
      // the significant token instead of absorbing it.
      if (FormatTok->Type == TT_ImplicitStringLiteral)
        break;
      WhitespaceLength += FormatTok->Tok.getLength();

      readRawToken(*FormatTok);
    }

    // In case the token starts with escaped newlines, we want to
    // take them into account as whitespace - this pattern is quite frequent
    // in macro definitions.
    // FIXME: Add a more explicit test.
    while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
           FormatTok->TokenText[1] == '\n') {
      ++FormatTok->NewlinesBefore;
      WhitespaceLength += 2;
      Column = 0;
      FormatTok->TokenText = FormatTok->TokenText.substr(2);
    }

    FormatTok->WhitespaceRange = SourceRange(
        WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));

    FormatTok->OriginalColumn = Column;

    // Kind-specific fix-ups. TrailingWhitespace is only non-zero after a
    // comment whose trailing blanks were trimmed.
    TrailingWhitespace = 0;
    if (FormatTok->Tok.is(tok::comment)) {
      // FIXME: Add the trimmed whitespace to Column.
      StringRef UntrimmedText = FormatTok->TokenText;
      FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
      TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
    } else if (FormatTok->Tok.is(tok::raw_identifier)) {
      // Look the identifier up so that the token gets its identifier info and
      // the token kind registered for it in IdentTable.
      IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
      FormatTok->Tok.setIdentifierInfo(&Info);
      FormatTok->Tok.setKind(Info.getTokenID());
    } else if (FormatTok->Tok.is(tok::greatergreater)) {
      // Split '>>' into two '>' tokens: this one keeps the first character and
      // the second one is synthesized by the next call (see GreaterStashed).
      FormatTok->Tok.setKind(tok::greater);
      FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
      GreaterStashed = true;
    }

    // Now FormatTok is the next non-whitespace token.

    StringRef Text = FormatTok->TokenText;
    size_t FirstNewlinePos = Text.find('\n');
    if (FirstNewlinePos == StringRef::npos) {
      // Single-line token: its width simply advances the running column.
      // FIXME: ColumnWidth actually depends on the start column, we need to
      // take this into account when the token is moved.
      FormatTok->ColumnWidth =
          encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
      Column += FormatTok->ColumnWidth;
    } else {
      // Multi-line token: ColumnWidth covers only the first line.
      FormatTok->IsMultiline = true;
      // FIXME: ColumnWidth actually depends on the start column, we need to
      // take this into account when the token is moved.
      FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
          Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);

      // The last line of the token always starts in column 0.
      // Thus, the length can be precomputed even in the presence of tabs.
      FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
          Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
          Encoding);
      Column = FormatTok->LastLineColumnWidth;
    }

    // std::binary_search requires a sorted range.
    // NOTE(review): presumably ForEachMacros is sorted where it is filled
    // (outside this excerpt) - confirm.
    FormatTok->IsForEachMacro =
        std::binary_search(ForEachMacros.begin(), ForEachMacros.end(),
                           FormatTok->Tok.getIdentifierInfo());

    return FormatTok;
  }
1629 
  // The token most recently created by getNextToken().
  FormatToken *FormatTok;
  // Copied into FormatToken::IsFirst and then cleared by getNextToken().
  // NOTE(review): the initial value is set outside this excerpt - presumably
  // true so that only the very first token is flagged.
  bool IsFirstToken;
  // Set when a '>>' token was split; the next getNextToken() call then returns
  // the synthesized second '>' instead of lexing.
  bool GreaterStashed;
  // Running column; getNextToken() stores it as the next token's
  // OriginalColumn and advances it past whitespace and token text.
  unsigned Column;
  // Number of trailing whitespace characters trimmed from the preceding
  // comment token; counted as leading whitespace of the following token.
  unsigned TrailingWhitespace;
  // Lexer that readRawToken() pulls raw tokens from.
  Lexer &Lex;
  // Used to map token locations to character data and buffer offsets.
  SourceManager &SourceMgr;
  // Style in effect; TabWidth and Language are read while lexing.
  FormatStyle &Style;
  // Resolves raw identifiers to IdentifierInfo and token kinds.
  IdentifierTable IdentTable;
  // Encoding passed to the column width computations.
  encoding::Encoding Encoding;
  // Storage for all FormatToken objects created by getNextToken().
  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
  // Index (in 'Tokens') of the last token that starts a new line.
  unsigned FirstInLineIndex;
  // The tokens collected so far.
  // NOTE(review): presumably the sequence handed out by lex() - confirm.
  SmallVector<FormatToken *, 16> Tokens;
  // Identifiers tested with std::binary_search to set IsForEachMacro.
  // NOTE(review): must therefore be sorted; filled outside this excerpt.
  SmallVector<IdentifierInfo *, 8> ForEachMacros;
1645 
1646   void readRawToken(FormatToken &Tok) {
1647     Lex.LexFromRawLexer(Tok.Tok);
1648     Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1649                               Tok.Tok.getLength());
1650     // For formatting, treat unterminated string literals like normal string
1651     // literals.
1652     if (Tok.is(tok::unknown)) {
1653       if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
1654         Tok.Tok.setKind(tok::string_literal);
1655         Tok.IsUnterminatedLiteral = true;
1656       } else if (Style.Language == FormatStyle::LK_JavaScript &&
1657                  Tok.TokenText == "''") {
1658         Tok.Tok.setKind(tok::char_constant);
1659       }
1660     }
1661   }
1662 };
1663 
1664 static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1665   switch (Language) {
1666   case FormatStyle::LK_Cpp:
1667     return "C++";
1668   case FormatStyle::LK_JavaScript:
1669     return "JavaScript";
1670   case FormatStyle::LK_Proto:
1671     return "Proto";
1672   default:
1673     return "Unknown";
1674   }
1675 }
1676 
1677 class Formatter : public UnwrappedLineConsumer {
1678 public:
1679   Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
1680             const std::vector<CharSourceRange> &Ranges)
1681       : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1682         Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
1683         Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
1684         Encoding(encoding::detectEncoding(Lex.getBuffer())) {
1685     DEBUG(llvm::dbgs() << "File encoding: "
1686                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1687                                                                : "unknown")
1688                        << "\n");
1689     DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
1690                        << "\n");
1691   }
1692 
1693   tooling::Replacements format() {
1694     tooling::Replacements Result;
1695     FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
1696 
1697     UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
1698     bool StructuralError = Parser.parse();
1699     assert(UnwrappedLines.rbegin()->empty());
1700     for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1701          ++Run) {
1702       DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1703       SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1704       for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1705         AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1706       }
1707       tooling::Replacements RunResult =
1708           format(AnnotatedLines, StructuralError, Tokens);
1709       DEBUG({
1710         llvm::dbgs() << "Replacements for run " << Run << ":\n";
1711         for (tooling::Replacements::iterator I = RunResult.begin(),
1712                                              E = RunResult.end();
1713              I != E; ++I) {
1714           llvm::dbgs() << I->toString() << "\n";
1715         }
1716       });
1717       for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1718         delete AnnotatedLines[i];
1719       }
1720       Result.insert(RunResult.begin(), RunResult.end());
1721       Whitespaces.reset();
1722     }
1723     return Result;
1724   }
1725 
1726   tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1727                                bool StructuralError, FormatTokenLexer &Tokens) {
1728     TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
1729     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1730       Annotator.annotate(*AnnotatedLines[i]);
1731     }
1732     deriveLocalStyle(AnnotatedLines);
1733     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1734       Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1735     }
1736     computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
1737 
1738     Annotator.setCommentLineLevels(AnnotatedLines);
1739     ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding,
1740                                   BinPackInconclusiveFunctions);
1741     UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style);
1742     Formatter.format(AnnotatedLines, /*DryRun=*/false);
1743     return Whitespaces.generateReplacements();
1744   }
1745 
1746 private:
1747   // Determines which lines are affected by the SourceRanges given as input.
1748   // Returns \c true if at least one line between I and E or one of their
1749   // children is affected.
1750   bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
1751                             SmallVectorImpl<AnnotatedLine *>::iterator E) {
1752     bool SomeLineAffected = false;
1753     const AnnotatedLine *PreviousLine = nullptr;
1754     while (I != E) {
1755       AnnotatedLine *Line = *I;
1756       Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
1757 
1758       // If a line is part of a preprocessor directive, it needs to be formatted
1759       // if any token within the directive is affected.
1760       if (Line->InPPDirective) {
1761         FormatToken *Last = Line->Last;
1762         SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
1763         while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
1764           Last = (*PPEnd)->Last;
1765           ++PPEnd;
1766         }
1767 
1768         if (affectsTokenRange(*Line->First, *Last,
1769                               /*IncludeLeadingNewlines=*/false)) {
1770           SomeLineAffected = true;
1771           markAllAsAffected(I, PPEnd);
1772         }
1773         I = PPEnd;
1774         continue;
1775       }
1776 
1777       if (nonPPLineAffected(Line, PreviousLine))
1778         SomeLineAffected = true;
1779 
1780       PreviousLine = Line;
1781       ++I;
1782     }
1783     return SomeLineAffected;
1784   }
1785 
1786   // Determines whether 'Line' is affected by the SourceRanges given as input.
1787   // Returns \c true if line or one if its children is affected.
1788   bool nonPPLineAffected(AnnotatedLine *Line,
1789                          const AnnotatedLine *PreviousLine) {
1790     bool SomeLineAffected = false;
1791     Line->ChildrenAffected =
1792         computeAffectedLines(Line->Children.begin(), Line->Children.end());
1793     if (Line->ChildrenAffected)
1794       SomeLineAffected = true;
1795 
1796     // Stores whether one of the line's tokens is directly affected.
1797     bool SomeTokenAffected = false;
1798     // Stores whether we need to look at the leading newlines of the next token
1799     // in order to determine whether it was affected.
1800     bool IncludeLeadingNewlines = false;
1801 
1802     // Stores whether the first child line of any of this line's tokens is
1803     // affected.
1804     bool SomeFirstChildAffected = false;
1805 
1806     for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
1807       // Determine whether 'Tok' was affected.
1808       if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
1809         SomeTokenAffected = true;
1810 
1811       // Determine whether the first child of 'Tok' was affected.
1812       if (!Tok->Children.empty() && Tok->Children.front()->Affected)
1813         SomeFirstChildAffected = true;
1814 
1815       IncludeLeadingNewlines = Tok->Children.empty();
1816     }
1817 
1818     // Was this line moved, i.e. has it previously been on the same line as an
1819     // affected line?
1820     bool LineMoved = PreviousLine && PreviousLine->Affected &&
1821                      Line->First->NewlinesBefore == 0;
1822 
1823     bool IsContinuedComment =
1824         Line->First->is(tok::comment) && Line->First->Next == nullptr &&
1825         Line->First->NewlinesBefore < 2 && PreviousLine &&
1826         PreviousLine->Affected && PreviousLine->Last->is(tok::comment);
1827 
1828     if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
1829         IsContinuedComment) {
1830       Line->Affected = true;
1831       SomeLineAffected = true;
1832     }
1833     return SomeLineAffected;
1834   }
1835 
1836   // Marks all lines between I and E as well as all their children as affected.
1837   void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
1838                          SmallVectorImpl<AnnotatedLine *>::iterator E) {
1839     while (I != E) {
1840       (*I)->Affected = true;
1841       markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
1842       ++I;
1843     }
1844   }
1845 
1846   // Returns true if the range from 'First' to 'Last' intersects with one of the
1847   // input ranges.
1848   bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
1849                          bool IncludeLeadingNewlines) {
1850     SourceLocation Start = First.WhitespaceRange.getBegin();
1851     if (!IncludeLeadingNewlines)
1852       Start = Start.getLocWithOffset(First.LastNewlineOffset);
1853     SourceLocation End = Last.getStartOfNonWhitespace();
1854     if (Last.TokenText.size() > 0)
1855       End = End.getLocWithOffset(Last.TokenText.size() - 1);
1856     CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
1857     return affectsCharSourceRange(Range);
1858   }
1859 
1860   // Returns true if one of the input ranges intersect the leading empty lines
1861   // before 'Tok'.
1862   bool affectsLeadingEmptyLines(const FormatToken &Tok) {
1863     CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
1864         Tok.WhitespaceRange.getBegin(),
1865         Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
1866     return affectsCharSourceRange(EmptyLineRange);
1867   }
1868 
1869   // Returns true if 'Range' intersects with one of the input ranges.
1870   bool affectsCharSourceRange(const CharSourceRange &Range) {
1871     for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
1872                                                           E = Ranges.end();
1873          I != E; ++I) {
1874       if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
1875           !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
1876         return true;
1877     }
1878     return false;
1879   }
1880 
1881   static bool inputUsesCRLF(StringRef Text) {
1882     return Text.count('\r') * 2 > Text.count('\n');
1883   }
1884 
1885   void
1886   deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1887     unsigned CountBoundToVariable = 0;
1888     unsigned CountBoundToType = 0;
1889     bool HasCpp03IncompatibleFormat = false;
1890     bool HasBinPackedFunction = false;
1891     bool HasOnePerLineFunction = false;
1892     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1893       if (!AnnotatedLines[i]->First->Next)
1894         continue;
1895       FormatToken *Tok = AnnotatedLines[i]->First->Next;
1896       while (Tok->Next) {
1897         if (Tok->Type == TT_PointerOrReference) {
1898           bool SpacesBefore =
1899               Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1900           bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1901                              Tok->Next->WhitespaceRange.getEnd();
1902           if (SpacesBefore && !SpacesAfter)
1903             ++CountBoundToVariable;
1904           else if (!SpacesBefore && SpacesAfter)
1905             ++CountBoundToType;
1906         }
1907 
1908         if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1909           if (Tok->is(tok::coloncolon) &&
1910               Tok->Previous->Type == TT_TemplateOpener)
1911             HasCpp03IncompatibleFormat = true;
1912           if (Tok->Type == TT_TemplateCloser &&
1913               Tok->Previous->Type == TT_TemplateCloser)
1914             HasCpp03IncompatibleFormat = true;
1915         }
1916 
1917         if (Tok->PackingKind == PPK_BinPacked)
1918           HasBinPackedFunction = true;
1919         if (Tok->PackingKind == PPK_OnePerLine)
1920           HasOnePerLineFunction = true;
1921 
1922         Tok = Tok->Next;
1923       }
1924     }
1925     if (Style.DerivePointerAlignment) {
1926       if (CountBoundToType > CountBoundToVariable)
1927         Style.PointerAlignment = FormatStyle::PAS_Left;
1928       else if (CountBoundToType < CountBoundToVariable)
1929         Style.PointerAlignment = FormatStyle::PAS_Right;
1930     }
1931     if (Style.Standard == FormatStyle::LS_Auto) {
1932       Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1933                                                   : FormatStyle::LS_Cpp03;
1934     }
1935     BinPackInconclusiveFunctions =
1936         HasBinPackedFunction || !HasOnePerLineFunction;
1937   }
1938 
1939   void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
1940     assert(!UnwrappedLines.empty());
1941     UnwrappedLines.back().push_back(TheLine);
1942   }
1943 
1944   void finishRun() override {
1945     UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1946   }
1947 
1948   FormatStyle Style;
1949   Lexer &Lex;
1950   SourceManager &SourceMgr;
1951   WhitespaceManager Whitespaces;
1952   SmallVector<CharSourceRange, 8> Ranges;
1953   SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
1954 
1955   encoding::Encoding Encoding;
1956   bool BinPackInconclusiveFunctions;
1957 };
1958 
1959 } // end anonymous namespace
1960 
1961 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1962                                SourceManager &SourceMgr,
1963                                std::vector<CharSourceRange> Ranges) {
1964   if (Style.DisableFormat) {
1965     tooling::Replacements EmptyResult;
1966     return EmptyResult;
1967   }
1968 
1969   Formatter formatter(Style, Lex, SourceMgr, Ranges);
1970   return formatter.format();
1971 }
1972 
1973 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1974                                std::vector<tooling::Range> Ranges,
1975                                StringRef FileName) {
1976   FileManager Files((FileSystemOptions()));
1977   DiagnosticsEngine Diagnostics(
1978       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1979       new DiagnosticOptions);
1980   SourceManager SourceMgr(Diagnostics, Files);
1981   llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1982   const clang::FileEntry *Entry =
1983       Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1984   SourceMgr.overrideFileContents(Entry, Buf);
1985   FileID ID =
1986       SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1987   Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
1988             getFormattingLangOpts(Style.Standard));
1989   SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1990   std::vector<CharSourceRange> CharRanges;
1991   for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1992     SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset());
1993     SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
1994     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1995   }
1996   return reformat(Style, Lex, SourceMgr, CharRanges);
1997 }
1998 
1999 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) {
2000   LangOptions LangOpts;
2001   LangOpts.CPlusPlus = 1;
2002   LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
2003   LangOpts.CPlusPlus1y = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
2004   LangOpts.LineComment = 1;
2005   LangOpts.CXXOperatorNames = 1;
2006   LangOpts.Bool = 1;
2007   LangOpts.ObjC1 = 1;
2008   LangOpts.ObjC2 = 1;
2009   return LangOpts;
2010 }
2011 
/// \brief Help text describing the values accepted by a "-style" option: the
/// predefined style names, "file" to load a .clang-format file, and an inline
/// "{key: value, ...}" configuration.
// NOTE(review): presumably used by tools as the description of their -style
// command line option - confirm against the callers.
const char *StyleOptionHelpDescription =
    "Coding style, currently supports:\n"
    "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
    "Use -style=file to load style configuration from\n"
    ".clang-format file located in one of the parent\n"
    "directories of the source file (or current\n"
    "directory for stdin).\n"
    "Use -style=\"{key: value, ...}\" to set specific\n"
    "parameters, e.g.:\n"
    "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
2022 
2023 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
2024   if (FileName.endswith_lower(".js")) {
2025     return FormatStyle::LK_JavaScript;
2026   } else if (FileName.endswith_lower(".proto") ||
2027              FileName.endswith_lower(".protodevel")) {
2028     return FormatStyle::LK_Proto;
2029   }
2030   return FormatStyle::LK_Cpp;
2031 }
2032 
2033 FormatStyle getStyle(StringRef StyleName, StringRef FileName,
2034                      StringRef FallbackStyle) {
2035   FormatStyle Style = getLLVMStyle();
2036   Style.Language = getLanguageByFileName(FileName);
2037   if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
2038     llvm::errs() << "Invalid fallback style \"" << FallbackStyle
2039                  << "\" using LLVM style\n";
2040     return Style;
2041   }
2042 
2043   if (StyleName.startswith("{")) {
2044     // Parse YAML/JSON style from the command line.
2045     if (std::error_code ec = parseConfiguration(StyleName, &Style)) {
2046       llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
2047                    << FallbackStyle << " style\n";
2048     }
2049     return Style;
2050   }
2051 
2052   if (!StyleName.equals_lower("file")) {
2053     if (!getPredefinedStyle(StyleName, Style.Language, &Style))
2054       llvm::errs() << "Invalid value for -style, using " << FallbackStyle
2055                    << " style\n";
2056     return Style;
2057   }
2058 
2059   // Look for .clang-format/_clang-format file in the file's parent directories.
2060   SmallString<128> UnsuitableConfigFiles;
2061   SmallString<128> Path(FileName);
2062   llvm::sys::fs::make_absolute(Path);
2063   for (StringRef Directory = Path; !Directory.empty();
2064        Directory = llvm::sys::path::parent_path(Directory)) {
2065     if (!llvm::sys::fs::is_directory(Directory))
2066       continue;
2067     SmallString<128> ConfigFile(Directory);
2068 
2069     llvm::sys::path::append(ConfigFile, ".clang-format");
2070     DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
2071     bool IsFile = false;
2072     // Ignore errors from is_regular_file: we only need to know if we can read
2073     // the file or not.
2074     llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
2075 
2076     if (!IsFile) {
2077       // Try _clang-format too, since dotfiles are not commonly used on Windows.
2078       ConfigFile = Directory;
2079       llvm::sys::path::append(ConfigFile, "_clang-format");
2080       DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
2081       llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
2082     }
2083 
2084     if (IsFile) {
2085       std::unique_ptr<llvm::MemoryBuffer> Text;
2086       if (std::error_code ec =
2087               llvm::MemoryBuffer::getFile(ConfigFile.c_str(), Text)) {
2088         llvm::errs() << ec.message() << "\n";
2089         break;
2090       }
2091       if (std::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
2092         if (ec == ParseError::Unsuitable) {
2093           if (!UnsuitableConfigFiles.empty())
2094             UnsuitableConfigFiles.append(", ");
2095           UnsuitableConfigFiles.append(ConfigFile);
2096           continue;
2097         }
2098         llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
2099                      << "\n";
2100         break;
2101       }
2102       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
2103       return Style;
2104     }
2105   }
2106   llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
2107                << " style\n";
2108   if (!UnsuitableConfigFiles.empty()) {
2109     llvm::errs() << "Configuration file(s) do(es) not support "
2110                  << getLanguageName(Style.Language) << ": "
2111                  << UnsuitableConfigFiles << "\n";
2112   }
2113   return Style;
2114 }
2115 
2116 } // namespace format
2117 } // namespace clang
2118