1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "clang/Format/Format.h"
17 #include "ContinuationIndenter.h"
18 #include "TokenAnnotator.h"
19 #include "UnwrappedLineFormatter.h"
20 #include "UnwrappedLineParser.h"
21 #include "WhitespaceManager.h"
22 #include "clang/Basic/Diagnostic.h"
23 #include "clang/Basic/DiagnosticOptions.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Lex/Lexer.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/Regex.h"
31 #include "llvm/Support/YAMLTraits.h"
32 #include <queue>
33 #include <string>
34 
35 #define DEBUG_TYPE "format-formatter"
36 
37 using clang::format::FormatStyle;
38 
39 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
40 LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory)
41 
42 namespace llvm {
43 namespace yaml {
44 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
45   static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
46     IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
47     IO.enumCase(Value, "Java", FormatStyle::LK_Java);
48     IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
49     IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
50     IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen);
51   }
52 };
53 
54 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
55   static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
56     IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
57     IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
58     IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
59     IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
60     IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
61   }
62 };
63 
64 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
65   static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
66     IO.enumCase(Value, "Never", FormatStyle::UT_Never);
67     IO.enumCase(Value, "false", FormatStyle::UT_Never);
68     IO.enumCase(Value, "Always", FormatStyle::UT_Always);
69     IO.enumCase(Value, "true", FormatStyle::UT_Always);
70     IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
71   }
72 };
73 
74 template <> struct ScalarEnumerationTraits<FormatStyle::JavaScriptQuoteStyle> {
75   static void enumeration(IO &IO, FormatStyle::JavaScriptQuoteStyle &Value) {
76     IO.enumCase(Value, "Leave", FormatStyle::JSQS_Leave);
77     IO.enumCase(Value, "Single", FormatStyle::JSQS_Single);
78     IO.enumCase(Value, "Double", FormatStyle::JSQS_Double);
79   }
80 };
81 
82 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
83   static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
84     IO.enumCase(Value, "None", FormatStyle::SFS_None);
85     IO.enumCase(Value, "false", FormatStyle::SFS_None);
86     IO.enumCase(Value, "All", FormatStyle::SFS_All);
87     IO.enumCase(Value, "true", FormatStyle::SFS_All);
88     IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline);
89     IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty);
90   }
91 };
92 
93 template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> {
94   static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) {
95     IO.enumCase(Value, "All", FormatStyle::BOS_All);
96     IO.enumCase(Value, "true", FormatStyle::BOS_All);
97     IO.enumCase(Value, "None", FormatStyle::BOS_None);
98     IO.enumCase(Value, "false", FormatStyle::BOS_None);
99     IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment);
100   }
101 };
102 
103 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
104   static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
105     IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
106     IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
107     IO.enumCase(Value, "Mozilla", FormatStyle::BS_Mozilla);
108     IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
109     IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
110     IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
111     IO.enumCase(Value, "WebKit", FormatStyle::BS_WebKit);
112     IO.enumCase(Value, "Custom", FormatStyle::BS_Custom);
113   }
114 };
115 
116 template <>
117 struct ScalarEnumerationTraits<FormatStyle::ReturnTypeBreakingStyle> {
118   static void enumeration(IO &IO, FormatStyle::ReturnTypeBreakingStyle &Value) {
119     IO.enumCase(Value, "None", FormatStyle::RTBS_None);
120     IO.enumCase(Value, "All", FormatStyle::RTBS_All);
121     IO.enumCase(Value, "TopLevel", FormatStyle::RTBS_TopLevel);
122     IO.enumCase(Value, "TopLevelDefinitions",
123                 FormatStyle::RTBS_TopLevelDefinitions);
124     IO.enumCase(Value, "AllDefinitions", FormatStyle::RTBS_AllDefinitions);
125   }
126 };
127 
128 template <>
129 struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> {
130   static void
131   enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) {
132     IO.enumCase(Value, "None", FormatStyle::DRTBS_None);
133     IO.enumCase(Value, "All", FormatStyle::DRTBS_All);
134     IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel);
135 
136     // For backward compatibility.
137     IO.enumCase(Value, "false", FormatStyle::DRTBS_None);
138     IO.enumCase(Value, "true", FormatStyle::DRTBS_All);
139   }
140 };
141 
142 template <>
143 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
144   static void enumeration(IO &IO,
145                           FormatStyle::NamespaceIndentationKind &Value) {
146     IO.enumCase(Value, "None", FormatStyle::NI_None);
147     IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
148     IO.enumCase(Value, "All", FormatStyle::NI_All);
149   }
150 };
151 
152 template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> {
153   static void enumeration(IO &IO, FormatStyle::BracketAlignmentStyle &Value) {
154     IO.enumCase(Value, "Align", FormatStyle::BAS_Align);
155     IO.enumCase(Value, "DontAlign", FormatStyle::BAS_DontAlign);
156     IO.enumCase(Value, "AlwaysBreak", FormatStyle::BAS_AlwaysBreak);
157 
158     // For backward compatibility.
159     IO.enumCase(Value, "true", FormatStyle::BAS_Align);
160     IO.enumCase(Value, "false", FormatStyle::BAS_DontAlign);
161   }
162 };
163 
164 template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> {
165   static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) {
166     IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle);
167     IO.enumCase(Value, "Left", FormatStyle::PAS_Left);
168     IO.enumCase(Value, "Right", FormatStyle::PAS_Right);
169 
170     // For backward compatibility.
171     IO.enumCase(Value, "true", FormatStyle::PAS_Left);
172     IO.enumCase(Value, "false", FormatStyle::PAS_Right);
173   }
174 };
175 
176 template <>
177 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
178   static void enumeration(IO &IO,
179                           FormatStyle::SpaceBeforeParensOptions &Value) {
180     IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
181     IO.enumCase(Value, "ControlStatements",
182                 FormatStyle::SBPO_ControlStatements);
183     IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
184 
185     // For backward compatibility.
186     IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
187     IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
188   }
189 };
190 
191 template <> struct MappingTraits<FormatStyle> {
192   static void mapping(IO &IO, FormatStyle &Style) {
193     // When reading, read the language first, we need it for getPredefinedStyle.
194     IO.mapOptional("Language", Style.Language);
195 
196     if (IO.outputting()) {
197       StringRef StylesArray[] = {"LLVM",    "Google", "Chromium",
198                                  "Mozilla", "WebKit", "GNU"};
199       ArrayRef<StringRef> Styles(StylesArray);
200       for (size_t i = 0, e = Styles.size(); i < e; ++i) {
201         StringRef StyleName(Styles[i]);
202         FormatStyle PredefinedStyle;
203         if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
204             Style == PredefinedStyle) {
205           IO.mapOptional("# BasedOnStyle", StyleName);
206           break;
207         }
208       }
209     } else {
210       StringRef BasedOnStyle;
211       IO.mapOptional("BasedOnStyle", BasedOnStyle);
212       if (!BasedOnStyle.empty()) {
213         FormatStyle::LanguageKind OldLanguage = Style.Language;
214         FormatStyle::LanguageKind Language =
215             ((FormatStyle *)IO.getContext())->Language;
216         if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
217           IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
218           return;
219         }
220         Style.Language = OldLanguage;
221       }
222     }
223 
224     // For backward compatibility.
225     if (!IO.outputting()) {
226       IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment);
227       IO.mapOptional("IndentFunctionDeclarationAfterType",
228                      Style.IndentWrappedFunctionNames);
229       IO.mapOptional("PointerBindsToType", Style.PointerAlignment);
230       IO.mapOptional("SpaceAfterControlStatementKeyword",
231                      Style.SpaceBeforeParens);
232     }
233 
234     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
235     IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
236     IO.mapOptional("AlignConsecutiveAssignments",
237                    Style.AlignConsecutiveAssignments);
238     IO.mapOptional("AlignConsecutiveDeclarations",
239                    Style.AlignConsecutiveDeclarations);
240     IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
241     IO.mapOptional("AlignOperands", Style.AlignOperands);
242     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
243     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
244                    Style.AllowAllParametersOfDeclarationOnNextLine);
245     IO.mapOptional("AllowShortBlocksOnASingleLine",
246                    Style.AllowShortBlocksOnASingleLine);
247     IO.mapOptional("AllowShortCaseLabelsOnASingleLine",
248                    Style.AllowShortCaseLabelsOnASingleLine);
249     IO.mapOptional("AllowShortFunctionsOnASingleLine",
250                    Style.AllowShortFunctionsOnASingleLine);
251     IO.mapOptional("AllowShortIfStatementsOnASingleLine",
252                    Style.AllowShortIfStatementsOnASingleLine);
253     IO.mapOptional("AllowShortLoopsOnASingleLine",
254                    Style.AllowShortLoopsOnASingleLine);
255     IO.mapOptional("AlwaysBreakAfterDefinitionReturnType",
256                    Style.AlwaysBreakAfterDefinitionReturnType);
257     IO.mapOptional("AlwaysBreakAfterReturnType",
258                    Style.AlwaysBreakAfterReturnType);
259     // If AlwaysBreakAfterDefinitionReturnType was specified but
260     // AlwaysBreakAfterReturnType was not, initialize the latter from the
261     // former for backwards compatibility.
262     if (Style.AlwaysBreakAfterDefinitionReturnType != FormatStyle::DRTBS_None &&
263         Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_None) {
264       if (Style.AlwaysBreakAfterDefinitionReturnType == FormatStyle::DRTBS_All)
265         Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions;
266       else if (Style.AlwaysBreakAfterDefinitionReturnType ==
267                FormatStyle::DRTBS_TopLevel)
268         Style.AlwaysBreakAfterReturnType =
269             FormatStyle::RTBS_TopLevelDefinitions;
270     }
271 
272     IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
273                    Style.AlwaysBreakBeforeMultilineStrings);
274     IO.mapOptional("AlwaysBreakTemplateDeclarations",
275                    Style.AlwaysBreakTemplateDeclarations);
276     IO.mapOptional("BinPackArguments", Style.BinPackArguments);
277     IO.mapOptional("BinPackParameters", Style.BinPackParameters);
278     IO.mapOptional("BraceWrapping", Style.BraceWrapping);
279     IO.mapOptional("BreakBeforeBinaryOperators",
280                    Style.BreakBeforeBinaryOperators);
281     IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
282     IO.mapOptional("BreakBeforeTernaryOperators",
283                    Style.BreakBeforeTernaryOperators);
284     IO.mapOptional("BreakConstructorInitializersBeforeComma",
285                    Style.BreakConstructorInitializersBeforeComma);
286     IO.mapOptional("BreakAfterJavaFieldAnnotations",
287                    Style.BreakAfterJavaFieldAnnotations);
288     IO.mapOptional("BreakStringLiterals", Style.BreakStringLiterals);
289     IO.mapOptional("ColumnLimit", Style.ColumnLimit);
290     IO.mapOptional("CommentPragmas", Style.CommentPragmas);
291     IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
292                    Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
293     IO.mapOptional("ConstructorInitializerIndentWidth",
294                    Style.ConstructorInitializerIndentWidth);
295     IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
296     IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
297     IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment);
298     IO.mapOptional("DisableFormat", Style.DisableFormat);
299     IO.mapOptional("ExperimentalAutoDetectBinPacking",
300                    Style.ExperimentalAutoDetectBinPacking);
301     IO.mapOptional("ForEachMacros", Style.ForEachMacros);
302     IO.mapOptional("IncludeCategories", Style.IncludeCategories);
303     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
304     IO.mapOptional("IndentWidth", Style.IndentWidth);
305     IO.mapOptional("IndentWrappedFunctionNames",
306                    Style.IndentWrappedFunctionNames);
307     IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
308                    Style.KeepEmptyLinesAtTheStartOfBlocks);
309     IO.mapOptional("MacroBlockBegin", Style.MacroBlockBegin);
310     IO.mapOptional("MacroBlockEnd", Style.MacroBlockEnd);
311     IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
312     IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
313     IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth);
314     IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
315     IO.mapOptional("ObjCSpaceBeforeProtocolList",
316                    Style.ObjCSpaceBeforeProtocolList);
317     IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
318                    Style.PenaltyBreakBeforeFirstCallParameter);
319     IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
320     IO.mapOptional("PenaltyBreakFirstLessLess",
321                    Style.PenaltyBreakFirstLessLess);
322     IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
323     IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
324     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
325                    Style.PenaltyReturnTypeOnItsOwnLine);
326     IO.mapOptional("PointerAlignment", Style.PointerAlignment);
327     IO.mapOptional("ReflowComments", Style.ReflowComments);
328     IO.mapOptional("SortIncludes", Style.SortIncludes);
329     IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
330     IO.mapOptional("SpaceBeforeAssignmentOperators",
331                    Style.SpaceBeforeAssignmentOperators);
332     IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
333     IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
334     IO.mapOptional("SpacesBeforeTrailingComments",
335                    Style.SpacesBeforeTrailingComments);
336     IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
337     IO.mapOptional("SpacesInContainerLiterals",
338                    Style.SpacesInContainerLiterals);
339     IO.mapOptional("SpacesInCStyleCastParentheses",
340                    Style.SpacesInCStyleCastParentheses);
341     IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
342     IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets);
343     IO.mapOptional("Standard", Style.Standard);
344     IO.mapOptional("TabWidth", Style.TabWidth);
345     IO.mapOptional("UseTab", Style.UseTab);
346     IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes);
347   }
348 };
349 
350 template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> {
351   static void mapping(IO &IO, FormatStyle::BraceWrappingFlags &Wrapping) {
352     IO.mapOptional("AfterClass", Wrapping.AfterClass);
353     IO.mapOptional("AfterControlStatement", Wrapping.AfterControlStatement);
354     IO.mapOptional("AfterEnum", Wrapping.AfterEnum);
355     IO.mapOptional("AfterFunction", Wrapping.AfterFunction);
356     IO.mapOptional("AfterNamespace", Wrapping.AfterNamespace);
357     IO.mapOptional("AfterObjCDeclaration", Wrapping.AfterObjCDeclaration);
358     IO.mapOptional("AfterStruct", Wrapping.AfterStruct);
359     IO.mapOptional("AfterUnion", Wrapping.AfterUnion);
360     IO.mapOptional("BeforeCatch", Wrapping.BeforeCatch);
361     IO.mapOptional("BeforeElse", Wrapping.BeforeElse);
362     IO.mapOptional("IndentBraces", Wrapping.IndentBraces);
363   }
364 };
365 
366 template <> struct MappingTraits<FormatStyle::IncludeCategory> {
367   static void mapping(IO &IO, FormatStyle::IncludeCategory &Category) {
368     IO.mapOptional("Regex", Category.Regex);
369     IO.mapOptional("Priority", Category.Priority);
370   }
371 };
372 
373 // Allows to read vector<FormatStyle> while keeping default values.
374 // IO.getContext() should contain a pointer to the FormatStyle structure, that
375 // will be used to get default values for missing keys.
376 // If the first element has no Language specified, it will be treated as the
377 // default one for the following elements.
378 template <> struct DocumentListTraits<std::vector<FormatStyle>> {
379   static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
380     return Seq.size();
381   }
382   static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
383                               size_t Index) {
384     if (Index >= Seq.size()) {
385       assert(Index == Seq.size());
386       FormatStyle Template;
387       if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
388         Template = Seq[0];
389       } else {
390         Template = *((const FormatStyle *)IO.getContext());
391         Template.Language = FormatStyle::LK_None;
392       }
393       Seq.resize(Index + 1, Template);
394     }
395     return Seq[Index];
396   }
397 };
398 } // namespace yaml
399 } // namespace llvm
400 
401 namespace clang {
402 namespace format {
403 
404 const std::error_category &getParseCategory() {
405   static ParseErrorCategory C;
406   return C;
407 }
408 std::error_code make_error_code(ParseError e) {
409   return std::error_code(static_cast<int>(e), getParseCategory());
410 }
411 
412 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT {
413   return "clang-format.parse_error";
414 }
415 
416 std::string ParseErrorCategory::message(int EV) const {
417   switch (static_cast<ParseError>(EV)) {
418   case ParseError::Success:
419     return "Success";
420   case ParseError::Error:
421     return "Invalid argument";
422   case ParseError::Unsuitable:
423     return "Unsuitable";
424   }
425   llvm_unreachable("unexpected parse error");
426 }
427 
428 static FormatStyle expandPresets(const FormatStyle &Style) {
429   if (Style.BreakBeforeBraces == FormatStyle::BS_Custom)
430     return Style;
431   FormatStyle Expanded = Style;
432   Expanded.BraceWrapping = {false, false, false, false, false, false,
433                             false, false, false, false, false};
434   switch (Style.BreakBeforeBraces) {
435   case FormatStyle::BS_Linux:
436     Expanded.BraceWrapping.AfterClass = true;
437     Expanded.BraceWrapping.AfterFunction = true;
438     Expanded.BraceWrapping.AfterNamespace = true;
439     break;
440   case FormatStyle::BS_Mozilla:
441     Expanded.BraceWrapping.AfterClass = true;
442     Expanded.BraceWrapping.AfterEnum = true;
443     Expanded.BraceWrapping.AfterFunction = true;
444     Expanded.BraceWrapping.AfterStruct = true;
445     Expanded.BraceWrapping.AfterUnion = true;
446     break;
447   case FormatStyle::BS_Stroustrup:
448     Expanded.BraceWrapping.AfterFunction = true;
449     Expanded.BraceWrapping.BeforeCatch = true;
450     Expanded.BraceWrapping.BeforeElse = true;
451     break;
452   case FormatStyle::BS_Allman:
453     Expanded.BraceWrapping.AfterClass = true;
454     Expanded.BraceWrapping.AfterControlStatement = true;
455     Expanded.BraceWrapping.AfterEnum = true;
456     Expanded.BraceWrapping.AfterFunction = true;
457     Expanded.BraceWrapping.AfterNamespace = true;
458     Expanded.BraceWrapping.AfterObjCDeclaration = true;
459     Expanded.BraceWrapping.AfterStruct = true;
460     Expanded.BraceWrapping.BeforeCatch = true;
461     Expanded.BraceWrapping.BeforeElse = true;
462     break;
463   case FormatStyle::BS_GNU:
464     Expanded.BraceWrapping = {true, true, true, true, true, true,
465                               true, true, true, true, true};
466     break;
467   case FormatStyle::BS_WebKit:
468     Expanded.BraceWrapping.AfterFunction = true;
469     break;
470   default:
471     break;
472   }
473   return Expanded;
474 }
475 
476 FormatStyle getLLVMStyle() {
477   FormatStyle LLVMStyle;
478   LLVMStyle.Language = FormatStyle::LK_Cpp;
479   LLVMStyle.AccessModifierOffset = -2;
480   LLVMStyle.AlignEscapedNewlinesLeft = false;
481   LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align;
482   LLVMStyle.AlignOperands = true;
483   LLVMStyle.AlignTrailingComments = true;
484   LLVMStyle.AlignConsecutiveAssignments = false;
485   LLVMStyle.AlignConsecutiveDeclarations = false;
486   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
487   LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
488   LLVMStyle.AllowShortBlocksOnASingleLine = false;
489   LLVMStyle.AllowShortCaseLabelsOnASingleLine = false;
490   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
491   LLVMStyle.AllowShortLoopsOnASingleLine = false;
492   LLVMStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_None;
493   LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None;
494   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
495   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
496   LLVMStyle.BinPackParameters = true;
497   LLVMStyle.BinPackArguments = true;
498   LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;
499   LLVMStyle.BreakBeforeTernaryOperators = true;
500   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
501   LLVMStyle.BraceWrapping = {false, false, false, false, false, false,
502                              false, false, false, false, false};
503   LLVMStyle.BreakAfterJavaFieldAnnotations = false;
504   LLVMStyle.BreakConstructorInitializersBeforeComma = false;
505   LLVMStyle.BreakStringLiterals = true;
506   LLVMStyle.ColumnLimit = 80;
507   LLVMStyle.CommentPragmas = "^ IWYU pragma:";
508   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
509   LLVMStyle.ConstructorInitializerIndentWidth = 4;
510   LLVMStyle.ContinuationIndentWidth = 4;
511   LLVMStyle.Cpp11BracedListStyle = true;
512   LLVMStyle.DerivePointerAlignment = false;
513   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
514   LLVMStyle.ForEachMacros.push_back("foreach");
515   LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
516   LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH");
517   LLVMStyle.IncludeCategories = {{"^\"(llvm|llvm-c|clang|clang-c)/", 2},
518                                  {"^(<|\"(gtest|isl|json)/)", 3},
519                                  {".*", 1}};
520   LLVMStyle.IndentCaseLabels = false;
521   LLVMStyle.IndentWrappedFunctionNames = false;
522   LLVMStyle.IndentWidth = 2;
523   LLVMStyle.TabWidth = 8;
524   LLVMStyle.MaxEmptyLinesToKeep = 1;
525   LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
526   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
527   LLVMStyle.ObjCBlockIndentWidth = 2;
528   LLVMStyle.ObjCSpaceAfterProperty = false;
529   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
530   LLVMStyle.PointerAlignment = FormatStyle::PAS_Right;
531   LLVMStyle.SpacesBeforeTrailingComments = 1;
532   LLVMStyle.Standard = FormatStyle::LS_Cpp11;
533   LLVMStyle.UseTab = FormatStyle::UT_Never;
534   LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;
535   LLVMStyle.ReflowComments = true;
536   LLVMStyle.SpacesInParentheses = false;
537   LLVMStyle.SpacesInSquareBrackets = false;
538   LLVMStyle.SpaceInEmptyParentheses = false;
539   LLVMStyle.SpacesInContainerLiterals = true;
540   LLVMStyle.SpacesInCStyleCastParentheses = false;
541   LLVMStyle.SpaceAfterCStyleCast = false;
542   LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
543   LLVMStyle.SpaceBeforeAssignmentOperators = true;
544   LLVMStyle.SpacesInAngles = false;
545 
546   LLVMStyle.PenaltyBreakComment = 300;
547   LLVMStyle.PenaltyBreakFirstLessLess = 120;
548   LLVMStyle.PenaltyBreakString = 1000;
549   LLVMStyle.PenaltyExcessCharacter = 1000000;
550   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
551   LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
552 
553   LLVMStyle.DisableFormat = false;
554   LLVMStyle.SortIncludes = true;
555 
556   return LLVMStyle;
557 }
558 
559 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
560   FormatStyle GoogleStyle = getLLVMStyle();
561   GoogleStyle.Language = Language;
562 
563   GoogleStyle.AccessModifierOffset = -1;
564   GoogleStyle.AlignEscapedNewlinesLeft = true;
565   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
566   GoogleStyle.AllowShortLoopsOnASingleLine = true;
567   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
568   GoogleStyle.AlwaysBreakTemplateDeclarations = true;
569   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
570   GoogleStyle.DerivePointerAlignment = true;
571   GoogleStyle.IncludeCategories = {{"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}};
572   GoogleStyle.IndentCaseLabels = true;
573   GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
574   GoogleStyle.ObjCSpaceAfterProperty = false;
575   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
576   GoogleStyle.PointerAlignment = FormatStyle::PAS_Left;
577   GoogleStyle.SpacesBeforeTrailingComments = 2;
578   GoogleStyle.Standard = FormatStyle::LS_Auto;
579 
580   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
581   GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
582 
583   if (Language == FormatStyle::LK_Java) {
584     GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
585     GoogleStyle.AlignOperands = false;
586     GoogleStyle.AlignTrailingComments = false;
587     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
588     GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
589     GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
590     GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment;
591     GoogleStyle.ColumnLimit = 100;
592     GoogleStyle.SpaceAfterCStyleCast = true;
593     GoogleStyle.SpacesBeforeTrailingComments = 1;
594   } else if (Language == FormatStyle::LK_JavaScript) {
595     GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak;
596     GoogleStyle.AlignOperands = false;
597     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
598     GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
599     GoogleStyle.BreakBeforeTernaryOperators = false;
600     GoogleStyle.CommentPragmas = "@(export|return|see|visibility) ";
601     GoogleStyle.MaxEmptyLinesToKeep = 3;
602     GoogleStyle.SpacesInContainerLiterals = false;
603     GoogleStyle.JavaScriptQuotes = FormatStyle::JSQS_Single;
604   } else if (Language == FormatStyle::LK_Proto) {
605     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
606     GoogleStyle.SpacesInContainerLiterals = false;
607   }
608 
609   return GoogleStyle;
610 }
611 
612 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
613   FormatStyle ChromiumStyle = getGoogleStyle(Language);
614   if (Language == FormatStyle::LK_Java) {
615     ChromiumStyle.AllowShortIfStatementsOnASingleLine = true;
616     ChromiumStyle.BreakAfterJavaFieldAnnotations = true;
617     ChromiumStyle.ContinuationIndentWidth = 8;
618     ChromiumStyle.IndentWidth = 4;
619   } else {
620     ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
621     ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
622     ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
623     ChromiumStyle.AllowShortLoopsOnASingleLine = false;
624     ChromiumStyle.BinPackParameters = false;
625     ChromiumStyle.DerivePointerAlignment = false;
626   }
627   ChromiumStyle.SortIncludes = false;
628   return ChromiumStyle;
629 }
630 
631 FormatStyle getMozillaStyle() {
632   FormatStyle MozillaStyle = getLLVMStyle();
633   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
634   MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
635   MozillaStyle.AlwaysBreakAfterReturnType =
636       FormatStyle::RTBS_TopLevelDefinitions;
637   MozillaStyle.AlwaysBreakAfterDefinitionReturnType =
638       FormatStyle::DRTBS_TopLevel;
639   MozillaStyle.AlwaysBreakTemplateDeclarations = true;
640   MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla;
641   MozillaStyle.BreakConstructorInitializersBeforeComma = true;
642   MozillaStyle.ConstructorInitializerIndentWidth = 2;
643   MozillaStyle.ContinuationIndentWidth = 2;
644   MozillaStyle.Cpp11BracedListStyle = false;
645   MozillaStyle.IndentCaseLabels = true;
646   MozillaStyle.ObjCSpaceAfterProperty = true;
647   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
648   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
649   MozillaStyle.PointerAlignment = FormatStyle::PAS_Left;
650   return MozillaStyle;
651 }
652 
653 FormatStyle getWebKitStyle() {
654   FormatStyle Style = getLLVMStyle();
655   Style.AccessModifierOffset = -4;
656   Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
657   Style.AlignOperands = false;
658   Style.AlignTrailingComments = false;
659   Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
660   Style.BreakBeforeBraces = FormatStyle::BS_WebKit;
661   Style.BreakConstructorInitializersBeforeComma = true;
662   Style.Cpp11BracedListStyle = false;
663   Style.ColumnLimit = 0;
664   Style.IndentWidth = 4;
665   Style.NamespaceIndentation = FormatStyle::NI_Inner;
666   Style.ObjCBlockIndentWidth = 4;
667   Style.ObjCSpaceAfterProperty = true;
668   Style.PointerAlignment = FormatStyle::PAS_Left;
669   Style.Standard = FormatStyle::LS_Cpp03;
670   return Style;
671 }
672 
673 FormatStyle getGNUStyle() {
674   FormatStyle Style = getLLVMStyle();
675   Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All;
676   Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions;
677   Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
678   Style.BreakBeforeBraces = FormatStyle::BS_GNU;
679   Style.BreakBeforeTernaryOperators = true;
680   Style.Cpp11BracedListStyle = false;
681   Style.ColumnLimit = 79;
682   Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
683   Style.Standard = FormatStyle::LS_Cpp03;
684   return Style;
685 }
686 
687 FormatStyle getNoStyle() {
688   FormatStyle NoStyle = getLLVMStyle();
689   NoStyle.DisableFormat = true;
690   NoStyle.SortIncludes = false;
691   return NoStyle;
692 }
693 
694 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
695                         FormatStyle *Style) {
696   if (Name.equals_lower("llvm")) {
697     *Style = getLLVMStyle();
698   } else if (Name.equals_lower("chromium")) {
699     *Style = getChromiumStyle(Language);
700   } else if (Name.equals_lower("mozilla")) {
701     *Style = getMozillaStyle();
702   } else if (Name.equals_lower("google")) {
703     *Style = getGoogleStyle(Language);
704   } else if (Name.equals_lower("webkit")) {
705     *Style = getWebKitStyle();
706   } else if (Name.equals_lower("gnu")) {
707     *Style = getGNUStyle();
708   } else if (Name.equals_lower("none")) {
709     *Style = getNoStyle();
710   } else {
711     return false;
712   }
713 
714   Style->Language = Language;
715   return true;
716 }
717 
718 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
719   assert(Style);
720   FormatStyle::LanguageKind Language = Style->Language;
721   assert(Language != FormatStyle::LK_None);
722   if (Text.trim().empty())
723     return make_error_code(ParseError::Error);
724 
725   std::vector<FormatStyle> Styles;
726   llvm::yaml::Input Input(Text);
727   // DocumentListTraits<vector<FormatStyle>> uses the context to get default
728   // values for the fields, keys for which are missing from the configuration.
729   // Mapping also uses the context to get the language to find the correct
730   // base style.
731   Input.setContext(Style);
732   Input >> Styles;
733   if (Input.error())
734     return Input.error();
735 
736   for (unsigned i = 0; i < Styles.size(); ++i) {
737     // Ensures that only the first configuration can skip the Language option.
738     if (Styles[i].Language == FormatStyle::LK_None && i != 0)
739       return make_error_code(ParseError::Error);
740     // Ensure that each language is configured at most once.
741     for (unsigned j = 0; j < i; ++j) {
742       if (Styles[i].Language == Styles[j].Language) {
743         DEBUG(llvm::dbgs()
744               << "Duplicate languages in the config file on positions " << j
745               << " and " << i << "\n");
746         return make_error_code(ParseError::Error);
747       }
748     }
749   }
750   // Look for a suitable configuration starting from the end, so we can
751   // find the configuration for the specific language first, and the default
752   // configuration (which can only be at slot 0) after it.
753   for (int i = Styles.size() - 1; i >= 0; --i) {
754     if (Styles[i].Language == Language ||
755         Styles[i].Language == FormatStyle::LK_None) {
756       *Style = Styles[i];
757       Style->Language = Language;
758       return make_error_code(ParseError::Success);
759     }
760   }
761   return make_error_code(ParseError::Unsuitable);
762 }
763 
764 std::string configurationAsText(const FormatStyle &Style) {
765   std::string Text;
766   llvm::raw_string_ostream Stream(Text);
767   llvm::yaml::Output Output(Stream);
768   // We use the same mapping method for input and output, so we need a non-const
769   // reference here.
770   FormatStyle NonConstStyle = expandPresets(Style);
771   Output << NonConstStyle;
772   return Stream.str();
773 }
774 
775 namespace {
776 
777 class FormatTokenLexer {
778 public:
779   FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style,
780                    encoding::Encoding Encoding, tooling::Replacements &Replaces)
781       : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
782         LessStashed(false), Column(0), TrailingWhitespace(0),
783         SourceMgr(SourceMgr), ID(ID), Style(Style),
784         IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
785         Encoding(Encoding), Replaces(Replaces), FirstInLineIndex(0),
786         FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
787         MacroBlockEndRegex(Style.MacroBlockEnd) {
788     Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
789                         getFormattingLangOpts(Style)));
790     Lex->SetKeepWhitespaceMode(true);
791 
792     for (const std::string &ForEachMacro : Style.ForEachMacros)
793       ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
794     std::sort(ForEachMacros.begin(), ForEachMacros.end());
795   }
796 
797   ArrayRef<FormatToken *> lex() {
798     assert(Tokens.empty());
799     assert(FirstInLineIndex == 0);
800     do {
801       Tokens.push_back(getNextToken());
802       if (Style.Language == FormatStyle::LK_JavaScript)
803         tryParseJSRegexLiteral();
804       tryMergePreviousTokens();
805       if (Style.Language == FormatStyle::LK_JavaScript)
806         tryRequoteJSStringLiteral();
807       if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
808         FirstInLineIndex = Tokens.size() - 1;
809     } while (Tokens.back()->Tok.isNot(tok::eof));
810     return Tokens;
811   }
812 
  // Returns the additional keywords, which were set up from this lexer's
  // identifier table in the constructor.
  const AdditionalKeywords &getKeywords() { return Keywords; }
814 
815 private:
816   void tryMergePreviousTokens() {
817     if (tryMerge_TMacro())
818       return;
819     if (tryMergeConflictMarkers())
820       return;
821     if (tryMergeLessLess())
822       return;
823 
824     if (Style.Language == FormatStyle::LK_JavaScript) {
825       if (tryMergeTemplateString())
826         return;
827 
828       static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
829       static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
830                                                      tok::equal};
831       static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
832                                                     tok::greaterequal};
833       static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
834       // FIXME: Investigate what token type gives the correct operator priority.
835       if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
836         return;
837       if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
838         return;
839       if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
840         return;
841       if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
842         return;
843     }
844   }
845 
846   bool tryMergeLessLess() {
847     // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
848     if (Tokens.size() < 3)
849       return false;
850 
851     bool FourthTokenIsLess = false;
852     if (Tokens.size() > 3)
853       FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
854 
855     auto First = Tokens.end() - 3;
856     if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
857         First[0]->isNot(tok::less) || FourthTokenIsLess)
858       return false;
859 
860     // Only merge if there currently is no whitespace between the two "<".
861     if (First[1]->WhitespaceRange.getBegin() !=
862         First[1]->WhitespaceRange.getEnd())
863       return false;
864 
865     First[0]->Tok.setKind(tok::lessless);
866     First[0]->TokenText = "<<";
867     First[0]->ColumnWidth += 1;
868     Tokens.erase(Tokens.end() - 2);
869     return true;
870   }
871 
872   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) {
873     if (Tokens.size() < Kinds.size())
874       return false;
875 
876     SmallVectorImpl<FormatToken *>::const_iterator First =
877         Tokens.end() - Kinds.size();
878     if (!First[0]->is(Kinds[0]))
879       return false;
880     unsigned AddLength = 0;
881     for (unsigned i = 1; i < Kinds.size(); ++i) {
882       if (!First[i]->is(Kinds[i]) ||
883           First[i]->WhitespaceRange.getBegin() !=
884               First[i]->WhitespaceRange.getEnd())
885         return false;
886       AddLength += First[i]->TokenText.size();
887     }
888     Tokens.resize(Tokens.size() - Kinds.size() + 1);
889     First[0]->TokenText = StringRef(First[0]->TokenText.data(),
890                                     First[0]->TokenText.size() + AddLength);
891     First[0]->ColumnWidth += AddLength;
892     First[0]->Type = NewType;
893     return true;
894   }
895 
896   // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
897   bool precedesOperand(FormatToken *Tok) {
898     // NB: This is not entirely correct, as an r_paren can introduce an operand
899     // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough
900     // corner case to not matter in practice, though.
901     return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
902                         tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
903                         tok::colon, tok::question, tok::tilde) ||
904            Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
905                         tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
906                         tok::kw_typeof, Keywords.kw_instanceof,
907                         Keywords.kw_in) ||
908            Tok->isBinaryOperator();
909   }
910 
911   bool canPrecedeRegexLiteral(FormatToken *Prev) {
912     if (!Prev)
913       return true;
914 
915     // Regex literals can only follow after prefix unary operators, not after
916     // postfix unary operators. If the '++' is followed by a non-operand
917     // introducing token, the slash here is the operand and not the start of a
918     // regex.
919     if (Prev->isOneOf(tok::plusplus, tok::minusminus))
920       return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]));
921 
922     // The previous token must introduce an operand location where regex
923     // literals can occur.
924     if (!precedesOperand(Prev))
925       return false;
926 
927     return true;
928   }
929 
930   // Tries to parse a JavaScript Regex literal starting at the current token,
931   // if that begins with a slash and is in a location where JavaScript allows
932   // regex literals. Changes the current token to a regex literal and updates
933   // its text if successful.
934   void tryParseJSRegexLiteral() {
935     FormatToken *RegexToken = Tokens.back();
936     if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
937       return;
938 
939     FormatToken *Prev = nullptr;
940     for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
941       // NB: Because previous pointers are not initialized yet, this cannot use
942       // Token.getPreviousNonComment.
943       if ((*I)->isNot(tok::comment)) {
944         Prev = *I;
945         break;
946       }
947     }
948 
949     if (!canPrecedeRegexLiteral(Prev))
950       return;
951 
952     // 'Manually' lex ahead in the current file buffer.
953     const char *Offset = Lex->getBufferLocation();
954     const char *RegexBegin = Offset - RegexToken->TokenText.size();
955     StringRef Buffer = Lex->getBuffer();
956     bool InCharacterClass = false;
957     bool HaveClosingSlash = false;
958     for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
959       // Regular expressions are terminated with a '/', which can only be
960       // escaped using '\' or a character class between '[' and ']'.
961       // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.5.
962       switch (*Offset) {
963       case '\\':
964         // Skip the escaped character.
965         ++Offset;
966         break;
967       case '[':
968         InCharacterClass = true;
969         break;
970       case ']':
971         InCharacterClass = false;
972         break;
973       case '/':
974         if (!InCharacterClass)
975           HaveClosingSlash = true;
976         break;
977       }
978     }
979 
980     RegexToken->Type = TT_RegexLiteral;
981     // Treat regex literals like other string_literals.
982     RegexToken->Tok.setKind(tok::string_literal);
983     RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
984     RegexToken->ColumnWidth = RegexToken->TokenText.size();
985 
986     resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
987   }
988 
  // Tries to merge the tokens of a JavaScript template string (`...`) into a
  // single token of type TT_TemplateString.
  //
  // The last token must contain the closing backtick. The tokens before it
  // are searched backwards for the opening backtick; when one is found, all
  // tokens after it are dropped and the opening backtick token is extended to
  // cover the text up to and including the closing backtick. Returns true if
  // a merge happened.
  bool tryMergeTemplateString() {
    if (Tokens.size() < 2)
      return false;

    FormatToken *EndBacktick = Tokens.back();
    // Backticks get lexed as tok::unknown tokens. If a template string contains
    // a comment start, it gets lexed as a tok::comment, or tok::unknown if
    // unterminated.
    if (!EndBacktick->isOneOf(tok::comment, tok::string_literal,
                              tok::char_constant, tok::unknown))
      return false;
    size_t CommentBacktickPos = EndBacktick->TokenText.find('`');
    // Unknown token that's not actually a backtick, or a comment that doesn't
    // contain a backtick.
    if (CommentBacktickPos == StringRef::npos)
      return false;

    // Number of tokens after the opening backtick that get dropped on success.
    unsigned TokenCount = 0;
    bool IsMultiline = false;
    unsigned EndColumnInFirstLine =
        EndBacktick->OriginalColumn + EndBacktick->ColumnWidth;
    // Walk backwards, starting at the token before the closing backtick.
    for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) {
      ++TokenCount;
      if (I[0]->IsMultiline)
        IsMultiline = true;

      // If there was a preceding template string, this must be the start of a
      // template string, not the end.
      if (I[0]->is(TT_TemplateString))
        return false;

      // Anything that is not a lone backtick is content of the template
      // string: update the bookkeeping and keep searching.
      if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") {
        // Keep track of the rhs offset of the last token to wrap across lines -
        // it's the rhs offset of the first line of the template string, used to
        // determine its width.
        if (I[0]->IsMultiline)
          EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth;
        // If the token has newlines, the token before it (if it exists) is the
        // rhs end of the previous line.
        if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) {
          EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth;
          IsMultiline = true;
        }
        continue;
      }

      // Found the opening backtick: make it the last token and turn it into
      // the template string.
      Tokens.resize(Tokens.size() - TokenCount);
      Tokens.back()->Type = TT_TemplateString;
      // One past the closing backtick.
      const char *EndOffset =
          EndBacktick->TokenText.data() + 1 + CommentBacktickPos;
      if (CommentBacktickPos != 0) {
        // If the backtick was not the first character (e.g. in a comment),
        // re-lex after the backtick position.
        SourceLocation Loc = EndBacktick->Tok.getLocation();
        resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1);
      }
      // Extend the token text from the opening through the closing backtick.
      Tokens.back()->TokenText =
          StringRef(Tokens.back()->TokenText.data(),
                    EndOffset - Tokens.back()->TokenText.data());

      unsigned EndOriginalColumn = EndBacktick->OriginalColumn;
      // A column of 0 is replaced by the spelling column reported by the
      // source manager.
      if (EndOriginalColumn == 0) {
        SourceLocation Loc = EndBacktick->Tok.getLocation();
        EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc);
      }
      // If the ` is further down within the token (e.g. in a comment).
      EndOriginalColumn += CommentBacktickPos;

      if (IsMultiline) {
        // ColumnWidth is from backtick to last token in line.
        // LastLineColumnWidth is 0 to backtick.
        // x = `some content
        //     until here`;
        Tokens.back()->ColumnWidth =
            EndColumnInFirstLine - Tokens.back()->OriginalColumn;
        // +1 for the ` itself.
        Tokens.back()->LastLineColumnWidth = EndOriginalColumn + 1;
        Tokens.back()->IsMultiline = true;
      } else {
        // Token simply spans from start to end, +1 for the ` itself.
        Tokens.back()->ColumnWidth =
            EndOriginalColumn - Tokens.back()->OriginalColumn + 1;
      }
      return true;
    }
    // No opening backtick was found among the preceding tokens.
    return false;
  }
1076 
1077   // If the last token is a double/single-quoted string literal, generates a
1078   // replacement with a single/double quoted string literal, re-escaping the
1079   // contents in the process.
1080   void tryRequoteJSStringLiteral() {
1081     if (Style.JavaScriptQuotes == FormatStyle::JSQS_Leave)
1082       return;
1083 
1084     FormatToken *FormatTok = Tokens.back();
1085     StringRef Input = FormatTok->TokenText;
1086     if (!FormatTok->isStringLiteral() ||
1087         // NB: testing for not starting with a double quote to avoid breaking
1088         // `template strings`.
1089         (Style.JavaScriptQuotes == FormatStyle::JSQS_Single &&
1090          !Input.startswith("\"")) ||
1091         (Style.JavaScriptQuotes == FormatStyle::JSQS_Double &&
1092          !Input.startswith("\'")))
1093       return;
1094 
1095     // Change start and end quote.
1096     bool IsSingle = Style.JavaScriptQuotes == FormatStyle::JSQS_Single;
1097     SourceLocation Start = FormatTok->Tok.getLocation();
1098     auto Replace = [&](SourceLocation Start, unsigned Length,
1099                        StringRef ReplacementText) {
1100       Replaces.insert(
1101           tooling::Replacement(SourceMgr, Start, Length, ReplacementText));
1102     };
1103     Replace(Start, 1, IsSingle ? "'" : "\"");
1104     Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1,
1105             IsSingle ? "'" : "\"");
1106 
1107     // Escape internal quotes.
1108     size_t ColumnWidth = FormatTok->TokenText.size();
1109     bool Escaped = false;
1110     for (size_t i = 1; i < Input.size() - 1; i++) {
1111       switch (Input[i]) {
1112       case '\\':
1113         if (!Escaped && i + 1 < Input.size() &&
1114             ((IsSingle && Input[i + 1] == '"') ||
1115              (!IsSingle && Input[i + 1] == '\''))) {
1116           // Remove this \, it's escaping a " or ' that no longer needs escaping
1117           ColumnWidth--;
1118           Replace(Start.getLocWithOffset(i), 1, "");
1119           continue;
1120         }
1121         Escaped = !Escaped;
1122         break;
1123       case '\"':
1124       case '\'':
1125         if (!Escaped && IsSingle == (Input[i] == '\'')) {
1126           // Escape the quote.
1127           Replace(Start.getLocWithOffset(i), 0, "\\");
1128           ColumnWidth++;
1129         }
1130         Escaped = false;
1131         break;
1132       default:
1133         Escaped = false;
1134         break;
1135       }
1136     }
1137 
1138     // For formatting, count the number of non-escaped single quotes in them
1139     // and adjust ColumnWidth to take the added escapes into account.
1140     // FIXME(martinprobst): this might conflict with code breaking a long string
1141     // literal (which clang-format doesn't do, yet). For that to work, this code
1142     // would have to modify TokenText directly.
1143     FormatTok->ColumnWidth = ColumnWidth;
1144   }
1145 
1146   bool tryMerge_TMacro() {
1147     if (Tokens.size() < 4)
1148       return false;
1149     FormatToken *Last = Tokens.back();
1150     if (!Last->is(tok::r_paren))
1151       return false;
1152 
1153     FormatToken *String = Tokens[Tokens.size() - 2];
1154     if (!String->is(tok::string_literal) || String->IsMultiline)
1155       return false;
1156 
1157     if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
1158       return false;
1159 
1160     FormatToken *Macro = Tokens[Tokens.size() - 4];
1161     if (Macro->TokenText != "_T")
1162       return false;
1163 
1164     const char *Start = Macro->TokenText.data();
1165     const char *End = Last->TokenText.data() + Last->TokenText.size();
1166     String->TokenText = StringRef(Start, End - Start);
1167     String->IsFirst = Macro->IsFirst;
1168     String->LastNewlineOffset = Macro->LastNewlineOffset;
1169     String->WhitespaceRange = Macro->WhitespaceRange;
1170     String->OriginalColumn = Macro->OriginalColumn;
1171     String->ColumnWidth = encoding::columnWidthWithTabs(
1172         String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
1173     String->NewlinesBefore = Macro->NewlinesBefore;
1174     String->HasUnescapedNewline = Macro->HasUnescapedNewline;
1175 
1176     Tokens.pop_back();
1177     Tokens.pop_back();
1178     Tokens.pop_back();
1179     Tokens.back() = String;
1180     return true;
1181   }
1182 
1183   bool tryMergeConflictMarkers() {
1184     if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
1185       return false;
1186 
1187     // Conflict lines look like:
1188     // <marker> <text from the vcs>
1189     // For example:
1190     // >>>>>>> /file/in/file/system at revision 1234
1191     //
1192     // We merge all tokens in a line that starts with a conflict marker
1193     // into a single token with a special token type that the unwrapped line
1194     // parser will use to correctly rebuild the underlying code.
1195 
1196     FileID ID;
1197     // Get the position of the first token in the line.
1198     unsigned FirstInLineOffset;
1199     std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
1200         Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
1201     StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
1202     // Calculate the offset of the start of the current line.
1203     auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
1204     if (LineOffset == StringRef::npos) {
1205       LineOffset = 0;
1206     } else {
1207       ++LineOffset;
1208     }
1209 
1210     auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
1211     StringRef LineStart;
1212     if (FirstSpace == StringRef::npos) {
1213       LineStart = Buffer.substr(LineOffset);
1214     } else {
1215       LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
1216     }
1217 
1218     TokenType Type = TT_Unknown;
1219     if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
1220       Type = TT_ConflictStart;
1221     } else if (LineStart == "|||||||" || LineStart == "=======" ||
1222                LineStart == "====") {
1223       Type = TT_ConflictAlternative;
1224     } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
1225       Type = TT_ConflictEnd;
1226     }
1227 
1228     if (Type != TT_Unknown) {
1229       FormatToken *Next = Tokens.back();
1230 
1231       Tokens.resize(FirstInLineIndex + 1);
1232       // We do not need to build a complete token here, as we will skip it
1233       // during parsing anyway (as we must not touch whitespace around conflict
1234       // markers).
1235       Tokens.back()->Type = Type;
1236       Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
1237 
1238       Tokens.push_back(Next);
1239       return true;
1240     }
1241 
1242     return false;
1243   }
1244 
1245   FormatToken *getStashedToken() {
1246     // Create a synthesized second '>' or '<' token.
1247     Token Tok = FormatTok->Tok;
1248     StringRef TokenText = FormatTok->TokenText;
1249 
1250     unsigned OriginalColumn = FormatTok->OriginalColumn;
1251     FormatTok = new (Allocator.Allocate()) FormatToken;
1252     FormatTok->Tok = Tok;
1253     SourceLocation TokLocation =
1254         FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
1255     FormatTok->Tok.setLocation(TokLocation);
1256     FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
1257     FormatTok->TokenText = TokenText;
1258     FormatTok->ColumnWidth = 1;
1259     FormatTok->OriginalColumn = OriginalColumn + 1;
1260 
1261     return FormatTok;
1262   }
1263 
  // Returns the next significant token.
  //
  // Whitespace in front of the token is consumed and recorded on the token
  // (WhitespaceRange, NewlinesBefore, HasUnescapedNewline, LastNewlineOffset,
  // OriginalColumn). The token's ColumnWidth is computed, '>>' and '<<' are
  // split into two tokens (the second half is returned by the next call), raw
  // identifiers are resolved to keywords or identifiers, and for C++ the
  // for-each and block macro token types are assigned.
  FormatToken *getNextToken() {
    // Hand out the pending second half of a split '>>' or '<<' first.
    if (GreaterStashed) {
      GreaterStashed = false;
      return getStashedToken();
    }
    if (LessStashed) {
      LessStashed = false;
      return getStashedToken();
    }

    FormatTok = new (Allocator.Allocate()) FormatToken;
    readRawToken(*FormatTok);
    // Whitespace trimmed off the end of a preceding comment counts as
    // whitespace in front of this token.
    SourceLocation WhitespaceStart =
        FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
    FormatTok->IsFirst = IsFirstToken;
    IsFirstToken = false;

    // Consume and record whitespace until we find a significant token.
    unsigned WhitespaceLength = TrailingWhitespace;
    while (FormatTok->Tok.is(tok::unknown)) {
      StringRef Text = FormatTok->TokenText;
      // Returns whether the newline following position 'pos' is escaped, i.e.
      // preceded by an odd number of backslashes.
      auto EscapesNewline = [&](int pos) {
        // A '\r' here is just part of '\r\n'. Skip it.
        if (pos >= 0 && Text[pos] == '\r')
          --pos;
        // See whether there is an odd number of '\' before this.
        unsigned count = 0;
        for (; pos >= 0; --pos, ++count)
          if (Text[pos] != '\\')
            break;
        return count & 1;
      };
      // FIXME: This miscounts tok:unknown tokens that are not just
      // whitespace, e.g. a '`' character.
      for (int i = 0, e = Text.size(); i != e; ++i) {
        switch (Text[i]) {
        case '\n':
          ++FormatTok->NewlinesBefore;
          FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
          FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
          Column = 0;
          break;
        case '\r':
          FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
          Column = 0;
          break;
        case '\f':
        case '\v':
          Column = 0;
          break;
        case ' ':
          ++Column;
          break;
        case '\t':
          // Advance to the next tab stop.
          Column += Style.TabWidth - Column % Style.TabWidth;
          break;
        case '\\':
          // A backslash only counts as whitespace if it escapes a newline.
          if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
            FormatTok->Type = TT_ImplicitStringLiteral;
          break;
        default:
          // Not whitespace: this unknown token is significant.
          FormatTok->Type = TT_ImplicitStringLiteral;
          break;
        }
        if (FormatTok->Type == TT_ImplicitStringLiteral)
          break;
      }

      if (FormatTok->is(TT_ImplicitStringLiteral))
        break;
      WhitespaceLength += FormatTok->Tok.getLength();

      readRawToken(*FormatTok);
    }

    // In case the token starts with escaped newlines, we want to
    // take them into account as whitespace - this pattern is quite frequent
    // in macro definitions.
    // FIXME: Add a more explicit test.
    while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
           FormatTok->TokenText[1] == '\n') {
      ++FormatTok->NewlinesBefore;
      WhitespaceLength += 2;
      FormatTok->LastNewlineOffset = 2;
      Column = 0;
      FormatTok->TokenText = FormatTok->TokenText.substr(2);
    }

    FormatTok->WhitespaceRange = SourceRange(
        WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));

    FormatTok->OriginalColumn = Column;

    TrailingWhitespace = 0;
    if (FormatTok->Tok.is(tok::comment)) {
      // Trim trailing whitespace off the comment; it is accounted to the next
      // token through TrailingWhitespace.
      // FIXME: Add the trimmed whitespace to Column.
      StringRef UntrimmedText = FormatTok->TokenText;
      FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
      TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
    } else if (FormatTok->Tok.is(tok::raw_identifier)) {
      // Resolve the raw identifier through the identifier table.
      IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
      FormatTok->Tok.setIdentifierInfo(&Info);
      FormatTok->Tok.setKind(Info.getTokenID());
      // These keywords are turned back into plain identifiers for Java and
      // JavaScript respectively.
      if (Style.Language == FormatStyle::LK_Java &&
          FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
                             tok::kw_operator)) {
        FormatTok->Tok.setKind(tok::identifier);
        FormatTok->Tok.setIdentifierInfo(nullptr);
      } else if (Style.Language == FormatStyle::LK_JavaScript &&
                 FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
                                    tok::kw_operator)) {
        FormatTok->Tok.setKind(tok::identifier);
        FormatTok->Tok.setIdentifierInfo(nullptr);
      }
    } else if (FormatTok->Tok.is(tok::greatergreater)) {
      // Split '>>' into two '>' tokens; the second one is handed out by the
      // next call.
      FormatTok->Tok.setKind(tok::greater);
      FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
      GreaterStashed = true;
    } else if (FormatTok->Tok.is(tok::lessless)) {
      // Split '<<' into two '<' tokens in the same way.
      FormatTok->Tok.setKind(tok::less);
      FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
      LessStashed = true;
    }

    // Now FormatTok is the next non-whitespace token.

    StringRef Text = FormatTok->TokenText;
    size_t FirstNewlinePos = Text.find('\n');
    if (FirstNewlinePos == StringRef::npos) {
      // FIXME: ColumnWidth actually depends on the start column, we need to
      // take this into account when the token is moved.
      FormatTok->ColumnWidth =
          encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
      Column += FormatTok->ColumnWidth;
    } else {
      FormatTok->IsMultiline = true;
      // For a multiline token, ColumnWidth only covers its first line.
      // FIXME: ColumnWidth actually depends on the start column, we need to
      // take this into account when the token is moved.
      FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
          Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);

      // The last line of the token always starts in column 0.
      // Thus, the length can be precomputed even in the presence of tabs.
      FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
          Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
          Encoding);
      Column = FormatTok->LastLineColumnWidth;
    }

    if (Style.Language == FormatStyle::LK_Cpp) {
      // A for-each macro name is not marked as such when it directly follows
      // a 'define' preprocessor keyword.
      if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
            Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
                tok::pp_define) &&
          std::find(ForEachMacros.begin(), ForEachMacros.end(),
                    FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) {
        FormatTok->Type = TT_ForEachMacro;
      } else if (FormatTok->is(tok::identifier)) {
        // Identifiers matching the configured regexes open or close a macro
        // block.
        if (MacroBlockBeginRegex.match(Text)) {
          FormatTok->Type = TT_MacroBlockBegin;
        } else if (MacroBlockEndRegex.match(Text)) {
          FormatTok->Type = TT_MacroBlockEnd;
        }
      }
    }

    return FormatTok;
  }
1431 
  // The token created most recently by getNextToken() or getStashedToken().
  FormatToken *FormatTok;
  // True until the first token has been handed out.
  bool IsFirstToken;
  // Set when a '>>' or '<<' was split and its second half is still pending.
  bool GreaterStashed, LessStashed;
  // Running column, updated by getNextToken() as whitespace and tokens are
  // consumed.
  unsigned Column;
  // Amount of whitespace trimmed off the end of the preceding comment token.
  unsigned TrailingWhitespace;
  // The lexer that produces the raw tokens; replaced by resetLexer().
  std::unique_ptr<Lexer> Lex;
  SourceManager &SourceMgr;
  // The file being lexed.
  FileID ID;
  FormatStyle &Style;
  // Resolves raw identifiers; also the source of the for-each macro infos.
  IdentifierTable IdentTable;
  AdditionalKeywords Keywords;
  encoding::Encoding Encoding;
  // Receives the replacements generated by tryRequoteJSStringLiteral().
  tooling::Replacements &Replaces;
  // All FormatTokens are allocated from here.
  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
  // Index (in 'Tokens') of the last token that starts a new line.
  unsigned FirstInLineIndex;
  // The tokens lexed (and merged) so far.
  SmallVector<FormatToken *, 16> Tokens;
  // Identifier infos for Style.ForEachMacros, sorted in the constructor.
  SmallVector<IdentifierInfo *, 8> ForEachMacros;

  // True while lexing between a "clang-format off" and a "clang-format on"
  // comment; see readRawToken().
  bool FormattingDisabled;

  // Built from Style.MacroBlockBegin and Style.MacroBlockEnd.
  llvm::Regex MacroBlockBeginRegex;
  llvm::Regex MacroBlockEndRegex;
1455 
1456   void readRawToken(FormatToken &Tok) {
1457     Lex->LexFromRawLexer(Tok.Tok);
1458     Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1459                               Tok.Tok.getLength());
1460     // For formatting, treat unterminated string literals like normal string
1461     // literals.
1462     if (Tok.is(tok::unknown)) {
1463       if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
1464         Tok.Tok.setKind(tok::string_literal);
1465         Tok.IsUnterminatedLiteral = true;
1466       } else if (Style.Language == FormatStyle::LK_JavaScript &&
1467                  Tok.TokenText == "''") {
1468         Tok.Tok.setKind(tok::string_literal);
1469       }
1470     }
1471 
1472     if (Style.Language == FormatStyle::LK_JavaScript &&
1473         Tok.is(tok::char_constant)) {
1474       Tok.Tok.setKind(tok::string_literal);
1475     }
1476 
1477     if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
1478                                  Tok.TokenText == "/* clang-format on */")) {
1479       FormattingDisabled = false;
1480     }
1481 
1482     Tok.Finalized = FormattingDisabled;
1483 
1484     if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
1485                                  Tok.TokenText == "/* clang-format off */")) {
1486       FormattingDisabled = true;
1487     }
1488   }
1489 
1490   void resetLexer(unsigned Offset) {
1491     StringRef Buffer = SourceMgr.getBufferData(ID);
1492     Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
1493                         getFormattingLangOpts(Style), Buffer.begin(),
1494                         Buffer.begin() + Offset, Buffer.end()));
1495     Lex->SetKeepWhitespaceMode(true);
1496     TrailingWhitespace = 0;
1497   }
1498 };
1499 
1500 static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1501   switch (Language) {
1502   case FormatStyle::LK_Cpp:
1503     return "C++";
1504   case FormatStyle::LK_Java:
1505     return "Java";
1506   case FormatStyle::LK_JavaScript:
1507     return "JavaScript";
1508   case FormatStyle::LK_Proto:
1509     return "Proto";
1510   default:
1511     return "Unknown";
1512   }
1513 }
1514 
1515 class Formatter : public UnwrappedLineConsumer {
1516 public:
1517   Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
1518             ArrayRef<CharSourceRange> Ranges)
1519       : Style(Style), ID(ID), SourceMgr(SourceMgr),
1520         Whitespaces(SourceMgr, Style,
1521                     inputUsesCRLF(SourceMgr.getBufferData(ID))),
1522         Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
1523         Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) {
1524     DEBUG(llvm::dbgs() << "File encoding: "
1525                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1526                                                                : "unknown")
1527                        << "\n");
1528     DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
1529                        << "\n");
1530   }
1531 
1532   tooling::Replacements format(bool *IncompleteFormat) {
1533     tooling::Replacements Result;
1534     FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding, Result);
1535 
1536     UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
1537                                *this);
1538     Parser.parse();
1539     assert(UnwrappedLines.rbegin()->empty());
1540     for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1541          ++Run) {
1542       DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1543       SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1544       for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1545         AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1546       }
1547       tooling::Replacements RunResult =
1548           format(AnnotatedLines, Tokens, IncompleteFormat);
1549       DEBUG({
1550         llvm::dbgs() << "Replacements for run " << Run << ":\n";
1551         for (tooling::Replacements::iterator I = RunResult.begin(),
1552                                              E = RunResult.end();
1553              I != E; ++I) {
1554           llvm::dbgs() << I->toString() << "\n";
1555         }
1556       });
1557       for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1558         delete AnnotatedLines[i];
1559       }
1560       Result.insert(RunResult.begin(), RunResult.end());
1561       Whitespaces.reset();
1562     }
1563     return Result;
1564   }
1565 
1566   tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1567                                FormatTokenLexer &Tokens,
1568                                bool *IncompleteFormat) {
1569     TokenAnnotator Annotator(Style, Tokens.getKeywords());
1570     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1571       Annotator.annotate(*AnnotatedLines[i]);
1572     }
1573     deriveLocalStyle(AnnotatedLines);
1574     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1575       Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1576     }
1577     computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
1578 
1579     Annotator.setCommentLineLevels(AnnotatedLines);
1580     ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr,
1581                                   Whitespaces, Encoding,
1582                                   BinPackInconclusiveFunctions);
1583     UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),
1584                            IncompleteFormat)
1585         .format(AnnotatedLines);
1586     return Whitespaces.generateReplacements();
1587   }
1588 
1589 private:
1590   // Determines which lines are affected by the SourceRanges given as input.
1591   // Returns \c true if at least one line between I and E or one of their
1592   // children is affected.
1593   bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
1594                             SmallVectorImpl<AnnotatedLine *>::iterator E) {
1595     bool SomeLineAffected = false;
1596     const AnnotatedLine *PreviousLine = nullptr;
1597     while (I != E) {
1598       AnnotatedLine *Line = *I;
1599       Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
1600 
1601       // If a line is part of a preprocessor directive, it needs to be formatted
1602       // if any token within the directive is affected.
1603       if (Line->InPPDirective) {
1604         FormatToken *Last = Line->Last;
1605         SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
1606         while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
1607           Last = (*PPEnd)->Last;
1608           ++PPEnd;
1609         }
1610 
1611         if (affectsTokenRange(*Line->First, *Last,
1612                               /*IncludeLeadingNewlines=*/false)) {
1613           SomeLineAffected = true;
1614           markAllAsAffected(I, PPEnd);
1615         }
1616         I = PPEnd;
1617         continue;
1618       }
1619 
1620       if (nonPPLineAffected(Line, PreviousLine))
1621         SomeLineAffected = true;
1622 
1623       PreviousLine = Line;
1624       ++I;
1625     }
1626     return SomeLineAffected;
1627   }
1628 
1629   // Determines whether 'Line' is affected by the SourceRanges given as input.
1630   // Returns \c true if line or one if its children is affected.
1631   bool nonPPLineAffected(AnnotatedLine *Line,
1632                          const AnnotatedLine *PreviousLine) {
1633     bool SomeLineAffected = false;
1634     Line->ChildrenAffected =
1635         computeAffectedLines(Line->Children.begin(), Line->Children.end());
1636     if (Line->ChildrenAffected)
1637       SomeLineAffected = true;
1638 
1639     // Stores whether one of the line's tokens is directly affected.
1640     bool SomeTokenAffected = false;
1641     // Stores whether we need to look at the leading newlines of the next token
1642     // in order to determine whether it was affected.
1643     bool IncludeLeadingNewlines = false;
1644 
1645     // Stores whether the first child line of any of this line's tokens is
1646     // affected.
1647     bool SomeFirstChildAffected = false;
1648 
1649     for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
1650       // Determine whether 'Tok' was affected.
1651       if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
1652         SomeTokenAffected = true;
1653 
1654       // Determine whether the first child of 'Tok' was affected.
1655       if (!Tok->Children.empty() && Tok->Children.front()->Affected)
1656         SomeFirstChildAffected = true;
1657 
1658       IncludeLeadingNewlines = Tok->Children.empty();
1659     }
1660 
1661     // Was this line moved, i.e. has it previously been on the same line as an
1662     // affected line?
1663     bool LineMoved = PreviousLine && PreviousLine->Affected &&
1664                      Line->First->NewlinesBefore == 0;
1665 
1666     bool IsContinuedComment =
1667         Line->First->is(tok::comment) && Line->First->Next == nullptr &&
1668         Line->First->NewlinesBefore < 2 && PreviousLine &&
1669         PreviousLine->Affected && PreviousLine->Last->is(tok::comment);
1670 
1671     if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
1672         IsContinuedComment) {
1673       Line->Affected = true;
1674       SomeLineAffected = true;
1675     }
1676     return SomeLineAffected;
1677   }
1678 
1679   // Marks all lines between I and E as well as all their children as affected.
1680   void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
1681                          SmallVectorImpl<AnnotatedLine *>::iterator E) {
1682     while (I != E) {
1683       (*I)->Affected = true;
1684       markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
1685       ++I;
1686     }
1687   }
1688 
1689   // Returns true if the range from 'First' to 'Last' intersects with one of the
1690   // input ranges.
1691   bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
1692                          bool IncludeLeadingNewlines) {
1693     SourceLocation Start = First.WhitespaceRange.getBegin();
1694     if (!IncludeLeadingNewlines)
1695       Start = Start.getLocWithOffset(First.LastNewlineOffset);
1696     SourceLocation End = Last.getStartOfNonWhitespace();
1697     End = End.getLocWithOffset(Last.TokenText.size());
1698     CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
1699     return affectsCharSourceRange(Range);
1700   }
1701 
1702   // Returns true if one of the input ranges intersect the leading empty lines
1703   // before 'Tok'.
1704   bool affectsLeadingEmptyLines(const FormatToken &Tok) {
1705     CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
1706         Tok.WhitespaceRange.getBegin(),
1707         Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
1708     return affectsCharSourceRange(EmptyLineRange);
1709   }
1710 
1711   // Returns true if 'Range' intersects with one of the input ranges.
1712   bool affectsCharSourceRange(const CharSourceRange &Range) {
1713     for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
1714                                                           E = Ranges.end();
1715          I != E; ++I) {
1716       if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
1717           !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
1718         return true;
1719     }
1720     return false;
1721   }
1722 
1723   static bool inputUsesCRLF(StringRef Text) {
1724     return Text.count('\r') * 2 > Text.count('\n');
1725   }
1726 
1727   bool
1728   hasCpp03IncompatibleFormat(const SmallVectorImpl<AnnotatedLine *> &Lines) {
1729     for (const AnnotatedLine* Line : Lines) {
1730       if (hasCpp03IncompatibleFormat(Line->Children))
1731         return true;
1732       for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) {
1733         if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1734           if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener))
1735             return true;
1736           if (Tok->is(TT_TemplateCloser) &&
1737               Tok->Previous->is(TT_TemplateCloser))
1738             return true;
1739         }
1740       }
1741     }
1742     return false;
1743   }
1744 
1745   int countVariableAlignments(const SmallVectorImpl<AnnotatedLine *> &Lines) {
1746     int AlignmentDiff = 0;
1747     for (const AnnotatedLine* Line : Lines) {
1748       AlignmentDiff += countVariableAlignments(Line->Children);
1749       for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) {
1750         if (!Tok->is(TT_PointerOrReference))
1751           continue;
1752         bool SpaceBefore =
1753             Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1754         bool SpaceAfter = Tok->Next->WhitespaceRange.getBegin() !=
1755                           Tok->Next->WhitespaceRange.getEnd();
1756         if (SpaceBefore && !SpaceAfter)
1757           ++AlignmentDiff;
1758         if (!SpaceBefore && SpaceAfter)
1759           --AlignmentDiff;
1760       }
1761     }
1762     return AlignmentDiff;
1763   }
1764 
1765   void
1766   deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1767     bool HasBinPackedFunction = false;
1768     bool HasOnePerLineFunction = false;
1769     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1770       if (!AnnotatedLines[i]->First->Next)
1771         continue;
1772       FormatToken *Tok = AnnotatedLines[i]->First->Next;
1773       while (Tok->Next) {
1774         if (Tok->PackingKind == PPK_BinPacked)
1775           HasBinPackedFunction = true;
1776         if (Tok->PackingKind == PPK_OnePerLine)
1777           HasOnePerLineFunction = true;
1778 
1779         Tok = Tok->Next;
1780       }
1781     }
1782     if (Style.DerivePointerAlignment)
1783       Style.PointerAlignment = countVariableAlignments(AnnotatedLines) <= 0
1784                                    ? FormatStyle::PAS_Left
1785                                    : FormatStyle::PAS_Right;
1786     if (Style.Standard == FormatStyle::LS_Auto)
1787       Style.Standard = hasCpp03IncompatibleFormat(AnnotatedLines)
1788                            ? FormatStyle::LS_Cpp11
1789                            : FormatStyle::LS_Cpp03;
1790     BinPackInconclusiveFunctions =
1791         HasBinPackedFunction || !HasOnePerLineFunction;
1792   }
1793 
1794   void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
1795     assert(!UnwrappedLines.empty());
1796     UnwrappedLines.back().push_back(TheLine);
1797   }
1798 
1799   void finishRun() override {
1800     UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1801   }
1802 
  // Private copy of the style; deriveLocalStyle() may adjust its
  // PointerAlignment and Standard based on the code being formatted.
  FormatStyle Style;
  FileID ID;
  SourceManager &SourceMgr;
  // Collects the whitespace changes; reset after every run.
  WhitespaceManager Whitespaces;
  // The input ranges; only code intersecting them counts as affected.
  SmallVector<CharSourceRange, 8> Ranges;
  // One vector of lines per run. Starts out with a single empty run;
  // finishRun() appends a new empty one.
  SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;

  encoding::Encoding Encoding;
  // Computed by deriveLocalStyle() and passed on to the ContinuationIndenter.
  bool BinPackInconclusiveFunctions;
1812 };
1813 
// A single #include/#import line as collected by sortIncludes(). Instances
// are brace-initialized, so the order of the fields matters.
struct IncludeDirective {
  // The included name as matched by the include regex, including its
  // delimiters ("..." or <...>).
  StringRef Filename;
  // The complete text of the line containing the directive.
  StringRef Text;
  // Offset of the start of that line in the code being sorted.
  unsigned Offset;
  // Sort key that takes precedence over 'Filename': the priority of the first
  // matching include category, 0 for the main header of a source file, or
  // INT_MAX if no category matches.
  int Category;
};
1820 
1821 } // end anonymous namespace
1822 
1823 // Determines whether 'Ranges' intersects with ('Start', 'End').
1824 static bool affectsRange(ArrayRef<tooling::Range> Ranges, unsigned Start,
1825                          unsigned End) {
1826   for (auto Range : Ranges) {
1827     if (Range.getOffset() < End &&
1828         Range.getOffset() + Range.getLength() > Start)
1829       return true;
1830   }
1831   return false;
1832 }
1833 
1834 // Sorts a block of includes given by 'Includes' alphabetically adding the
1835 // necessary replacement to 'Replaces'. 'Includes' must be in strict source
1836 // order.
1837 static void sortIncludes(const FormatStyle &Style,
1838                          const SmallVectorImpl<IncludeDirective> &Includes,
1839                          ArrayRef<tooling::Range> Ranges, StringRef FileName,
1840                          tooling::Replacements &Replaces, unsigned *Cursor) {
1841   if (!affectsRange(Ranges, Includes.front().Offset,
1842                     Includes.back().Offset + Includes.back().Text.size()))
1843     return;
1844   SmallVector<unsigned, 16> Indices;
1845   for (unsigned i = 0, e = Includes.size(); i != e; ++i)
1846     Indices.push_back(i);
1847   std::stable_sort(
1848       Indices.begin(), Indices.end(), [&](unsigned LHSI, unsigned RHSI) {
1849         return std::tie(Includes[LHSI].Category, Includes[LHSI].Filename) <
1850                std::tie(Includes[RHSI].Category, Includes[RHSI].Filename);
1851       });
1852 
1853   // If the #includes are out of order, we generate a single replacement fixing
1854   // the entire block. Otherwise, no replacement is generated.
1855   bool OutOfOrder = false;
1856   for (unsigned i = 1, e = Indices.size(); i != e; ++i) {
1857     if (Indices[i] != i) {
1858       OutOfOrder = true;
1859       break;
1860     }
1861   }
1862   if (!OutOfOrder)
1863     return;
1864 
1865   std::string result;
1866   bool CursorMoved = false;
1867   for (unsigned Index : Indices) {
1868     if (!result.empty())
1869       result += "\n";
1870     result += Includes[Index].Text;
1871 
1872     if (Cursor && !CursorMoved) {
1873       unsigned Start = Includes[Index].Offset;
1874       unsigned End = Start + Includes[Index].Text.size();
1875       if (*Cursor >= Start && *Cursor < End) {
1876         *Cursor = Includes.front().Offset + result.size() + *Cursor - End;
1877         CursorMoved = true;
1878       }
1879     }
1880   }
1881 
1882   // Sorting #includes shouldn't change their total number of characters.
1883   // This would otherwise mess up 'Ranges'.
1884   assert(result.size() ==
1885          Includes.back().Offset + Includes.back().Text.size() -
1886              Includes.front().Offset);
1887 
1888   Replaces.insert(tooling::Replacement(FileName, Includes.front().Offset,
1889                                        result.size(), result));
1890 }
1891 
1892 tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
1893                                    ArrayRef<tooling::Range> Ranges,
1894                                    StringRef FileName, unsigned *Cursor) {
1895   tooling::Replacements Replaces;
1896   if (!Style.SortIncludes)
1897     return Replaces;
1898 
1899   unsigned Prev = 0;
1900   unsigned SearchFrom = 0;
1901   llvm::Regex IncludeRegex(
1902       R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))");
1903   SmallVector<StringRef, 4> Matches;
1904   SmallVector<IncludeDirective, 16> IncludesInBlock;
1905 
1906   // In compiled files, consider the first #include to be the main #include of
1907   // the file if it is not a system #include. This ensures that the header
1908   // doesn't have hidden dependencies
1909   // (http://llvm.org/docs/CodingStandards.html#include-style).
1910   //
1911   // FIXME: Do some sanity checking, e.g. edit distance of the base name, to fix
1912   // cases where the first #include is unlikely to be the main header.
1913   bool IsSource = FileName.endswith(".c") || FileName.endswith(".cc") ||
1914                   FileName.endswith(".cpp") || FileName.endswith(".c++") ||
1915                   FileName.endswith(".cxx") || FileName.endswith(".m") ||
1916                   FileName.endswith(".mm");
1917   StringRef FileStem = llvm::sys::path::stem(FileName);
1918   bool FirstIncludeBlock = true;
1919   bool MainIncludeFound = false;
1920 
1921   // Create pre-compiled regular expressions for the #include categories.
1922   SmallVector<llvm::Regex, 4> CategoryRegexs;
1923   for (const auto &Category : Style.IncludeCategories)
1924     CategoryRegexs.emplace_back(Category.Regex);
1925 
1926   bool FormattingOff = false;
1927 
1928   for (;;) {
1929     auto Pos = Code.find('\n', SearchFrom);
1930     StringRef Line =
1931         Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev);
1932 
1933     StringRef Trimmed = Line.trim();
1934     if (Trimmed == "// clang-format off")
1935       FormattingOff = true;
1936     else if (Trimmed == "// clang-format on")
1937       FormattingOff = false;
1938 
1939     if (!FormattingOff && !Line.endswith("\\")) {
1940       if (IncludeRegex.match(Line, &Matches)) {
1941         StringRef IncludeName = Matches[2];
1942         int Category = INT_MAX;
1943         for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) {
1944           if (CategoryRegexs[i].match(IncludeName)) {
1945             Category = Style.IncludeCategories[i].Priority;
1946             break;
1947           }
1948         }
1949         if (IsSource && !MainIncludeFound && Category > 0 &&
1950             FirstIncludeBlock && IncludeName.startswith("\"")) {
1951           StringRef HeaderStem =
1952               llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1));
1953           if (FileStem.startswith(HeaderStem)) {
1954             Category = 0;
1955             MainIncludeFound = true;
1956           }
1957         }
1958         IncludesInBlock.push_back({IncludeName, Line, Prev, Category});
1959       } else if (!IncludesInBlock.empty()) {
1960         sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,
1961                      Cursor);
1962         IncludesInBlock.clear();
1963         FirstIncludeBlock = false;
1964       }
1965       Prev = Pos + 1;
1966     }
1967     if (Pos == StringRef::npos || Pos + 1 == Code.size())
1968       break;
1969     SearchFrom = Pos + 1;
1970   }
1971   if (!IncludesInBlock.empty())
1972     sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, Cursor);
1973   return Replaces;
1974 }
1975 
1976 tooling::Replacements formatReplacements(StringRef Code,
1977                                          const tooling::Replacements &Replaces,
1978                                          const FormatStyle &Style) {
1979   if (Replaces.empty())
1980     return tooling::Replacements();
1981 
1982   std::string NewCode = applyAllReplacements(Code, Replaces);
1983   std::vector<tooling::Range> ChangedRanges =
1984       tooling::calculateChangedRangesInFile(Replaces);
1985   StringRef FileName = Replaces.begin()->getFilePath();
1986   tooling::Replacements FormatReplaces =
1987       reformat(Style, NewCode, ChangedRanges, FileName);
1988 
1989   tooling::Replacements MergedReplacements =
1990       mergeReplacements(Replaces, FormatReplaces);
1991   return MergedReplacements;
1992 }
1993 
1994 std::string applyAllReplacementsAndFormat(StringRef Code,
1995                                           const tooling::Replacements &Replaces,
1996                                           const FormatStyle &Style) {
1997   tooling::Replacements NewReplacements =
1998       formatReplacements(Code, Replaces, Style);
1999   if (NewReplacements.empty())
2000     return Code; // Exit early to avoid overhead in `applyAllReplacements`.
2001   return applyAllReplacements(Code, NewReplacements);
2002 }
2003 
2004 tooling::Replacements reformat(const FormatStyle &Style,
2005                                SourceManager &SourceMgr, FileID ID,
2006                                ArrayRef<CharSourceRange> Ranges,
2007                                bool *IncompleteFormat) {
2008   FormatStyle Expanded = expandPresets(Style);
2009   if (Expanded.DisableFormat)
2010     return tooling::Replacements();
2011   Formatter formatter(Expanded, SourceMgr, ID, Ranges);
2012   return formatter.format(IncompleteFormat);
2013 }
2014 
2015 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
2016                                ArrayRef<tooling::Range> Ranges,
2017                                StringRef FileName, bool *IncompleteFormat) {
2018   if (Style.DisableFormat)
2019     return tooling::Replacements();
2020 
2021   IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
2022       new vfs::InMemoryFileSystem);
2023   FileManager Files(FileSystemOptions(), InMemoryFileSystem);
2024   DiagnosticsEngine Diagnostics(
2025       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
2026       new DiagnosticOptions);
2027   SourceManager SourceMgr(Diagnostics, Files);
2028   InMemoryFileSystem->addFile(
2029       FileName, 0, llvm::MemoryBuffer::getMemBuffer(
2030                        Code, FileName, /*RequiresNullTerminator=*/false));
2031   FileID ID = SourceMgr.createFileID(Files.getFile(FileName), SourceLocation(),
2032                                      clang::SrcMgr::C_User);
2033   SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
2034   std::vector<CharSourceRange> CharRanges;
2035   for (const tooling::Range &Range : Ranges) {
2036     SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
2037     SourceLocation End = Start.getLocWithOffset(Range.getLength());
2038     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
2039   }
2040   return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat);
2041 }
2042 
2043 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
2044   LangOptions LangOpts;
2045   LangOpts.CPlusPlus = 1;
2046   LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
2047   LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
2048   LangOpts.LineComment = 1;
2049   bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp;
2050   LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;
2051   LangOpts.Bool = 1;
2052   LangOpts.ObjC1 = 1;
2053   LangOpts.ObjC2 = 1;
2054   LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
2055   LangOpts.DeclSpecKeyword = 1; // To get __declspec.
2056   return LangOpts;
2057 }
2058 
// Help text for the -style option: lists the predefined style names and
// describes the "file" and inline "{key: value, ...}" forms.
const char *StyleOptionHelpDescription =
    "Coding style, currently supports:\n"
    "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
    "Use -style=file to load style configuration from\n"
    ".clang-format file located in one of the parent\n"
    "directories of the source file (or current\n"
    "directory for stdin).\n"
    "Use -style=\"{key: value, ...}\" to set specific\n"
    "parameters, e.g.:\n"
    "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
2069 
2070 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
2071   if (FileName.endswith(".java"))
2072     return FormatStyle::LK_Java;
2073   if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts"))
2074     return FormatStyle::LK_JavaScript; // JavaScript or TypeScript.
2075   if (FileName.endswith_lower(".proto") ||
2076       FileName.endswith_lower(".protodevel"))
2077     return FormatStyle::LK_Proto;
2078   if (FileName.endswith_lower(".td"))
2079     return FormatStyle::LK_TableGen;
2080   return FormatStyle::LK_Cpp;
2081 }
2082 
/// Determines the style to use for \p FileName.
///
/// \param StyleName     one of: an inline configuration starting with '{',
///                      the word "file" (case-insensitive), or the name of a
///                      predefined style.
/// \param FileName      used to guess the language and as the starting point
///                      of the search for a configuration file.
/// \param FallbackStyle name of the predefined style to start from; it is
///                      what is returned when nothing else applies.
///
/// Problems are reported on stderr and never make the function fail; some
/// style is always returned.
FormatStyle getStyle(StringRef StyleName, StringRef FileName,
                     StringRef FallbackStyle) {
  // Start from LLVM style with the language guessed from the file name, then
  // switch to the fallback style. If that one is invalid, give up right away
  // without looking at 'StyleName'.
  FormatStyle Style = getLLVMStyle();
  Style.Language = getLanguageByFileName(FileName);
  if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
    llvm::errs() << "Invalid fallback style \"" << FallbackStyle
                 << "\" using LLVM style\n";
    return Style;
  }

  if (StyleName.startswith("{")) {
    // Parse YAML/JSON style from the command line.
    if (std::error_code ec = parseConfiguration(StyleName, &Style)) {
      llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
                   << FallbackStyle << " style\n";
    }
    return Style;
  }

  // Anything but "file" has to be the name of a predefined style.
  if (!StyleName.equals_lower("file")) {
    if (!getPredefinedStyle(StyleName, Style.Language, &Style))
      llvm::errs() << "Invalid value for -style, using " << FallbackStyle
                   << " style\n";
    return Style;
  }

  // Look for .clang-format/_clang-format file in the file's parent directories.
  // Configuration files that parse fine but are reported as unsuitable are
  // collected so that they can be listed if nothing usable is found.
  SmallString<128> UnsuitableConfigFiles;
  SmallString<128> Path(FileName);
  llvm::sys::fs::make_absolute(Path);
  // Walk from the file's path up to the root; path components that are not
  // directories (such as the file itself) are skipped.
  for (StringRef Directory = Path; !Directory.empty();
       Directory = llvm::sys::path::parent_path(Directory)) {
    if (!llvm::sys::fs::is_directory(Directory))
      continue;
    SmallString<128> ConfigFile(Directory);

    llvm::sys::path::append(ConfigFile, ".clang-format");
    DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
    bool IsFile = false;
    // Ignore errors from is_regular_file: we only need to know if we can read
    // the file or not.
    llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);

    if (!IsFile) {
      // Try _clang-format too, since dotfiles are not commonly used on Windows.
      ConfigFile = Directory;
      llvm::sys::path::append(ConfigFile, "_clang-format");
      DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
      llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
    }

    if (IsFile) {
      llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
          llvm::MemoryBuffer::getFile(ConfigFile.c_str());
      // A configuration file that cannot be read ends the search.
      if (std::error_code EC = Text.getError()) {
        llvm::errs() << EC.message() << "\n";
        break;
      }
      if (std::error_code ec =
              parseConfiguration(Text.get()->getBuffer(), &Style)) {
        // An unsuitable file is remembered and the search continues in the
        // parent directory; any other error ends the search.
        if (ec == ParseError::Unsuitable) {
          if (!UnsuitableConfigFiles.empty())
            UnsuitableConfigFiles.append(", ");
          UnsuitableConfigFiles.append(ConfigFile);
          continue;
        }
        llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
                     << "\n";
        break;
      }
      DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
      return Style;
    }
  }
  // Reached when no configuration file was used, either because none was
  // found or because the search was ended by an error.
  if (!UnsuitableConfigFiles.empty()) {
    llvm::errs() << "Configuration file(s) do(es) not support "
                 << getLanguageName(Style.Language) << ": "
                 << UnsuitableConfigFiles << "\n";
  }
  return Style;
}
2164 
2165 } // namespace format
2166 } // namespace clang
2167