1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "clang/Format/Format.h"
17 #include "AffectedRangeManager.h"
18 #include "ContinuationIndenter.h"
19 #include "TokenAnnotator.h"
20 #include "UnwrappedLineFormatter.h"
21 #include "UnwrappedLineParser.h"
22 #include "WhitespaceManager.h"
23 #include "clang/Basic/Diagnostic.h"
24 #include "clang/Basic/DiagnosticOptions.h"
25 #include "clang/Basic/SourceManager.h"
26 #include "clang/Basic/VirtualFileSystem.h"
27 #include "clang/Lex/Lexer.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/Support/Allocator.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/Path.h"
32 #include "llvm/Support/Regex.h"
33 #include "llvm/Support/YAMLTraits.h"
34 #include <memory>
35 #include <queue>
36 #include <string>
37 
38 #define DEBUG_TYPE "format-formatter"
39 
40 using clang::format::FormatStyle;
41 
42 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
43 LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory)
44 
45 namespace llvm {
46 namespace yaml {
47 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
48   static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
49     IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
50     IO.enumCase(Value, "Java", FormatStyle::LK_Java);
51     IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
52     IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
53     IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen);
54   }
55 };
56 
57 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
58   static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
59     IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
60     IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
61     IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
62     IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
63     IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
64   }
65 };
66 
67 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
68   static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
69     IO.enumCase(Value, "Never", FormatStyle::UT_Never);
70     IO.enumCase(Value, "false", FormatStyle::UT_Never);
71     IO.enumCase(Value, "Always", FormatStyle::UT_Always);
72     IO.enumCase(Value, "true", FormatStyle::UT_Always);
73     IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
74     IO.enumCase(Value, "ForContinuationAndIndentation",
75                 FormatStyle::UT_ForContinuationAndIndentation);
76   }
77 };
78 
79 template <> struct ScalarEnumerationTraits<FormatStyle::JavaScriptQuoteStyle> {
80   static void enumeration(IO &IO, FormatStyle::JavaScriptQuoteStyle &Value) {
81     IO.enumCase(Value, "Leave", FormatStyle::JSQS_Leave);
82     IO.enumCase(Value, "Single", FormatStyle::JSQS_Single);
83     IO.enumCase(Value, "Double", FormatStyle::JSQS_Double);
84   }
85 };
86 
87 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
88   static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
89     IO.enumCase(Value, "None", FormatStyle::SFS_None);
90     IO.enumCase(Value, "false", FormatStyle::SFS_None);
91     IO.enumCase(Value, "All", FormatStyle::SFS_All);
92     IO.enumCase(Value, "true", FormatStyle::SFS_All);
93     IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline);
94     IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty);
95   }
96 };
97 
98 template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> {
99   static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) {
100     IO.enumCase(Value, "All", FormatStyle::BOS_All);
101     IO.enumCase(Value, "true", FormatStyle::BOS_All);
102     IO.enumCase(Value, "None", FormatStyle::BOS_None);
103     IO.enumCase(Value, "false", FormatStyle::BOS_None);
104     IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment);
105   }
106 };
107 
108 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
109   static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
110     IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
111     IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
112     IO.enumCase(Value, "Mozilla", FormatStyle::BS_Mozilla);
113     IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
114     IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
115     IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
116     IO.enumCase(Value, "WebKit", FormatStyle::BS_WebKit);
117     IO.enumCase(Value, "Custom", FormatStyle::BS_Custom);
118   }
119 };
120 
121 template <>
122 struct ScalarEnumerationTraits<FormatStyle::ReturnTypeBreakingStyle> {
123   static void enumeration(IO &IO, FormatStyle::ReturnTypeBreakingStyle &Value) {
124     IO.enumCase(Value, "None", FormatStyle::RTBS_None);
125     IO.enumCase(Value, "All", FormatStyle::RTBS_All);
126     IO.enumCase(Value, "TopLevel", FormatStyle::RTBS_TopLevel);
127     IO.enumCase(Value, "TopLevelDefinitions",
128                 FormatStyle::RTBS_TopLevelDefinitions);
129     IO.enumCase(Value, "AllDefinitions", FormatStyle::RTBS_AllDefinitions);
130   }
131 };
132 
133 template <>
134 struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> {
135   static void
136   enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) {
137     IO.enumCase(Value, "None", FormatStyle::DRTBS_None);
138     IO.enumCase(Value, "All", FormatStyle::DRTBS_All);
139     IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel);
140 
141     // For backward compatibility.
142     IO.enumCase(Value, "false", FormatStyle::DRTBS_None);
143     IO.enumCase(Value, "true", FormatStyle::DRTBS_All);
144   }
145 };
146 
147 template <>
148 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
149   static void enumeration(IO &IO,
150                           FormatStyle::NamespaceIndentationKind &Value) {
151     IO.enumCase(Value, "None", FormatStyle::NI_None);
152     IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
153     IO.enumCase(Value, "All", FormatStyle::NI_All);
154   }
155 };
156 
157 template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> {
158   static void enumeration(IO &IO, FormatStyle::BracketAlignmentStyle &Value) {
159     IO.enumCase(Value, "Align", FormatStyle::BAS_Align);
160     IO.enumCase(Value, "DontAlign", FormatStyle::BAS_DontAlign);
161     IO.enumCase(Value, "AlwaysBreak", FormatStyle::BAS_AlwaysBreak);
162 
163     // For backward compatibility.
164     IO.enumCase(Value, "true", FormatStyle::BAS_Align);
165     IO.enumCase(Value, "false", FormatStyle::BAS_DontAlign);
166   }
167 };
168 
169 template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> {
170   static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) {
171     IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle);
172     IO.enumCase(Value, "Left", FormatStyle::PAS_Left);
173     IO.enumCase(Value, "Right", FormatStyle::PAS_Right);
174 
175     // For backward compatibility.
176     IO.enumCase(Value, "true", FormatStyle::PAS_Left);
177     IO.enumCase(Value, "false", FormatStyle::PAS_Right);
178   }
179 };
180 
181 template <>
182 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
183   static void enumeration(IO &IO,
184                           FormatStyle::SpaceBeforeParensOptions &Value) {
185     IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
186     IO.enumCase(Value, "ControlStatements",
187                 FormatStyle::SBPO_ControlStatements);
188     IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
189 
190     // For backward compatibility.
191     IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
192     IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
193   }
194 };
195 
196 template <> struct MappingTraits<FormatStyle> {
197   static void mapping(IO &IO, FormatStyle &Style) {
198     // When reading, read the language first, we need it for getPredefinedStyle.
199     IO.mapOptional("Language", Style.Language);
200 
201     if (IO.outputting()) {
202       StringRef StylesArray[] = {"LLVM",    "Google", "Chromium",
203                                  "Mozilla", "WebKit", "GNU"};
204       ArrayRef<StringRef> Styles(StylesArray);
205       for (size_t i = 0, e = Styles.size(); i < e; ++i) {
206         StringRef StyleName(Styles[i]);
207         FormatStyle PredefinedStyle;
208         if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
209             Style == PredefinedStyle) {
210           IO.mapOptional("# BasedOnStyle", StyleName);
211           break;
212         }
213       }
214     } else {
215       StringRef BasedOnStyle;
216       IO.mapOptional("BasedOnStyle", BasedOnStyle);
217       if (!BasedOnStyle.empty()) {
218         FormatStyle::LanguageKind OldLanguage = Style.Language;
219         FormatStyle::LanguageKind Language =
220             ((FormatStyle *)IO.getContext())->Language;
221         if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
222           IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
223           return;
224         }
225         Style.Language = OldLanguage;
226       }
227     }
228 
229     // For backward compatibility.
230     if (!IO.outputting()) {
231       IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment);
232       IO.mapOptional("IndentFunctionDeclarationAfterType",
233                      Style.IndentWrappedFunctionNames);
234       IO.mapOptional("PointerBindsToType", Style.PointerAlignment);
235       IO.mapOptional("SpaceAfterControlStatementKeyword",
236                      Style.SpaceBeforeParens);
237     }
238 
239     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
240     IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
241     IO.mapOptional("AlignConsecutiveAssignments",
242                    Style.AlignConsecutiveAssignments);
243     IO.mapOptional("AlignConsecutiveDeclarations",
244                    Style.AlignConsecutiveDeclarations);
245     IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
246     IO.mapOptional("AlignOperands", Style.AlignOperands);
247     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
248     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
249                    Style.AllowAllParametersOfDeclarationOnNextLine);
250     IO.mapOptional("AllowShortBlocksOnASingleLine",
251                    Style.AllowShortBlocksOnASingleLine);
252     IO.mapOptional("AllowShortCaseLabelsOnASingleLine",
253                    Style.AllowShortCaseLabelsOnASingleLine);
254     IO.mapOptional("AllowShortFunctionsOnASingleLine",
255                    Style.AllowShortFunctionsOnASingleLine);
256     IO.mapOptional("AllowShortIfStatementsOnASingleLine",
257                    Style.AllowShortIfStatementsOnASingleLine);
258     IO.mapOptional("AllowShortLoopsOnASingleLine",
259                    Style.AllowShortLoopsOnASingleLine);
260     IO.mapOptional("AlwaysBreakAfterDefinitionReturnType",
261                    Style.AlwaysBreakAfterDefinitionReturnType);
262     IO.mapOptional("AlwaysBreakAfterReturnType",
263                    Style.AlwaysBreakAfterReturnType);
264     // If AlwaysBreakAfterDefinitionReturnType was specified but
265     // AlwaysBreakAfterReturnType was not, initialize the latter from the
266     // former for backwards compatibility.
267     if (Style.AlwaysBreakAfterDefinitionReturnType != FormatStyle::DRTBS_None &&
268         Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_None) {
269       if (Style.AlwaysBreakAfterDefinitionReturnType == FormatStyle::DRTBS_All)
270         Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions;
271       else if (Style.AlwaysBreakAfterDefinitionReturnType ==
272                FormatStyle::DRTBS_TopLevel)
273         Style.AlwaysBreakAfterReturnType =
274             FormatStyle::RTBS_TopLevelDefinitions;
275     }
276 
277     IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
278                    Style.AlwaysBreakBeforeMultilineStrings);
279     IO.mapOptional("AlwaysBreakTemplateDeclarations",
280                    Style.AlwaysBreakTemplateDeclarations);
281     IO.mapOptional("BinPackArguments", Style.BinPackArguments);
282     IO.mapOptional("BinPackParameters", Style.BinPackParameters);
283     IO.mapOptional("BraceWrapping", Style.BraceWrapping);
284     IO.mapOptional("BreakBeforeBinaryOperators",
285                    Style.BreakBeforeBinaryOperators);
286     IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
287     IO.mapOptional("BreakBeforeTernaryOperators",
288                    Style.BreakBeforeTernaryOperators);
289     IO.mapOptional("BreakConstructorInitializersBeforeComma",
290                    Style.BreakConstructorInitializersBeforeComma);
291     IO.mapOptional("BreakAfterJavaFieldAnnotations",
292                    Style.BreakAfterJavaFieldAnnotations);
293     IO.mapOptional("BreakStringLiterals", Style.BreakStringLiterals);
294     IO.mapOptional("ColumnLimit", Style.ColumnLimit);
295     IO.mapOptional("CommentPragmas", Style.CommentPragmas);
296     IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
297                    Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
298     IO.mapOptional("ConstructorInitializerIndentWidth",
299                    Style.ConstructorInitializerIndentWidth);
300     IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
301     IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
302     IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment);
303     IO.mapOptional("DisableFormat", Style.DisableFormat);
304     IO.mapOptional("ExperimentalAutoDetectBinPacking",
305                    Style.ExperimentalAutoDetectBinPacking);
306     IO.mapOptional("ForEachMacros", Style.ForEachMacros);
307     IO.mapOptional("IncludeCategories", Style.IncludeCategories);
308     IO.mapOptional("IncludeIsMainRegex", Style.IncludeIsMainRegex);
309     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
310     IO.mapOptional("IndentWidth", Style.IndentWidth);
311     IO.mapOptional("IndentWrappedFunctionNames",
312                    Style.IndentWrappedFunctionNames);
313     IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
314                    Style.KeepEmptyLinesAtTheStartOfBlocks);
315     IO.mapOptional("MacroBlockBegin", Style.MacroBlockBegin);
316     IO.mapOptional("MacroBlockEnd", Style.MacroBlockEnd);
317     IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
318     IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
319     IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth);
320     IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
321     IO.mapOptional("ObjCSpaceBeforeProtocolList",
322                    Style.ObjCSpaceBeforeProtocolList);
323     IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
324                    Style.PenaltyBreakBeforeFirstCallParameter);
325     IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
326     IO.mapOptional("PenaltyBreakFirstLessLess",
327                    Style.PenaltyBreakFirstLessLess);
328     IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
329     IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
330     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
331                    Style.PenaltyReturnTypeOnItsOwnLine);
332     IO.mapOptional("PointerAlignment", Style.PointerAlignment);
333     IO.mapOptional("ReflowComments", Style.ReflowComments);
334     IO.mapOptional("SortIncludes", Style.SortIncludes);
335     IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
336     IO.mapOptional("SpaceBeforeAssignmentOperators",
337                    Style.SpaceBeforeAssignmentOperators);
338     IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
339     IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
340     IO.mapOptional("SpacesBeforeTrailingComments",
341                    Style.SpacesBeforeTrailingComments);
342     IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
343     IO.mapOptional("SpacesInContainerLiterals",
344                    Style.SpacesInContainerLiterals);
345     IO.mapOptional("SpacesInCStyleCastParentheses",
346                    Style.SpacesInCStyleCastParentheses);
347     IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
348     IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets);
349     IO.mapOptional("Standard", Style.Standard);
350     IO.mapOptional("TabWidth", Style.TabWidth);
351     IO.mapOptional("UseTab", Style.UseTab);
352     IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes);
353   }
354 };
355 
356 template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> {
357   static void mapping(IO &IO, FormatStyle::BraceWrappingFlags &Wrapping) {
358     IO.mapOptional("AfterClass", Wrapping.AfterClass);
359     IO.mapOptional("AfterControlStatement", Wrapping.AfterControlStatement);
360     IO.mapOptional("AfterEnum", Wrapping.AfterEnum);
361     IO.mapOptional("AfterFunction", Wrapping.AfterFunction);
362     IO.mapOptional("AfterNamespace", Wrapping.AfterNamespace);
363     IO.mapOptional("AfterObjCDeclaration", Wrapping.AfterObjCDeclaration);
364     IO.mapOptional("AfterStruct", Wrapping.AfterStruct);
365     IO.mapOptional("AfterUnion", Wrapping.AfterUnion);
366     IO.mapOptional("BeforeCatch", Wrapping.BeforeCatch);
367     IO.mapOptional("BeforeElse", Wrapping.BeforeElse);
368     IO.mapOptional("IndentBraces", Wrapping.IndentBraces);
369   }
370 };
371 
372 template <> struct MappingTraits<FormatStyle::IncludeCategory> {
373   static void mapping(IO &IO, FormatStyle::IncludeCategory &Category) {
374     IO.mapOptional("Regex", Category.Regex);
375     IO.mapOptional("Priority", Category.Priority);
376   }
377 };
378 
379 // Allows to read vector<FormatStyle> while keeping default values.
380 // IO.getContext() should contain a pointer to the FormatStyle structure, that
381 // will be used to get default values for missing keys.
382 // If the first element has no Language specified, it will be treated as the
383 // default one for the following elements.
384 template <> struct DocumentListTraits<std::vector<FormatStyle>> {
385   static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
386     return Seq.size();
387   }
388   static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
389                               size_t Index) {
390     if (Index >= Seq.size()) {
391       assert(Index == Seq.size());
392       FormatStyle Template;
393       if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
394         Template = Seq[0];
395       } else {
396         Template = *((const FormatStyle *)IO.getContext());
397         Template.Language = FormatStyle::LK_None;
398       }
399       Seq.resize(Index + 1, Template);
400     }
401     return Seq[Index];
402   }
403 };
404 } // namespace yaml
405 } // namespace llvm
406 
407 namespace clang {
408 namespace format {
409 
410 const std::error_category &getParseCategory() {
411   static ParseErrorCategory C;
412   return C;
413 }
414 std::error_code make_error_code(ParseError e) {
415   return std::error_code(static_cast<int>(e), getParseCategory());
416 }
417 
418 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT {
419   return "clang-format.parse_error";
420 }
421 
422 std::string ParseErrorCategory::message(int EV) const {
423   switch (static_cast<ParseError>(EV)) {
424   case ParseError::Success:
425     return "Success";
426   case ParseError::Error:
427     return "Invalid argument";
428   case ParseError::Unsuitable:
429     return "Unsuitable";
430   }
431   llvm_unreachable("unexpected parse error");
432 }
433 
434 static FormatStyle expandPresets(const FormatStyle &Style) {
435   if (Style.BreakBeforeBraces == FormatStyle::BS_Custom)
436     return Style;
437   FormatStyle Expanded = Style;
438   Expanded.BraceWrapping = {false, false, false, false, false, false,
439                             false, false, false, false, false};
440   switch (Style.BreakBeforeBraces) {
441   case FormatStyle::BS_Linux:
442     Expanded.BraceWrapping.AfterClass = true;
443     Expanded.BraceWrapping.AfterFunction = true;
444     Expanded.BraceWrapping.AfterNamespace = true;
445     break;
446   case FormatStyle::BS_Mozilla:
447     Expanded.BraceWrapping.AfterClass = true;
448     Expanded.BraceWrapping.AfterEnum = true;
449     Expanded.BraceWrapping.AfterFunction = true;
450     Expanded.BraceWrapping.AfterStruct = true;
451     Expanded.BraceWrapping.AfterUnion = true;
452     break;
453   case FormatStyle::BS_Stroustrup:
454     Expanded.BraceWrapping.AfterFunction = true;
455     Expanded.BraceWrapping.BeforeCatch = true;
456     Expanded.BraceWrapping.BeforeElse = true;
457     break;
458   case FormatStyle::BS_Allman:
459     Expanded.BraceWrapping.AfterClass = true;
460     Expanded.BraceWrapping.AfterControlStatement = true;
461     Expanded.BraceWrapping.AfterEnum = true;
462     Expanded.BraceWrapping.AfterFunction = true;
463     Expanded.BraceWrapping.AfterNamespace = true;
464     Expanded.BraceWrapping.AfterObjCDeclaration = true;
465     Expanded.BraceWrapping.AfterStruct = true;
466     Expanded.BraceWrapping.BeforeCatch = true;
467     Expanded.BraceWrapping.BeforeElse = true;
468     break;
469   case FormatStyle::BS_GNU:
470     Expanded.BraceWrapping = {true, true, true, true, true, true,
471                               true, true, true, true, true};
472     break;
473   case FormatStyle::BS_WebKit:
474     Expanded.BraceWrapping.AfterFunction = true;
475     break;
476   default:
477     break;
478   }
479   return Expanded;
480 }
481 
482 FormatStyle getLLVMStyle() {
483   FormatStyle LLVMStyle;
484   LLVMStyle.Language = FormatStyle::LK_Cpp;
485   LLVMStyle.AccessModifierOffset = -2;
486   LLVMStyle.AlignEscapedNewlinesLeft = false;
487   LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align;
488   LLVMStyle.AlignOperands = true;
489   LLVMStyle.AlignTrailingComments = true;
490   LLVMStyle.AlignConsecutiveAssignments = false;
491   LLVMStyle.AlignConsecutiveDeclarations = false;
492   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
493   LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
494   LLVMStyle.AllowShortBlocksOnASingleLine = false;
495   LLVMStyle.AllowShortCaseLabelsOnASingleLine = false;
496   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
497   LLVMStyle.AllowShortLoopsOnASingleLine = false;
498   LLVMStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_None;
499   LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None;
500   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
501   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
502   LLVMStyle.BinPackParameters = true;
503   LLVMStyle.BinPackArguments = true;
504   LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;
505   LLVMStyle.BreakBeforeTernaryOperators = true;
506   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
507   LLVMStyle.BraceWrapping = {false, false, false, false, false, false,
508                              false, false, false, false, false};
509   LLVMStyle.BreakAfterJavaFieldAnnotations = false;
510   LLVMStyle.BreakConstructorInitializersBeforeComma = false;
511   LLVMStyle.BreakStringLiterals = true;
512   LLVMStyle.ColumnLimit = 80;
513   LLVMStyle.CommentPragmas = "^ IWYU pragma:";
514   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
515   LLVMStyle.ConstructorInitializerIndentWidth = 4;
516   LLVMStyle.ContinuationIndentWidth = 4;
517   LLVMStyle.Cpp11BracedListStyle = true;
518   LLVMStyle.DerivePointerAlignment = false;
519   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
520   LLVMStyle.ForEachMacros.push_back("foreach");
521   LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
522   LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH");
523   LLVMStyle.IncludeCategories = {{"^\"(llvm|llvm-c|clang|clang-c)/", 2},
524                                  {"^(<|\"(gtest|isl|json)/)", 3},
525                                  {".*", 1}};
526   LLVMStyle.IncludeIsMainRegex = "$";
527   LLVMStyle.IndentCaseLabels = false;
528   LLVMStyle.IndentWrappedFunctionNames = false;
529   LLVMStyle.IndentWidth = 2;
530   LLVMStyle.TabWidth = 8;
531   LLVMStyle.MaxEmptyLinesToKeep = 1;
532   LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
533   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
534   LLVMStyle.ObjCBlockIndentWidth = 2;
535   LLVMStyle.ObjCSpaceAfterProperty = false;
536   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
537   LLVMStyle.PointerAlignment = FormatStyle::PAS_Right;
538   LLVMStyle.SpacesBeforeTrailingComments = 1;
539   LLVMStyle.Standard = FormatStyle::LS_Cpp11;
540   LLVMStyle.UseTab = FormatStyle::UT_Never;
541   LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;
542   LLVMStyle.ReflowComments = true;
543   LLVMStyle.SpacesInParentheses = false;
544   LLVMStyle.SpacesInSquareBrackets = false;
545   LLVMStyle.SpaceInEmptyParentheses = false;
546   LLVMStyle.SpacesInContainerLiterals = true;
547   LLVMStyle.SpacesInCStyleCastParentheses = false;
548   LLVMStyle.SpaceAfterCStyleCast = false;
549   LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
550   LLVMStyle.SpaceBeforeAssignmentOperators = true;
551   LLVMStyle.SpacesInAngles = false;
552 
553   LLVMStyle.PenaltyBreakComment = 300;
554   LLVMStyle.PenaltyBreakFirstLessLess = 120;
555   LLVMStyle.PenaltyBreakString = 1000;
556   LLVMStyle.PenaltyExcessCharacter = 1000000;
557   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
558   LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
559 
560   LLVMStyle.DisableFormat = false;
561   LLVMStyle.SortIncludes = true;
562 
563   return LLVMStyle;
564 }
565 
566 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
567   FormatStyle GoogleStyle = getLLVMStyle();
568   GoogleStyle.Language = Language;
569 
570   GoogleStyle.AccessModifierOffset = -1;
571   GoogleStyle.AlignEscapedNewlinesLeft = true;
572   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
573   GoogleStyle.AllowShortLoopsOnASingleLine = true;
574   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
575   GoogleStyle.AlwaysBreakTemplateDeclarations = true;
576   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
577   GoogleStyle.DerivePointerAlignment = true;
578   GoogleStyle.IncludeCategories = {{"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}};
579   GoogleStyle.IncludeIsMainRegex = "([-_](test|unittest))?$";
580   GoogleStyle.IndentCaseLabels = true;
581   GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
582   GoogleStyle.ObjCSpaceAfterProperty = false;
583   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
584   GoogleStyle.PointerAlignment = FormatStyle::PAS_Left;
585   GoogleStyle.SpacesBeforeTrailingComments = 2;
586   GoogleStyle.Standard = FormatStyle::LS_Auto;
587 
588   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
589   GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
590 
591   if (Language == FormatStyle::LK_Java) {
592     GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
593     GoogleStyle.AlignOperands = false;
594     GoogleStyle.AlignTrailingComments = false;
595     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
596     GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
597     GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
598     GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment;
599     GoogleStyle.ColumnLimit = 100;
600     GoogleStyle.SpaceAfterCStyleCast = true;
601     GoogleStyle.SpacesBeforeTrailingComments = 1;
602   } else if (Language == FormatStyle::LK_JavaScript) {
603     GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak;
604     GoogleStyle.AlignOperands = false;
605     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
606     GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
607     GoogleStyle.BreakBeforeTernaryOperators = false;
608     GoogleStyle.CommentPragmas = "@(export|return|see|visibility) ";
609     GoogleStyle.MaxEmptyLinesToKeep = 3;
610     GoogleStyle.SpacesInContainerLiterals = false;
611     GoogleStyle.JavaScriptQuotes = FormatStyle::JSQS_Single;
612   } else if (Language == FormatStyle::LK_Proto) {
613     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
614     GoogleStyle.SpacesInContainerLiterals = false;
615   }
616 
617   return GoogleStyle;
618 }
619 
620 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
621   FormatStyle ChromiumStyle = getGoogleStyle(Language);
622   if (Language == FormatStyle::LK_Java) {
623     ChromiumStyle.AllowShortIfStatementsOnASingleLine = true;
624     ChromiumStyle.BreakAfterJavaFieldAnnotations = true;
625     ChromiumStyle.ContinuationIndentWidth = 8;
626     ChromiumStyle.IndentWidth = 4;
627   } else {
628     ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
629     ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
630     ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
631     ChromiumStyle.AllowShortLoopsOnASingleLine = false;
632     ChromiumStyle.BinPackParameters = false;
633     ChromiumStyle.DerivePointerAlignment = false;
634   }
635   ChromiumStyle.SortIncludes = false;
636   return ChromiumStyle;
637 }
638 
639 FormatStyle getMozillaStyle() {
640   FormatStyle MozillaStyle = getLLVMStyle();
641   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
642   MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
643   MozillaStyle.AlwaysBreakAfterReturnType =
644       FormatStyle::RTBS_TopLevelDefinitions;
645   MozillaStyle.AlwaysBreakAfterDefinitionReturnType =
646       FormatStyle::DRTBS_TopLevel;
647   MozillaStyle.AlwaysBreakTemplateDeclarations = true;
648   MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla;
649   MozillaStyle.BreakConstructorInitializersBeforeComma = true;
650   MozillaStyle.ConstructorInitializerIndentWidth = 2;
651   MozillaStyle.ContinuationIndentWidth = 2;
652   MozillaStyle.Cpp11BracedListStyle = false;
653   MozillaStyle.IndentCaseLabels = true;
654   MozillaStyle.ObjCSpaceAfterProperty = true;
655   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
656   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
657   MozillaStyle.PointerAlignment = FormatStyle::PAS_Left;
658   return MozillaStyle;
659 }
660 
661 FormatStyle getWebKitStyle() {
662   FormatStyle Style = getLLVMStyle();
663   Style.AccessModifierOffset = -4;
664   Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
665   Style.AlignOperands = false;
666   Style.AlignTrailingComments = false;
667   Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
668   Style.BreakBeforeBraces = FormatStyle::BS_WebKit;
669   Style.BreakConstructorInitializersBeforeComma = true;
670   Style.Cpp11BracedListStyle = false;
671   Style.ColumnLimit = 0;
672   Style.IndentWidth = 4;
673   Style.NamespaceIndentation = FormatStyle::NI_Inner;
674   Style.ObjCBlockIndentWidth = 4;
675   Style.ObjCSpaceAfterProperty = true;
676   Style.PointerAlignment = FormatStyle::PAS_Left;
677   Style.Standard = FormatStyle::LS_Cpp03;
678   return Style;
679 }
680 
681 FormatStyle getGNUStyle() {
682   FormatStyle Style = getLLVMStyle();
683   Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All;
684   Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions;
685   Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
686   Style.BreakBeforeBraces = FormatStyle::BS_GNU;
687   Style.BreakBeforeTernaryOperators = true;
688   Style.Cpp11BracedListStyle = false;
689   Style.ColumnLimit = 79;
690   Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
691   Style.Standard = FormatStyle::LS_Cpp03;
692   return Style;
693 }
694 
695 FormatStyle getNoStyle() {
696   FormatStyle NoStyle = getLLVMStyle();
697   NoStyle.DisableFormat = true;
698   NoStyle.SortIncludes = false;
699   return NoStyle;
700 }
701 
702 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
703                         FormatStyle *Style) {
704   if (Name.equals_lower("llvm")) {
705     *Style = getLLVMStyle();
706   } else if (Name.equals_lower("chromium")) {
707     *Style = getChromiumStyle(Language);
708   } else if (Name.equals_lower("mozilla")) {
709     *Style = getMozillaStyle();
710   } else if (Name.equals_lower("google")) {
711     *Style = getGoogleStyle(Language);
712   } else if (Name.equals_lower("webkit")) {
713     *Style = getWebKitStyle();
714   } else if (Name.equals_lower("gnu")) {
715     *Style = getGNUStyle();
716   } else if (Name.equals_lower("none")) {
717     *Style = getNoStyle();
718   } else {
719     return false;
720   }
721 
722   Style->Language = Language;
723   return true;
724 }
725 
726 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
727   assert(Style);
728   FormatStyle::LanguageKind Language = Style->Language;
729   assert(Language != FormatStyle::LK_None);
730   if (Text.trim().empty())
731     return make_error_code(ParseError::Error);
732 
733   std::vector<FormatStyle> Styles;
734   llvm::yaml::Input Input(Text);
735   // DocumentListTraits<vector<FormatStyle>> uses the context to get default
736   // values for the fields, keys for which are missing from the configuration.
737   // Mapping also uses the context to get the language to find the correct
738   // base style.
739   Input.setContext(Style);
740   Input >> Styles;
741   if (Input.error())
742     return Input.error();
743 
744   for (unsigned i = 0; i < Styles.size(); ++i) {
745     // Ensures that only the first configuration can skip the Language option.
746     if (Styles[i].Language == FormatStyle::LK_None && i != 0)
747       return make_error_code(ParseError::Error);
748     // Ensure that each language is configured at most once.
749     for (unsigned j = 0; j < i; ++j) {
750       if (Styles[i].Language == Styles[j].Language) {
751         DEBUG(llvm::dbgs()
752               << "Duplicate languages in the config file on positions " << j
753               << " and " << i << "\n");
754         return make_error_code(ParseError::Error);
755       }
756     }
757   }
758   // Look for a suitable configuration starting from the end, so we can
759   // find the configuration for the specific language first, and the default
760   // configuration (which can only be at slot 0) after it.
761   for (int i = Styles.size() - 1; i >= 0; --i) {
762     if (Styles[i].Language == Language ||
763         Styles[i].Language == FormatStyle::LK_None) {
764       *Style = Styles[i];
765       Style->Language = Language;
766       return make_error_code(ParseError::Success);
767     }
768   }
769   return make_error_code(ParseError::Unsuitable);
770 }
771 
772 std::string configurationAsText(const FormatStyle &Style) {
773   std::string Text;
774   llvm::raw_string_ostream Stream(Text);
775   llvm::yaml::Output Output(Stream);
776   // We use the same mapping method for input and output, so we need a non-const
777   // reference here.
778   FormatStyle NonConstStyle = expandPresets(Style);
779   Output << NonConstStyle;
780   return Stream.str();
781 }
782 
783 namespace {
784 
785 class FormatTokenLexer {
786 public:
787   FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
788                    const FormatStyle &Style, encoding::Encoding Encoding)
789       : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
790         LessStashed(false), Column(0), TrailingWhitespace(0),
791         SourceMgr(SourceMgr), ID(ID), Style(Style),
792         IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
793         Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
794         MacroBlockBeginRegex(Style.MacroBlockBegin),
795         MacroBlockEndRegex(Style.MacroBlockEnd) {
796     Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
797                         getFormattingLangOpts(Style)));
798     Lex->SetKeepWhitespaceMode(true);
799 
800     for (const std::string &ForEachMacro : Style.ForEachMacros)
801       ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
802     std::sort(ForEachMacros.begin(), ForEachMacros.end());
803   }
804 
805   ArrayRef<FormatToken *> lex() {
806     assert(Tokens.empty());
807     assert(FirstInLineIndex == 0);
808     do {
809       Tokens.push_back(getNextToken());
810       if (Style.Language == FormatStyle::LK_JavaScript) {
811         tryParseJSRegexLiteral();
812         tryParseTemplateString();
813       }
814       tryMergePreviousTokens();
815       if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
816         FirstInLineIndex = Tokens.size() - 1;
817     } while (Tokens.back()->Tok.isNot(tok::eof));
818     return Tokens;
819   }
820 
821   const AdditionalKeywords &getKeywords() { return Keywords; }
822 
823 private:
824   void tryMergePreviousTokens() {
825     if (tryMerge_TMacro())
826       return;
827     if (tryMergeConflictMarkers())
828       return;
829     if (tryMergeLessLess())
830       return;
831 
832     if (Style.Language == FormatStyle::LK_JavaScript) {
833       static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
834       static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
835                                                      tok::equal};
836       static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
837                                                     tok::greaterequal};
838       static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
839       // FIXME: Investigate what token type gives the correct operator priority.
840       if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
841         return;
842       if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
843         return;
844       if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
845         return;
846       if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
847         return;
848     }
849   }
850 
851   bool tryMergeLessLess() {
852     // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
853     if (Tokens.size() < 3)
854       return false;
855 
856     bool FourthTokenIsLess = false;
857     if (Tokens.size() > 3)
858       FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
859 
860     auto First = Tokens.end() - 3;
861     if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
862         First[0]->isNot(tok::less) || FourthTokenIsLess)
863       return false;
864 
865     // Only merge if there currently is no whitespace between the two "<".
866     if (First[1]->WhitespaceRange.getBegin() !=
867         First[1]->WhitespaceRange.getEnd())
868       return false;
869 
870     First[0]->Tok.setKind(tok::lessless);
871     First[0]->TokenText = "<<";
872     First[0]->ColumnWidth += 1;
873     Tokens.erase(Tokens.end() - 2);
874     return true;
875   }
876 
877   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) {
878     if (Tokens.size() < Kinds.size())
879       return false;
880 
881     SmallVectorImpl<FormatToken *>::const_iterator First =
882         Tokens.end() - Kinds.size();
883     if (!First[0]->is(Kinds[0]))
884       return false;
885     unsigned AddLength = 0;
886     for (unsigned i = 1; i < Kinds.size(); ++i) {
887       if (!First[i]->is(Kinds[i]) ||
888           First[i]->WhitespaceRange.getBegin() !=
889               First[i]->WhitespaceRange.getEnd())
890         return false;
891       AddLength += First[i]->TokenText.size();
892     }
893     Tokens.resize(Tokens.size() - Kinds.size() + 1);
894     First[0]->TokenText = StringRef(First[0]->TokenText.data(),
895                                     First[0]->TokenText.size() + AddLength);
896     First[0]->ColumnWidth += AddLength;
897     First[0]->Type = NewType;
898     return true;
899   }
900 
901   // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
902   bool precedesOperand(FormatToken *Tok) {
903     // NB: This is not entirely correct, as an r_paren can introduce an operand
904     // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough
905     // corner case to not matter in practice, though.
906     return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
907                         tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
908                         tok::colon, tok::question, tok::tilde) ||
909            Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
910                         tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
911                         tok::kw_typeof, Keywords.kw_instanceof,
912                         Keywords.kw_in) ||
913            Tok->isBinaryOperator();
914   }
915 
916   bool canPrecedeRegexLiteral(FormatToken *Prev) {
917     if (!Prev)
918       return true;
919 
920     // Regex literals can only follow after prefix unary operators, not after
921     // postfix unary operators. If the '++' is followed by a non-operand
922     // introducing token, the slash here is the operand and not the start of a
923     // regex.
924     if (Prev->isOneOf(tok::plusplus, tok::minusminus))
925       return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]));
926 
927     // The previous token must introduce an operand location where regex
928     // literals can occur.
929     if (!precedesOperand(Prev))
930       return false;
931 
932     return true;
933   }
934 
935   // Tries to parse a JavaScript Regex literal starting at the current token,
936   // if that begins with a slash and is in a location where JavaScript allows
937   // regex literals. Changes the current token to a regex literal and updates
938   // its text if successful.
939   void tryParseJSRegexLiteral() {
940     FormatToken *RegexToken = Tokens.back();
941     if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
942       return;
943 
944     FormatToken *Prev = nullptr;
945     for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
946       // NB: Because previous pointers are not initialized yet, this cannot use
947       // Token.getPreviousNonComment.
948       if ((*I)->isNot(tok::comment)) {
949         Prev = *I;
950         break;
951       }
952     }
953 
954     if (!canPrecedeRegexLiteral(Prev))
955       return;
956 
957     // 'Manually' lex ahead in the current file buffer.
958     const char *Offset = Lex->getBufferLocation();
959     const char *RegexBegin = Offset - RegexToken->TokenText.size();
960     StringRef Buffer = Lex->getBuffer();
961     bool InCharacterClass = false;
962     bool HaveClosingSlash = false;
963     for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
964       // Regular expressions are terminated with a '/', which can only be
965       // escaped using '\' or a character class between '[' and ']'.
966       // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.5.
967       switch (*Offset) {
968       case '\\':
969         // Skip the escaped character.
970         ++Offset;
971         break;
972       case '[':
973         InCharacterClass = true;
974         break;
975       case ']':
976         InCharacterClass = false;
977         break;
978       case '/':
979         if (!InCharacterClass)
980           HaveClosingSlash = true;
981         break;
982       }
983     }
984 
985     RegexToken->Type = TT_RegexLiteral;
986     // Treat regex literals like other string_literals.
987     RegexToken->Tok.setKind(tok::string_literal);
988     RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
989     RegexToken->ColumnWidth = RegexToken->TokenText.size();
990 
991     resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
992   }
993 
994   void tryParseTemplateString() {
995     FormatToken *BacktickToken = Tokens.back();
996     if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`")
997       return;
998 
999     // 'Manually' lex ahead in the current file buffer.
1000     const char *Offset = Lex->getBufferLocation();
1001     const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
1002     for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) {
1003       if (*Offset == '\\')
1004         ++Offset; // Skip the escaped character.
1005     }
1006 
1007     StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
1008     BacktickToken->Type = TT_TemplateString;
1009     BacktickToken->Tok.setKind(tok::string_literal);
1010     BacktickToken->TokenText = LiteralText;
1011 
1012     // Adjust width for potentially multiline string literals.
1013     size_t FirstBreak = LiteralText.find('\n');
1014     StringRef FirstLineText = FirstBreak == StringRef::npos
1015                                   ? LiteralText
1016                                   : LiteralText.substr(0, FirstBreak);
1017     BacktickToken->ColumnWidth = encoding::columnWidthWithTabs(
1018         FirstLineText, BacktickToken->OriginalColumn, Style.TabWidth, Encoding);
1019     size_t LastBreak = LiteralText.rfind('\n');
1020     if (LastBreak != StringRef::npos) {
1021       BacktickToken->IsMultiline = true;
1022       unsigned StartColumn = 0; // The template tail spans the entire line.
1023       BacktickToken->LastLineColumnWidth = encoding::columnWidthWithTabs(
1024           LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
1025           Style.TabWidth, Encoding);
1026     }
1027 
1028     resetLexer(
1029         SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
1030   }
1031 
1032   bool tryMerge_TMacro() {
1033     if (Tokens.size() < 4)
1034       return false;
1035     FormatToken *Last = Tokens.back();
1036     if (!Last->is(tok::r_paren))
1037       return false;
1038 
1039     FormatToken *String = Tokens[Tokens.size() - 2];
1040     if (!String->is(tok::string_literal) || String->IsMultiline)
1041       return false;
1042 
1043     if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
1044       return false;
1045 
1046     FormatToken *Macro = Tokens[Tokens.size() - 4];
1047     if (Macro->TokenText != "_T")
1048       return false;
1049 
1050     const char *Start = Macro->TokenText.data();
1051     const char *End = Last->TokenText.data() + Last->TokenText.size();
1052     String->TokenText = StringRef(Start, End - Start);
1053     String->IsFirst = Macro->IsFirst;
1054     String->LastNewlineOffset = Macro->LastNewlineOffset;
1055     String->WhitespaceRange = Macro->WhitespaceRange;
1056     String->OriginalColumn = Macro->OriginalColumn;
1057     String->ColumnWidth = encoding::columnWidthWithTabs(
1058         String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
1059     String->NewlinesBefore = Macro->NewlinesBefore;
1060     String->HasUnescapedNewline = Macro->HasUnescapedNewline;
1061 
1062     Tokens.pop_back();
1063     Tokens.pop_back();
1064     Tokens.pop_back();
1065     Tokens.back() = String;
1066     return true;
1067   }
1068 
1069   bool tryMergeConflictMarkers() {
1070     if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
1071       return false;
1072 
1073     // Conflict lines look like:
1074     // <marker> <text from the vcs>
1075     // For example:
1076     // >>>>>>> /file/in/file/system at revision 1234
1077     //
1078     // We merge all tokens in a line that starts with a conflict marker
1079     // into a single token with a special token type that the unwrapped line
1080     // parser will use to correctly rebuild the underlying code.
1081 
1082     FileID ID;
1083     // Get the position of the first token in the line.
1084     unsigned FirstInLineOffset;
1085     std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
1086         Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
1087     StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
1088     // Calculate the offset of the start of the current line.
1089     auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
1090     if (LineOffset == StringRef::npos) {
1091       LineOffset = 0;
1092     } else {
1093       ++LineOffset;
1094     }
1095 
1096     auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
1097     StringRef LineStart;
1098     if (FirstSpace == StringRef::npos) {
1099       LineStart = Buffer.substr(LineOffset);
1100     } else {
1101       LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
1102     }
1103 
1104     TokenType Type = TT_Unknown;
1105     if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
1106       Type = TT_ConflictStart;
1107     } else if (LineStart == "|||||||" || LineStart == "=======" ||
1108                LineStart == "====") {
1109       Type = TT_ConflictAlternative;
1110     } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
1111       Type = TT_ConflictEnd;
1112     }
1113 
1114     if (Type != TT_Unknown) {
1115       FormatToken *Next = Tokens.back();
1116 
1117       Tokens.resize(FirstInLineIndex + 1);
1118       // We do not need to build a complete token here, as we will skip it
1119       // during parsing anyway (as we must not touch whitespace around conflict
1120       // markers).
1121       Tokens.back()->Type = Type;
1122       Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
1123 
1124       Tokens.push_back(Next);
1125       return true;
1126     }
1127 
1128     return false;
1129   }
1130 
1131   FormatToken *getStashedToken() {
1132     // Create a synthesized second '>' or '<' token.
1133     Token Tok = FormatTok->Tok;
1134     StringRef TokenText = FormatTok->TokenText;
1135 
1136     unsigned OriginalColumn = FormatTok->OriginalColumn;
1137     FormatTok = new (Allocator.Allocate()) FormatToken;
1138     FormatTok->Tok = Tok;
1139     SourceLocation TokLocation =
1140         FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
1141     FormatTok->Tok.setLocation(TokLocation);
1142     FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
1143     FormatTok->TokenText = TokenText;
1144     FormatTok->ColumnWidth = 1;
1145     FormatTok->OriginalColumn = OriginalColumn + 1;
1146 
1147     return FormatTok;
1148   }
1149 
1150   FormatToken *getNextToken() {
1151     if (GreaterStashed) {
1152       GreaterStashed = false;
1153       return getStashedToken();
1154     }
1155     if (LessStashed) {
1156       LessStashed = false;
1157       return getStashedToken();
1158     }
1159 
1160     FormatTok = new (Allocator.Allocate()) FormatToken;
1161     readRawToken(*FormatTok);
1162     SourceLocation WhitespaceStart =
1163         FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
1164     FormatTok->IsFirst = IsFirstToken;
1165     IsFirstToken = false;
1166 
1167     // Consume and record whitespace until we find a significant token.
1168     unsigned WhitespaceLength = TrailingWhitespace;
1169     while (FormatTok->Tok.is(tok::unknown)) {
1170       StringRef Text = FormatTok->TokenText;
1171       auto EscapesNewline = [&](int pos) {
1172         // A '\r' here is just part of '\r\n'. Skip it.
1173         if (pos >= 0 && Text[pos] == '\r')
1174           --pos;
1175         // See whether there is an odd number of '\' before this.
1176         unsigned count = 0;
1177         for (; pos >= 0; --pos, ++count)
1178           if (Text[pos] != '\\')
1179             break;
1180         return count & 1;
1181       };
1182       // FIXME: This miscounts tok:unknown tokens that are not just
1183       // whitespace, e.g. a '`' character.
1184       for (int i = 0, e = Text.size(); i != e; ++i) {
1185         switch (Text[i]) {
1186         case '\n':
1187           ++FormatTok->NewlinesBefore;
1188           FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
1189           FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1190           Column = 0;
1191           break;
1192         case '\r':
1193           FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1194           Column = 0;
1195           break;
1196         case '\f':
1197         case '\v':
1198           Column = 0;
1199           break;
1200         case ' ':
1201           ++Column;
1202           break;
1203         case '\t':
1204           Column += Style.TabWidth - Column % Style.TabWidth;
1205           break;
1206         case '\\':
1207           if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
1208             FormatTok->Type = TT_ImplicitStringLiteral;
1209           break;
1210         default:
1211           FormatTok->Type = TT_ImplicitStringLiteral;
1212           break;
1213         }
1214         if (FormatTok->Type == TT_ImplicitStringLiteral)
1215           break;
1216       }
1217 
1218       if (FormatTok->is(TT_ImplicitStringLiteral))
1219         break;
1220       WhitespaceLength += FormatTok->Tok.getLength();
1221 
1222       readRawToken(*FormatTok);
1223     }
1224 
1225     // In case the token starts with escaped newlines, we want to
1226     // take them into account as whitespace - this pattern is quite frequent
1227     // in macro definitions.
1228     // FIXME: Add a more explicit test.
1229     while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
1230            FormatTok->TokenText[1] == '\n') {
1231       ++FormatTok->NewlinesBefore;
1232       WhitespaceLength += 2;
1233       FormatTok->LastNewlineOffset = 2;
1234       Column = 0;
1235       FormatTok->TokenText = FormatTok->TokenText.substr(2);
1236     }
1237 
1238     FormatTok->WhitespaceRange = SourceRange(
1239         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1240 
1241     FormatTok->OriginalColumn = Column;
1242 
1243     TrailingWhitespace = 0;
1244     if (FormatTok->Tok.is(tok::comment)) {
1245       // FIXME: Add the trimmed whitespace to Column.
1246       StringRef UntrimmedText = FormatTok->TokenText;
1247       FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
1248       TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
1249     } else if (FormatTok->Tok.is(tok::raw_identifier)) {
1250       IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
1251       FormatTok->Tok.setIdentifierInfo(&Info);
1252       FormatTok->Tok.setKind(Info.getTokenID());
1253       if (Style.Language == FormatStyle::LK_Java &&
1254           FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
1255                              tok::kw_operator)) {
1256         FormatTok->Tok.setKind(tok::identifier);
1257         FormatTok->Tok.setIdentifierInfo(nullptr);
1258       } else if (Style.Language == FormatStyle::LK_JavaScript &&
1259                  FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
1260                                     tok::kw_operator)) {
1261         FormatTok->Tok.setKind(tok::identifier);
1262         FormatTok->Tok.setIdentifierInfo(nullptr);
1263       }
1264     } else if (FormatTok->Tok.is(tok::greatergreater)) {
1265       FormatTok->Tok.setKind(tok::greater);
1266       FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1267       GreaterStashed = true;
1268     } else if (FormatTok->Tok.is(tok::lessless)) {
1269       FormatTok->Tok.setKind(tok::less);
1270       FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1271       LessStashed = true;
1272     }
1273 
1274     // Now FormatTok is the next non-whitespace token.
1275 
1276     StringRef Text = FormatTok->TokenText;
1277     size_t FirstNewlinePos = Text.find('\n');
1278     if (FirstNewlinePos == StringRef::npos) {
1279       // FIXME: ColumnWidth actually depends on the start column, we need to
1280       // take this into account when the token is moved.
1281       FormatTok->ColumnWidth =
1282           encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
1283       Column += FormatTok->ColumnWidth;
1284     } else {
1285       FormatTok->IsMultiline = true;
1286       // FIXME: ColumnWidth actually depends on the start column, we need to
1287       // take this into account when the token is moved.
1288       FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
1289           Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
1290 
1291       // The last line of the token always starts in column 0.
1292       // Thus, the length can be precomputed even in the presence of tabs.
1293       FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
1294           Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
1295           Encoding);
1296       Column = FormatTok->LastLineColumnWidth;
1297     }
1298 
1299     if (Style.Language == FormatStyle::LK_Cpp) {
1300       if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
1301             Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
1302                 tok::pp_define) &&
1303           std::find(ForEachMacros.begin(), ForEachMacros.end(),
1304                     FormatTok->Tok.getIdentifierInfo()) !=
1305               ForEachMacros.end()) {
1306         FormatTok->Type = TT_ForEachMacro;
1307       } else if (FormatTok->is(tok::identifier)) {
1308         if (MacroBlockBeginRegex.match(Text)) {
1309           FormatTok->Type = TT_MacroBlockBegin;
1310         } else if (MacroBlockEndRegex.match(Text)) {
1311           FormatTok->Type = TT_MacroBlockEnd;
1312         }
1313       }
1314     }
1315 
1316     return FormatTok;
1317   }
1318 
1319   FormatToken *FormatTok;
1320   bool IsFirstToken;
1321   bool GreaterStashed, LessStashed;
1322   unsigned Column;
1323   unsigned TrailingWhitespace;
1324   std::unique_ptr<Lexer> Lex;
1325   const SourceManager &SourceMgr;
1326   FileID ID;
1327   const FormatStyle &Style;
1328   IdentifierTable IdentTable;
1329   AdditionalKeywords Keywords;
1330   encoding::Encoding Encoding;
1331   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
1332   // Index (in 'Tokens') of the last token that starts a new line.
1333   unsigned FirstInLineIndex;
1334   SmallVector<FormatToken *, 16> Tokens;
1335   SmallVector<IdentifierInfo *, 8> ForEachMacros;
1336 
1337   bool FormattingDisabled;
1338 
1339   llvm::Regex MacroBlockBeginRegex;
1340   llvm::Regex MacroBlockEndRegex;
1341 
1342   void readRawToken(FormatToken &Tok) {
1343     Lex->LexFromRawLexer(Tok.Tok);
1344     Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1345                               Tok.Tok.getLength());
1346     // For formatting, treat unterminated string literals like normal string
1347     // literals.
1348     if (Tok.is(tok::unknown)) {
1349       if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
1350         Tok.Tok.setKind(tok::string_literal);
1351         Tok.IsUnterminatedLiteral = true;
1352       } else if (Style.Language == FormatStyle::LK_JavaScript &&
1353                  Tok.TokenText == "''") {
1354         Tok.Tok.setKind(tok::string_literal);
1355       }
1356     }
1357 
1358     if (Style.Language == FormatStyle::LK_JavaScript &&
1359         Tok.is(tok::char_constant)) {
1360       Tok.Tok.setKind(tok::string_literal);
1361     }
1362 
1363     if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
1364                                  Tok.TokenText == "/* clang-format on */")) {
1365       FormattingDisabled = false;
1366     }
1367 
1368     Tok.Finalized = FormattingDisabled;
1369 
1370     if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
1371                                  Tok.TokenText == "/* clang-format off */")) {
1372       FormattingDisabled = true;
1373     }
1374   }
1375 
1376   void resetLexer(unsigned Offset) {
1377     StringRef Buffer = SourceMgr.getBufferData(ID);
1378     Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
1379                         getFormattingLangOpts(Style), Buffer.begin(),
1380                         Buffer.begin() + Offset, Buffer.end()));
1381     Lex->SetKeepWhitespaceMode(true);
1382     TrailingWhitespace = 0;
1383   }
1384 };
1385 
1386 static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1387   switch (Language) {
1388   case FormatStyle::LK_Cpp:
1389     return "C++";
1390   case FormatStyle::LK_Java:
1391     return "Java";
1392   case FormatStyle::LK_JavaScript:
1393     return "JavaScript";
1394   case FormatStyle::LK_Proto:
1395     return "Proto";
1396   default:
1397     return "Unknown";
1398   }
1399 }
1400 
1401 class Environment {
1402 public:
1403   Environment(SourceManager &SM, FileID ID, ArrayRef<CharSourceRange> Ranges)
1404       : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM) {}
1405 
1406   Environment(FileID ID, std::unique_ptr<FileManager> FileMgr,
1407               std::unique_ptr<SourceManager> VirtualSM,
1408               std::unique_ptr<DiagnosticsEngine> Diagnostics,
1409               const std::vector<CharSourceRange> &CharRanges)
1410       : ID(ID), CharRanges(CharRanges.begin(), CharRanges.end()),
1411         SM(*VirtualSM), FileMgr(std::move(FileMgr)),
1412         VirtualSM(std::move(VirtualSM)), Diagnostics(std::move(Diagnostics)) {}
1413 
1414   // This sets up an virtual file system with file \p FileName containing \p
1415   // Code.
1416   static std::unique_ptr<Environment>
1417   CreateVirtualEnvironment(StringRef Code, StringRef FileName,
1418                            ArrayRef<tooling::Range> Ranges) {
1419     // This is referenced by `FileMgr` and will be released by `FileMgr` when it
1420     // is deleted.
1421     IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
1422         new vfs::InMemoryFileSystem);
1423     // This is passed to `SM` as reference, so the pointer has to be referenced
1424     // in `Environment` so that `FileMgr` can out-live this function scope.
1425     std::unique_ptr<FileManager> FileMgr(
1426         new FileManager(FileSystemOptions(), InMemoryFileSystem));
1427     // This is passed to `SM` as reference, so the pointer has to be referenced
1428     // by `Environment` due to the same reason above.
1429     std::unique_ptr<DiagnosticsEngine> Diagnostics(new DiagnosticsEngine(
1430         IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1431         new DiagnosticOptions));
1432     // This will be stored as reference, so the pointer has to be stored in
1433     // due to the same reason above.
1434     std::unique_ptr<SourceManager> VirtualSM(
1435         new SourceManager(*Diagnostics, *FileMgr));
1436     InMemoryFileSystem->addFile(
1437         FileName, 0, llvm::MemoryBuffer::getMemBuffer(
1438                          Code, FileName, /*RequiresNullTerminator=*/false));
1439     FileID ID = VirtualSM->createFileID(
1440         FileMgr->getFile(FileName), SourceLocation(), clang::SrcMgr::C_User);
1441     assert(ID.isValid());
1442     SourceLocation StartOfFile = VirtualSM->getLocForStartOfFile(ID);
1443     std::vector<CharSourceRange> CharRanges;
1444     for (const tooling::Range &Range : Ranges) {
1445       SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
1446       SourceLocation End = Start.getLocWithOffset(Range.getLength());
1447       CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1448     }
1449     return llvm::make_unique<Environment>(ID, std::move(FileMgr),
1450                                           std::move(VirtualSM),
1451                                           std::move(Diagnostics), CharRanges);
1452   }
1453 
1454   FileID getFileID() const { return ID; }
1455 
1456   StringRef getFileName() const { return FileName; }
1457 
1458   ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; }
1459 
1460   const SourceManager &getSourceManager() const { return SM; }
1461 
1462 private:
1463   FileID ID;
1464   StringRef FileName;
1465   SmallVector<CharSourceRange, 8> CharRanges;
1466   SourceManager &SM;
1467 
1468   // The order of these fields are important - they should be in the same order
1469   // as they are created in `CreateVirtualEnvironment` so that they can be
1470   // deleted in the reverse order as they are created.
1471   std::unique_ptr<FileManager> FileMgr;
1472   std::unique_ptr<SourceManager> VirtualSM;
1473   std::unique_ptr<DiagnosticsEngine> Diagnostics;
1474 };
1475 
1476 class TokenAnalyzer : public UnwrappedLineConsumer {
1477 public:
1478   TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
1479       : Style(Style), Env(Env),
1480         AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()),
1481         UnwrappedLines(1),
1482         Encoding(encoding::detectEncoding(
1483             Env.getSourceManager().getBufferData(Env.getFileID()))) {
1484     DEBUG(llvm::dbgs() << "File encoding: "
1485                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1486                                                                : "unknown")
1487                        << "\n");
1488     DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
1489                        << "\n");
1490   }
1491 
1492   tooling::Replacements process() {
1493     tooling::Replacements Result;
1494     FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), Style,
1495                             Encoding);
1496 
1497     UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
1498                                *this);
1499     Parser.parse();
1500     assert(UnwrappedLines.rbegin()->empty());
1501     for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1502          ++Run) {
1503       DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1504       SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1505 
1506       TokenAnnotator Annotator(Style, Tokens.getKeywords());
1507       for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1508         AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1509         Annotator.annotate(*AnnotatedLines.back());
1510       }
1511 
1512       tooling::Replacements RunResult =
1513           analyze(Annotator, AnnotatedLines, Tokens, Result);
1514 
1515       DEBUG({
1516         llvm::dbgs() << "Replacements for run " << Run << ":\n";
1517         for (tooling::Replacements::iterator I = RunResult.begin(),
1518                                              E = RunResult.end();
1519              I != E; ++I) {
1520           llvm::dbgs() << I->toString() << "\n";
1521         }
1522       });
1523       for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1524         delete AnnotatedLines[i];
1525       }
1526       Result.insert(RunResult.begin(), RunResult.end());
1527     }
1528     return Result;
1529   }
1530 
1531 protected:
1532   virtual tooling::Replacements
1533   analyze(TokenAnnotator &Annotator,
1534           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1535           FormatTokenLexer &Tokens, tooling::Replacements &Result) = 0;
1536 
1537   void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
1538     assert(!UnwrappedLines.empty());
1539     UnwrappedLines.back().push_back(TheLine);
1540   }
1541 
1542   void finishRun() override {
1543     UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1544   }
1545 
1546   FormatStyle Style;
1547   // Stores Style, FileID and SourceManager etc.
1548   const Environment &Env;
1549   // AffectedRangeMgr stores ranges to be fixed.
1550   AffectedRangeManager AffectedRangeMgr;
1551   SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
1552   encoding::Encoding Encoding;
1553 };
1554 
1555 class Formatter : public TokenAnalyzer {
1556 public:
1557   Formatter(const Environment &Env, const FormatStyle &Style,
1558             bool *IncompleteFormat)
1559       : TokenAnalyzer(Env, Style), IncompleteFormat(IncompleteFormat) {}
1560 
1561   tooling::Replacements
1562   analyze(TokenAnnotator &Annotator,
1563           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1564           FormatTokenLexer &Tokens, tooling::Replacements &Result) override {
1565     deriveLocalStyle(AnnotatedLines);
1566     AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
1567                                           AnnotatedLines.end());
1568 
1569     if (Style.Language == FormatStyle::LK_JavaScript &&
1570         Style.JavaScriptQuotes != FormatStyle::JSQS_Leave)
1571       requoteJSStringLiteral(AnnotatedLines, Result);
1572 
1573     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1574       Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1575     }
1576 
1577     Annotator.setCommentLineLevels(AnnotatedLines);
1578 
1579     WhitespaceManager Whitespaces(
1580         Env.getSourceManager(), Style,
1581         inputUsesCRLF(Env.getSourceManager().getBufferData(Env.getFileID())));
1582     ContinuationIndenter Indenter(Style, Tokens.getKeywords(),
1583                                   Env.getSourceManager(), Whitespaces, Encoding,
1584                                   BinPackInconclusiveFunctions);
1585     UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),
1586                            IncompleteFormat)
1587         .format(AnnotatedLines);
1588     return Whitespaces.generateReplacements();
1589   }
1590 
1591 private:
1592   // If the last token is a double/single-quoted string literal, generates a
1593   // replacement with a single/double quoted string literal, re-escaping the
1594   // contents in the process.
1595   void requoteJSStringLiteral(SmallVectorImpl<AnnotatedLine *> &Lines,
1596                               tooling::Replacements &Result) {
1597     for (AnnotatedLine *Line : Lines) {
1598       requoteJSStringLiteral(Line->Children, Result);
1599       if (!Line->Affected)
1600         continue;
1601       for (FormatToken *FormatTok = Line->First; FormatTok;
1602            FormatTok = FormatTok->Next) {
1603         StringRef Input = FormatTok->TokenText;
1604         if (FormatTok->Finalized || !FormatTok->isStringLiteral() ||
1605             // NB: testing for not starting with a double quote to avoid
1606             // breaking
1607             // `template strings`.
1608             (Style.JavaScriptQuotes == FormatStyle::JSQS_Single &&
1609              !Input.startswith("\"")) ||
1610             (Style.JavaScriptQuotes == FormatStyle::JSQS_Double &&
1611              !Input.startswith("\'")))
1612           continue;
1613 
1614         // Change start and end quote.
1615         bool IsSingle = Style.JavaScriptQuotes == FormatStyle::JSQS_Single;
1616         SourceLocation Start = FormatTok->Tok.getLocation();
1617         auto Replace = [&](SourceLocation Start, unsigned Length,
1618                            StringRef ReplacementText) {
1619           Result.insert(tooling::Replacement(Env.getSourceManager(), Start,
1620                                              Length, ReplacementText));
1621         };
1622         Replace(Start, 1, IsSingle ? "'" : "\"");
1623         Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1,
1624                 IsSingle ? "'" : "\"");
1625 
1626         // Escape internal quotes.
1627         size_t ColumnWidth = FormatTok->TokenText.size();
1628         bool Escaped = false;
1629         for (size_t i = 1; i < Input.size() - 1; i++) {
1630           switch (Input[i]) {
1631           case '\\':
1632             if (!Escaped && i + 1 < Input.size() &&
1633                 ((IsSingle && Input[i + 1] == '"') ||
1634                  (!IsSingle && Input[i + 1] == '\''))) {
1635               // Remove this \, it's escaping a " or ' that no longer needs
1636               // escaping
1637               ColumnWidth--;
1638               Replace(Start.getLocWithOffset(i), 1, "");
1639               continue;
1640             }
1641             Escaped = !Escaped;
1642             break;
1643           case '\"':
1644           case '\'':
1645             if (!Escaped && IsSingle == (Input[i] == '\'')) {
1646               // Escape the quote.
1647               Replace(Start.getLocWithOffset(i), 0, "\\");
1648               ColumnWidth++;
1649             }
1650             Escaped = false;
1651             break;
1652           default:
1653             Escaped = false;
1654             break;
1655           }
1656         }
1657 
1658         // For formatting, count the number of non-escaped single quotes in them
1659         // and adjust ColumnWidth to take the added escapes into account.
1660         // FIXME(martinprobst): this might conflict with code breaking a long
1661         // string literal (which clang-format doesn't do, yet). For that to
1662         // work, this code would have to modify TokenText directly.
1663         FormatTok->ColumnWidth = ColumnWidth;
1664       }
1665     }
1666   }
1667 
1668   static bool inputUsesCRLF(StringRef Text) {
1669     return Text.count('\r') * 2 > Text.count('\n');
1670   }
1671 
1672   bool
1673   hasCpp03IncompatibleFormat(const SmallVectorImpl<AnnotatedLine *> &Lines) {
1674     for (const AnnotatedLine *Line : Lines) {
1675       if (hasCpp03IncompatibleFormat(Line->Children))
1676         return true;
1677       for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) {
1678         if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1679           if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener))
1680             return true;
1681           if (Tok->is(TT_TemplateCloser) &&
1682               Tok->Previous->is(TT_TemplateCloser))
1683             return true;
1684         }
1685       }
1686     }
1687     return false;
1688   }
1689 
1690   int countVariableAlignments(const SmallVectorImpl<AnnotatedLine *> &Lines) {
1691     int AlignmentDiff = 0;
1692     for (const AnnotatedLine *Line : Lines) {
1693       AlignmentDiff += countVariableAlignments(Line->Children);
1694       for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) {
1695         if (!Tok->is(TT_PointerOrReference))
1696           continue;
1697         bool SpaceBefore =
1698             Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1699         bool SpaceAfter = Tok->Next->WhitespaceRange.getBegin() !=
1700                           Tok->Next->WhitespaceRange.getEnd();
1701         if (SpaceBefore && !SpaceAfter)
1702           ++AlignmentDiff;
1703         if (!SpaceBefore && SpaceAfter)
1704           --AlignmentDiff;
1705       }
1706     }
1707     return AlignmentDiff;
1708   }
1709 
1710   void
1711   deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1712     bool HasBinPackedFunction = false;
1713     bool HasOnePerLineFunction = false;
1714     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1715       if (!AnnotatedLines[i]->First->Next)
1716         continue;
1717       FormatToken *Tok = AnnotatedLines[i]->First->Next;
1718       while (Tok->Next) {
1719         if (Tok->PackingKind == PPK_BinPacked)
1720           HasBinPackedFunction = true;
1721         if (Tok->PackingKind == PPK_OnePerLine)
1722           HasOnePerLineFunction = true;
1723 
1724         Tok = Tok->Next;
1725       }
1726     }
1727     if (Style.DerivePointerAlignment)
1728       Style.PointerAlignment = countVariableAlignments(AnnotatedLines) <= 0
1729                                    ? FormatStyle::PAS_Left
1730                                    : FormatStyle::PAS_Right;
1731     if (Style.Standard == FormatStyle::LS_Auto)
1732       Style.Standard = hasCpp03IncompatibleFormat(AnnotatedLines)
1733                            ? FormatStyle::LS_Cpp11
1734                            : FormatStyle::LS_Cpp03;
1735     BinPackInconclusiveFunctions =
1736         HasBinPackedFunction || !HasOnePerLineFunction;
1737   }
1738 
1739   bool BinPackInconclusiveFunctions;
1740   bool *IncompleteFormat;
1741 };
1742 
1743 // This class clean up the erroneous/redundant code around the given ranges in
1744 // file.
1745 class Cleaner : public TokenAnalyzer {
1746 public:
1747   Cleaner(const Environment &Env, const FormatStyle &Style)
1748       : TokenAnalyzer(Env, Style),
1749         DeletedTokens(FormatTokenLess(Env.getSourceManager())) {}
1750 
1751   // FIXME: eliminate unused parameters.
1752   tooling::Replacements
1753   analyze(TokenAnnotator &Annotator,
1754           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1755           FormatTokenLexer &Tokens, tooling::Replacements &Result) override {
1756     // FIXME: in the current implementation the granularity of affected range
1757     // is an annotated line. However, this is not sufficient. Furthermore,
1758     // redundant code introduced by replacements does not necessarily
1759     // intercept with ranges of replacements that result in the redundancy.
1760     // To determine if some redundant code is actually introduced by
1761     // replacements(e.g. deletions), we need to come up with a more
1762     // sophisticated way of computing affected ranges.
1763     AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
1764                                           AnnotatedLines.end());
1765 
1766     checkEmptyNamespace(AnnotatedLines);
1767 
1768     return generateFixes();
1769   }
1770 
1771 private:
1772   bool containsOnlyComments(const AnnotatedLine &Line) {
1773     for (FormatToken *Tok = Line.First; Tok != nullptr; Tok = Tok->Next) {
1774       if (Tok->isNot(tok::comment))
1775         return false;
1776     }
1777     return true;
1778   }
1779 
1780   // Iterate through all lines and remove any empty (nested) namespaces.
1781   void checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1782     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1783       auto &Line = *AnnotatedLines[i];
1784       if (Line.startsWith(tok::kw_namespace) ||
1785           Line.startsWith(tok::kw_inline, tok::kw_namespace)) {
1786         checkEmptyNamespace(AnnotatedLines, i, i);
1787       }
1788     }
1789 
1790     for (auto Line : DeletedLines) {
1791       FormatToken *Tok = AnnotatedLines[Line]->First;
1792       while (Tok) {
1793         deleteToken(Tok);
1794         Tok = Tok->Next;
1795       }
1796     }
1797   }
1798 
1799   // The function checks if the namespace, which starts from \p CurrentLine, and
1800   // its nested namespaces are empty and delete them if they are empty. It also
1801   // sets \p NewLine to the last line checked.
1802   // Returns true if the current namespace is empty.
1803   bool checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1804                            unsigned CurrentLine, unsigned &NewLine) {
1805     unsigned InitLine = CurrentLine, End = AnnotatedLines.size();
1806     if (Style.BraceWrapping.AfterNamespace) {
1807       // If the left brace is in a new line, we should consume it first so that
1808       // it does not make the namespace non-empty.
1809       // FIXME: error handling if there is no left brace.
1810       if (!AnnotatedLines[++CurrentLine]->startsWith(tok::l_brace)) {
1811         NewLine = CurrentLine;
1812         return false;
1813       }
1814     } else if (!AnnotatedLines[CurrentLine]->endsWith(tok::l_brace)) {
1815       return false;
1816     }
1817     while (++CurrentLine < End) {
1818       if (AnnotatedLines[CurrentLine]->startsWith(tok::r_brace))
1819         break;
1820 
1821       if (AnnotatedLines[CurrentLine]->startsWith(tok::kw_namespace) ||
1822           AnnotatedLines[CurrentLine]->startsWith(tok::kw_inline,
1823                                                   tok::kw_namespace)) {
1824         if (!checkEmptyNamespace(AnnotatedLines, CurrentLine, NewLine))
1825           return false;
1826         CurrentLine = NewLine;
1827         continue;
1828       }
1829 
1830       if (containsOnlyComments(*AnnotatedLines[CurrentLine]))
1831         continue;
1832 
1833       // If there is anything other than comments or nested namespaces in the
1834       // current namespace, the namespace cannot be empty.
1835       NewLine = CurrentLine;
1836       return false;
1837     }
1838 
1839     NewLine = CurrentLine;
1840     if (CurrentLine >= End)
1841       return false;
1842 
1843     // Check if the empty namespace is actually affected by changed ranges.
1844     if (!AffectedRangeMgr.affectsCharSourceRange(CharSourceRange::getCharRange(
1845             AnnotatedLines[InitLine]->First->Tok.getLocation(),
1846             AnnotatedLines[CurrentLine]->Last->Tok.getEndLoc())))
1847       return false;
1848 
1849     for (unsigned i = InitLine; i <= CurrentLine; ++i) {
1850       DeletedLines.insert(i);
1851     }
1852 
1853     return true;
1854   }
1855 
1856   // Delete the given token.
1857   inline void deleteToken(FormatToken *Tok) {
1858     if (Tok)
1859       DeletedTokens.insert(Tok);
1860   }
1861 
1862   tooling::Replacements generateFixes() {
1863     tooling::Replacements Fixes;
1864     std::vector<FormatToken *> Tokens;
1865     std::copy(DeletedTokens.begin(), DeletedTokens.end(),
1866               std::back_inserter(Tokens));
1867 
1868     // Merge multiple continuous token deletions into one big deletion so that
1869     // the number of replacements can be reduced. This makes computing affected
1870     // ranges more efficient when we run reformat on the changed code.
1871     unsigned Idx = 0;
1872     while (Idx < Tokens.size()) {
1873       unsigned St = Idx, End = Idx;
1874       while ((End + 1) < Tokens.size() &&
1875              Tokens[End]->Next == Tokens[End + 1]) {
1876         End++;
1877       }
1878       auto SR = CharSourceRange::getCharRange(Tokens[St]->Tok.getLocation(),
1879                                               Tokens[End]->Tok.getEndLoc());
1880       Fixes.insert(tooling::Replacement(Env.getSourceManager(), SR, ""));
1881       Idx = End + 1;
1882     }
1883 
1884     return Fixes;
1885   }
1886 
1887   // Class for less-than inequality comparason for the set `RedundantTokens`.
1888   // We store tokens in the order they appear in the translation unit so that
1889   // we do not need to sort them in `generateFixes()`.
1890   struct FormatTokenLess {
1891     FormatTokenLess(const SourceManager &SM) : SM(SM) {}
1892 
1893     bool operator()(const FormatToken *LHS, const FormatToken *RHS) {
1894       return SM.isBeforeInTranslationUnit(LHS->Tok.getLocation(),
1895                                           RHS->Tok.getLocation());
1896     }
1897     const SourceManager &SM;
1898   };
1899 
1900   // Tokens to be deleted.
1901   std::set<FormatToken *, FormatTokenLess> DeletedTokens;
1902   // The line numbers of lines to be deleted.
1903   std::set<unsigned> DeletedLines;
1904 };
1905 
1906 struct IncludeDirective {
1907   StringRef Filename;
1908   StringRef Text;
1909   unsigned Offset;
1910   int Category;
1911 };
1912 
1913 } // end anonymous namespace
1914 
1915 // Determines whether 'Ranges' intersects with ('Start', 'End').
1916 static bool affectsRange(ArrayRef<tooling::Range> Ranges, unsigned Start,
1917                          unsigned End) {
1918   for (auto Range : Ranges) {
1919     if (Range.getOffset() < End &&
1920         Range.getOffset() + Range.getLength() > Start)
1921       return true;
1922   }
1923   return false;
1924 }
1925 
1926 // Sorts a block of includes given by 'Includes' alphabetically adding the
1927 // necessary replacement to 'Replaces'. 'Includes' must be in strict source
1928 // order.
1929 static void sortIncludes(const FormatStyle &Style,
1930                          const SmallVectorImpl<IncludeDirective> &Includes,
1931                          ArrayRef<tooling::Range> Ranges, StringRef FileName,
1932                          tooling::Replacements &Replaces, unsigned *Cursor) {
1933   if (!affectsRange(Ranges, Includes.front().Offset,
1934                     Includes.back().Offset + Includes.back().Text.size()))
1935     return;
1936   SmallVector<unsigned, 16> Indices;
1937   for (unsigned i = 0, e = Includes.size(); i != e; ++i)
1938     Indices.push_back(i);
1939   std::stable_sort(
1940       Indices.begin(), Indices.end(), [&](unsigned LHSI, unsigned RHSI) {
1941         return std::tie(Includes[LHSI].Category, Includes[LHSI].Filename) <
1942                std::tie(Includes[RHSI].Category, Includes[RHSI].Filename);
1943       });
1944 
1945   // If the #includes are out of order, we generate a single replacement fixing
1946   // the entire block. Otherwise, no replacement is generated.
1947   bool OutOfOrder = false;
1948   for (unsigned i = 1, e = Indices.size(); i != e; ++i) {
1949     if (Indices[i] != i) {
1950       OutOfOrder = true;
1951       break;
1952     }
1953   }
1954   if (!OutOfOrder)
1955     return;
1956 
1957   std::string result;
1958   bool CursorMoved = false;
1959   for (unsigned Index : Indices) {
1960     if (!result.empty())
1961       result += "\n";
1962     result += Includes[Index].Text;
1963 
1964     if (Cursor && !CursorMoved) {
1965       unsigned Start = Includes[Index].Offset;
1966       unsigned End = Start + Includes[Index].Text.size();
1967       if (*Cursor >= Start && *Cursor < End) {
1968         *Cursor = Includes.front().Offset + result.size() + *Cursor - End;
1969         CursorMoved = true;
1970       }
1971     }
1972   }
1973 
1974   // Sorting #includes shouldn't change their total number of characters.
1975   // This would otherwise mess up 'Ranges'.
1976   assert(result.size() ==
1977          Includes.back().Offset + Includes.back().Text.size() -
1978              Includes.front().Offset);
1979 
1980   Replaces.insert(tooling::Replacement(FileName, Includes.front().Offset,
1981                                        result.size(), result));
1982 }
1983 
1984 tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
1985                                    ArrayRef<tooling::Range> Ranges,
1986                                    StringRef FileName, unsigned *Cursor) {
1987   tooling::Replacements Replaces;
1988   if (!Style.SortIncludes)
1989     return Replaces;
1990 
1991   unsigned Prev = 0;
1992   unsigned SearchFrom = 0;
1993   llvm::Regex IncludeRegex(
1994       R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))");
1995   SmallVector<StringRef, 4> Matches;
1996   SmallVector<IncludeDirective, 16> IncludesInBlock;
1997 
1998   // In compiled files, consider the first #include to be the main #include of
1999   // the file if it is not a system #include. This ensures that the header
2000   // doesn't have hidden dependencies
2001   // (http://llvm.org/docs/CodingStandards.html#include-style).
2002   //
2003   // FIXME: Do some sanity checking, e.g. edit distance of the base name, to fix
2004   // cases where the first #include is unlikely to be the main header.
2005   bool IsSource = FileName.endswith(".c") || FileName.endswith(".cc") ||
2006                   FileName.endswith(".cpp") || FileName.endswith(".c++") ||
2007                   FileName.endswith(".cxx") || FileName.endswith(".m") ||
2008                   FileName.endswith(".mm");
2009   StringRef FileStem = llvm::sys::path::stem(FileName);
2010   bool FirstIncludeBlock = true;
2011   bool MainIncludeFound = false;
2012 
2013   // Create pre-compiled regular expressions for the #include categories.
2014   SmallVector<llvm::Regex, 4> CategoryRegexs;
2015   for (const auto &Category : Style.IncludeCategories)
2016     CategoryRegexs.emplace_back(Category.Regex);
2017 
2018   bool FormattingOff = false;
2019 
2020   for (;;) {
2021     auto Pos = Code.find('\n', SearchFrom);
2022     StringRef Line =
2023         Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev);
2024 
2025     StringRef Trimmed = Line.trim();
2026     if (Trimmed == "// clang-format off")
2027       FormattingOff = true;
2028     else if (Trimmed == "// clang-format on")
2029       FormattingOff = false;
2030 
2031     if (!FormattingOff && !Line.endswith("\\")) {
2032       if (IncludeRegex.match(Line, &Matches)) {
2033         StringRef IncludeName = Matches[2];
2034         int Category = INT_MAX;
2035         for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) {
2036           if (CategoryRegexs[i].match(IncludeName)) {
2037             Category = Style.IncludeCategories[i].Priority;
2038             break;
2039           }
2040         }
2041         if (IsSource && !MainIncludeFound && Category > 0 &&
2042             FirstIncludeBlock && IncludeName.startswith("\"")) {
2043           StringRef HeaderStem =
2044               llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1));
2045           if (FileStem.startswith(HeaderStem)) {
2046             llvm::Regex MainIncludeRegex(
2047                 (HeaderStem + Style.IncludeIsMainRegex).str());
2048             if (MainIncludeRegex.match(FileStem)) {
2049               Category = 0;
2050               MainIncludeFound = true;
2051             }
2052           }
2053         }
2054         IncludesInBlock.push_back({IncludeName, Line, Prev, Category});
2055       } else if (!IncludesInBlock.empty()) {
2056         sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,
2057                      Cursor);
2058         IncludesInBlock.clear();
2059         FirstIncludeBlock = false;
2060       }
2061       Prev = Pos + 1;
2062     }
2063     if (Pos == StringRef::npos || Pos + 1 == Code.size())
2064       break;
2065     SearchFrom = Pos + 1;
2066   }
2067   if (!IncludesInBlock.empty())
2068     sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, Cursor);
2069   return Replaces;
2070 }
2071 
2072 template <typename T>
2073 static tooling::Replacements
2074 processReplacements(T ProcessFunc, StringRef Code,
2075                     const tooling::Replacements &Replaces,
2076                     const FormatStyle &Style) {
2077   if (Replaces.empty())
2078     return tooling::Replacements();
2079 
2080   std::string NewCode = applyAllReplacements(Code, Replaces);
2081   std::vector<tooling::Range> ChangedRanges =
2082       tooling::calculateChangedRanges(Replaces);
2083   StringRef FileName = Replaces.begin()->getFilePath();
2084 
2085   tooling::Replacements FormatReplaces =
2086       ProcessFunc(Style, NewCode, ChangedRanges, FileName);
2087 
2088   return mergeReplacements(Replaces, FormatReplaces);
2089 }
2090 
2091 tooling::Replacements formatReplacements(StringRef Code,
2092                                          const tooling::Replacements &Replaces,
2093                                          const FormatStyle &Style) {
2094   // We need to use lambda function here since there are two versions of
2095   // `reformat`.
2096   auto Reformat = [](const FormatStyle &Style, StringRef Code,
2097                      std::vector<tooling::Range> Ranges,
2098                      StringRef FileName) -> tooling::Replacements {
2099     return reformat(Style, Code, Ranges, FileName);
2100   };
2101   return processReplacements(Reformat, Code, Replaces, Style);
2102 }
2103 
2104 tooling::Replacements
2105 cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces,
2106                           const FormatStyle &Style) {
2107   // We need to use lambda function here since there are two versions of
2108   // `cleanup`.
2109   auto Cleanup = [](const FormatStyle &Style, StringRef Code,
2110                     std::vector<tooling::Range> Ranges,
2111                     StringRef FileName) -> tooling::Replacements {
2112     return cleanup(Style, Code, Ranges, FileName);
2113   };
2114   return processReplacements(Cleanup, Code, Replaces, Style);
2115 }
2116 
2117 tooling::Replacements reformat(const FormatStyle &Style, SourceManager &SM,
2118                                FileID ID, ArrayRef<CharSourceRange> Ranges,
2119                                bool *IncompleteFormat) {
2120   FormatStyle Expanded = expandPresets(Style);
2121   if (Expanded.DisableFormat)
2122     return tooling::Replacements();
2123 
2124   Environment Env(SM, ID, Ranges);
2125   Formatter Format(Env, Expanded, IncompleteFormat);
2126   return Format.process();
2127 }
2128 
2129 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
2130                                ArrayRef<tooling::Range> Ranges,
2131                                StringRef FileName, bool *IncompleteFormat) {
2132   FormatStyle Expanded = expandPresets(Style);
2133   if (Expanded.DisableFormat)
2134     return tooling::Replacements();
2135 
2136   std::unique_ptr<Environment> Env =
2137       Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
2138   Formatter Format(*Env, Expanded, IncompleteFormat);
2139   return Format.process();
2140 }
2141 
2142 tooling::Replacements cleanup(const FormatStyle &Style, SourceManager &SM,
2143                               FileID ID, ArrayRef<CharSourceRange> Ranges) {
2144   Environment Env(SM, ID, Ranges);
2145   Cleaner Clean(Env, Style);
2146   return Clean.process();
2147 }
2148 
2149 tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,
2150                               ArrayRef<tooling::Range> Ranges,
2151                               StringRef FileName) {
2152   std::unique_ptr<Environment> Env =
2153       Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
2154   Cleaner Clean(*Env, Style);
2155   return Clean.process();
2156 }
2157 
2158 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
2159   LangOptions LangOpts;
2160   LangOpts.CPlusPlus = 1;
2161   LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
2162   LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
2163   LangOpts.LineComment = 1;
2164   bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp;
2165   LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;
2166   LangOpts.Bool = 1;
2167   LangOpts.ObjC1 = 1;
2168   LangOpts.ObjC2 = 1;
2169   LangOpts.MicrosoftExt = 1;    // To get kw___try, kw___finally.
2170   LangOpts.DeclSpecKeyword = 1; // To get __declspec.
2171   return LangOpts;
2172 }
2173 
2174 const char *StyleOptionHelpDescription =
2175     "Coding style, currently supports:\n"
2176     "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
2177     "Use -style=file to load style configuration from\n"
2178     ".clang-format file located in one of the parent\n"
2179     "directories of the source file (or current\n"
2180     "directory for stdin).\n"
2181     "Use -style=\"{key: value, ...}\" to set specific\n"
2182     "parameters, e.g.:\n"
2183     "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
2184 
2185 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
2186   if (FileName.endswith(".java"))
2187     return FormatStyle::LK_Java;
2188   if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts"))
2189     return FormatStyle::LK_JavaScript; // JavaScript or TypeScript.
2190   if (FileName.endswith_lower(".proto") ||
2191       FileName.endswith_lower(".protodevel"))
2192     return FormatStyle::LK_Proto;
2193   if (FileName.endswith_lower(".td"))
2194     return FormatStyle::LK_TableGen;
2195   return FormatStyle::LK_Cpp;
2196 }
2197 
2198 FormatStyle getStyle(StringRef StyleName, StringRef FileName,
2199                      StringRef FallbackStyle, vfs::FileSystem *FS) {
2200   if (!FS) {
2201     FS = vfs::getRealFileSystem().get();
2202   }
2203   FormatStyle Style = getLLVMStyle();
2204   Style.Language = getLanguageByFileName(FileName);
2205   if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
2206     llvm::errs() << "Invalid fallback style \"" << FallbackStyle
2207                  << "\" using LLVM style\n";
2208     return Style;
2209   }
2210 
2211   if (StyleName.startswith("{")) {
2212     // Parse YAML/JSON style from the command line.
2213     if (std::error_code ec = parseConfiguration(StyleName, &Style)) {
2214       llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
2215                    << FallbackStyle << " style\n";
2216     }
2217     return Style;
2218   }
2219 
2220   if (!StyleName.equals_lower("file")) {
2221     if (!getPredefinedStyle(StyleName, Style.Language, &Style))
2222       llvm::errs() << "Invalid value for -style, using " << FallbackStyle
2223                    << " style\n";
2224     return Style;
2225   }
2226 
2227   // Look for .clang-format/_clang-format file in the file's parent directories.
2228   SmallString<128> UnsuitableConfigFiles;
2229   SmallString<128> Path(FileName);
2230   llvm::sys::fs::make_absolute(Path);
2231   for (StringRef Directory = Path; !Directory.empty();
2232        Directory = llvm::sys::path::parent_path(Directory)) {
2233 
2234     auto Status = FS->status(Directory);
2235     if (!Status ||
2236         Status->getType() != llvm::sys::fs::file_type::directory_file) {
2237       continue;
2238     }
2239 
2240     SmallString<128> ConfigFile(Directory);
2241 
2242     llvm::sys::path::append(ConfigFile, ".clang-format");
2243     DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
2244 
2245     Status = FS->status(ConfigFile.str());
2246     bool IsFile =
2247         Status && (Status->getType() == llvm::sys::fs::file_type::regular_file);
2248     if (!IsFile) {
2249       // Try _clang-format too, since dotfiles are not commonly used on Windows.
2250       ConfigFile = Directory;
2251       llvm::sys::path::append(ConfigFile, "_clang-format");
2252       DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
2253       Status = FS->status(ConfigFile.str());
2254       IsFile = Status &&
2255                (Status->getType() == llvm::sys::fs::file_type::regular_file);
2256     }
2257 
2258     if (IsFile) {
2259       llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
2260           FS->getBufferForFile(ConfigFile.str());
2261       if (std::error_code EC = Text.getError()) {
2262         llvm::errs() << EC.message() << "\n";
2263         break;
2264       }
2265       if (std::error_code ec =
2266               parseConfiguration(Text.get()->getBuffer(), &Style)) {
2267         if (ec == ParseError::Unsuitable) {
2268           if (!UnsuitableConfigFiles.empty())
2269             UnsuitableConfigFiles.append(", ");
2270           UnsuitableConfigFiles.append(ConfigFile);
2271           continue;
2272         }
2273         llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
2274                      << "\n";
2275         break;
2276       }
2277       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
2278       return Style;
2279     }
2280   }
2281   if (!UnsuitableConfigFiles.empty()) {
2282     llvm::errs() << "Configuration file(s) do(es) not support "
2283                  << getLanguageName(Style.Language) << ": "
2284                  << UnsuitableConfigFiles << "\n";
2285   }
2286   return Style;
2287 }
2288 
2289 } // namespace format
2290 } // namespace clang
2291