1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "clang/Format/Format.h"
17 #include "AffectedRangeManager.h"
18 #include "ContinuationIndenter.h"
19 #include "TokenAnnotator.h"
20 #include "UnwrappedLineFormatter.h"
21 #include "UnwrappedLineParser.h"
22 #include "WhitespaceManager.h"
23 #include "clang/Basic/Diagnostic.h"
24 #include "clang/Basic/DiagnosticOptions.h"
25 #include "clang/Basic/SourceManager.h"
26 #include "clang/Basic/VirtualFileSystem.h"
27 #include "clang/Lex/Lexer.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/Support/Allocator.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/Path.h"
32 #include "llvm/Support/Regex.h"
33 #include "llvm/Support/YAMLTraits.h"
34 #include <memory>
35 #include <queue>
36 #include <string>
37 
38 #define DEBUG_TYPE "format-formatter"
39 
40 using clang::format::FormatStyle;
41 
42 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
43 LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory)
44 
45 namespace llvm {
46 namespace yaml {
47 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
48   static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
49     IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
50     IO.enumCase(Value, "Java", FormatStyle::LK_Java);
51     IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
52     IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
53     IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen);
54   }
55 };
56 
57 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
58   static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
59     IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
60     IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
61     IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
62     IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
63     IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
64   }
65 };
66 
67 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
68   static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
69     IO.enumCase(Value, "Never", FormatStyle::UT_Never);
70     IO.enumCase(Value, "false", FormatStyle::UT_Never);
71     IO.enumCase(Value, "Always", FormatStyle::UT_Always);
72     IO.enumCase(Value, "true", FormatStyle::UT_Always);
73     IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
74     IO.enumCase(Value, "ForContinuationAndIndentation",
75                 FormatStyle::UT_ForContinuationAndIndentation);
76   }
77 };
78 
79 template <> struct ScalarEnumerationTraits<FormatStyle::JavaScriptQuoteStyle> {
80   static void enumeration(IO &IO, FormatStyle::JavaScriptQuoteStyle &Value) {
81     IO.enumCase(Value, "Leave", FormatStyle::JSQS_Leave);
82     IO.enumCase(Value, "Single", FormatStyle::JSQS_Single);
83     IO.enumCase(Value, "Double", FormatStyle::JSQS_Double);
84   }
85 };
86 
87 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
88   static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
89     IO.enumCase(Value, "None", FormatStyle::SFS_None);
90     IO.enumCase(Value, "false", FormatStyle::SFS_None);
91     IO.enumCase(Value, "All", FormatStyle::SFS_All);
92     IO.enumCase(Value, "true", FormatStyle::SFS_All);
93     IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline);
94     IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty);
95   }
96 };
97 
98 template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> {
99   static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) {
100     IO.enumCase(Value, "All", FormatStyle::BOS_All);
101     IO.enumCase(Value, "true", FormatStyle::BOS_All);
102     IO.enumCase(Value, "None", FormatStyle::BOS_None);
103     IO.enumCase(Value, "false", FormatStyle::BOS_None);
104     IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment);
105   }
106 };
107 
108 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
109   static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
110     IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
111     IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
112     IO.enumCase(Value, "Mozilla", FormatStyle::BS_Mozilla);
113     IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
114     IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
115     IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
116     IO.enumCase(Value, "WebKit", FormatStyle::BS_WebKit);
117     IO.enumCase(Value, "Custom", FormatStyle::BS_Custom);
118   }
119 };
120 
121 template <>
122 struct ScalarEnumerationTraits<FormatStyle::ReturnTypeBreakingStyle> {
123   static void enumeration(IO &IO, FormatStyle::ReturnTypeBreakingStyle &Value) {
124     IO.enumCase(Value, "None", FormatStyle::RTBS_None);
125     IO.enumCase(Value, "All", FormatStyle::RTBS_All);
126     IO.enumCase(Value, "TopLevel", FormatStyle::RTBS_TopLevel);
127     IO.enumCase(Value, "TopLevelDefinitions",
128                 FormatStyle::RTBS_TopLevelDefinitions);
129     IO.enumCase(Value, "AllDefinitions", FormatStyle::RTBS_AllDefinitions);
130   }
131 };
132 
133 template <>
134 struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> {
135   static void
136   enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) {
137     IO.enumCase(Value, "None", FormatStyle::DRTBS_None);
138     IO.enumCase(Value, "All", FormatStyle::DRTBS_All);
139     IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel);
140 
141     // For backward compatibility.
142     IO.enumCase(Value, "false", FormatStyle::DRTBS_None);
143     IO.enumCase(Value, "true", FormatStyle::DRTBS_All);
144   }
145 };
146 
147 template <>
148 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
149   static void enumeration(IO &IO,
150                           FormatStyle::NamespaceIndentationKind &Value) {
151     IO.enumCase(Value, "None", FormatStyle::NI_None);
152     IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
153     IO.enumCase(Value, "All", FormatStyle::NI_All);
154   }
155 };
156 
157 template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> {
158   static void enumeration(IO &IO, FormatStyle::BracketAlignmentStyle &Value) {
159     IO.enumCase(Value, "Align", FormatStyle::BAS_Align);
160     IO.enumCase(Value, "DontAlign", FormatStyle::BAS_DontAlign);
161     IO.enumCase(Value, "AlwaysBreak", FormatStyle::BAS_AlwaysBreak);
162 
163     // For backward compatibility.
164     IO.enumCase(Value, "true", FormatStyle::BAS_Align);
165     IO.enumCase(Value, "false", FormatStyle::BAS_DontAlign);
166   }
167 };
168 
169 template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> {
170   static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) {
171     IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle);
172     IO.enumCase(Value, "Left", FormatStyle::PAS_Left);
173     IO.enumCase(Value, "Right", FormatStyle::PAS_Right);
174 
175     // For backward compatibility.
176     IO.enumCase(Value, "true", FormatStyle::PAS_Left);
177     IO.enumCase(Value, "false", FormatStyle::PAS_Right);
178   }
179 };
180 
181 template <>
182 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
183   static void enumeration(IO &IO,
184                           FormatStyle::SpaceBeforeParensOptions &Value) {
185     IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
186     IO.enumCase(Value, "ControlStatements",
187                 FormatStyle::SBPO_ControlStatements);
188     IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
189 
190     // For backward compatibility.
191     IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
192     IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
193   }
194 };
195 
196 template <> struct MappingTraits<FormatStyle> {
197   static void mapping(IO &IO, FormatStyle &Style) {
198     // When reading, read the language first, we need it for getPredefinedStyle.
199     IO.mapOptional("Language", Style.Language);
200 
201     if (IO.outputting()) {
202       StringRef StylesArray[] = {"LLVM",    "Google", "Chromium",
203                                  "Mozilla", "WebKit", "GNU"};
204       ArrayRef<StringRef> Styles(StylesArray);
205       for (size_t i = 0, e = Styles.size(); i < e; ++i) {
206         StringRef StyleName(Styles[i]);
207         FormatStyle PredefinedStyle;
208         if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
209             Style == PredefinedStyle) {
210           IO.mapOptional("# BasedOnStyle", StyleName);
211           break;
212         }
213       }
214     } else {
215       StringRef BasedOnStyle;
216       IO.mapOptional("BasedOnStyle", BasedOnStyle);
217       if (!BasedOnStyle.empty()) {
218         FormatStyle::LanguageKind OldLanguage = Style.Language;
219         FormatStyle::LanguageKind Language =
220             ((FormatStyle *)IO.getContext())->Language;
221         if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
222           IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
223           return;
224         }
225         Style.Language = OldLanguage;
226       }
227     }
228 
229     // For backward compatibility.
230     if (!IO.outputting()) {
231       IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment);
232       IO.mapOptional("IndentFunctionDeclarationAfterType",
233                      Style.IndentWrappedFunctionNames);
234       IO.mapOptional("PointerBindsToType", Style.PointerAlignment);
235       IO.mapOptional("SpaceAfterControlStatementKeyword",
236                      Style.SpaceBeforeParens);
237     }
238 
239     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
240     IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
241     IO.mapOptional("AlignConsecutiveAssignments",
242                    Style.AlignConsecutiveAssignments);
243     IO.mapOptional("AlignConsecutiveDeclarations",
244                    Style.AlignConsecutiveDeclarations);
245     IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
246     IO.mapOptional("AlignOperands", Style.AlignOperands);
247     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
248     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
249                    Style.AllowAllParametersOfDeclarationOnNextLine);
250     IO.mapOptional("AllowShortBlocksOnASingleLine",
251                    Style.AllowShortBlocksOnASingleLine);
252     IO.mapOptional("AllowShortCaseLabelsOnASingleLine",
253                    Style.AllowShortCaseLabelsOnASingleLine);
254     IO.mapOptional("AllowShortFunctionsOnASingleLine",
255                    Style.AllowShortFunctionsOnASingleLine);
256     IO.mapOptional("AllowShortIfStatementsOnASingleLine",
257                    Style.AllowShortIfStatementsOnASingleLine);
258     IO.mapOptional("AllowShortLoopsOnASingleLine",
259                    Style.AllowShortLoopsOnASingleLine);
260     IO.mapOptional("AlwaysBreakAfterDefinitionReturnType",
261                    Style.AlwaysBreakAfterDefinitionReturnType);
262     IO.mapOptional("AlwaysBreakAfterReturnType",
263                    Style.AlwaysBreakAfterReturnType);
264     // If AlwaysBreakAfterDefinitionReturnType was specified but
265     // AlwaysBreakAfterReturnType was not, initialize the latter from the
266     // former for backwards compatibility.
267     if (Style.AlwaysBreakAfterDefinitionReturnType != FormatStyle::DRTBS_None &&
268         Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_None) {
269       if (Style.AlwaysBreakAfterDefinitionReturnType == FormatStyle::DRTBS_All)
270         Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions;
271       else if (Style.AlwaysBreakAfterDefinitionReturnType ==
272                FormatStyle::DRTBS_TopLevel)
273         Style.AlwaysBreakAfterReturnType =
274             FormatStyle::RTBS_TopLevelDefinitions;
275     }
276 
277     IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
278                    Style.AlwaysBreakBeforeMultilineStrings);
279     IO.mapOptional("AlwaysBreakTemplateDeclarations",
280                    Style.AlwaysBreakTemplateDeclarations);
281     IO.mapOptional("BinPackArguments", Style.BinPackArguments);
282     IO.mapOptional("BinPackParameters", Style.BinPackParameters);
283     IO.mapOptional("BraceWrapping", Style.BraceWrapping);
284     IO.mapOptional("BreakBeforeBinaryOperators",
285                    Style.BreakBeforeBinaryOperators);
286     IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
287     IO.mapOptional("BreakBeforeTernaryOperators",
288                    Style.BreakBeforeTernaryOperators);
289     IO.mapOptional("BreakConstructorInitializersBeforeComma",
290                    Style.BreakConstructorInitializersBeforeComma);
291     IO.mapOptional("BreakAfterJavaFieldAnnotations",
292                    Style.BreakAfterJavaFieldAnnotations);
293     IO.mapOptional("BreakStringLiterals", Style.BreakStringLiterals);
294     IO.mapOptional("ColumnLimit", Style.ColumnLimit);
295     IO.mapOptional("CommentPragmas", Style.CommentPragmas);
296     IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
297                    Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
298     IO.mapOptional("ConstructorInitializerIndentWidth",
299                    Style.ConstructorInitializerIndentWidth);
300     IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
301     IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
302     IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment);
303     IO.mapOptional("DisableFormat", Style.DisableFormat);
304     IO.mapOptional("ExperimentalAutoDetectBinPacking",
305                    Style.ExperimentalAutoDetectBinPacking);
306     IO.mapOptional("ForEachMacros", Style.ForEachMacros);
307     IO.mapOptional("IncludeCategories", Style.IncludeCategories);
308     IO.mapOptional("IncludeIsMainRegex", Style.IncludeIsMainRegex);
309     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
310     IO.mapOptional("IndentWidth", Style.IndentWidth);
311     IO.mapOptional("IndentWrappedFunctionNames",
312                    Style.IndentWrappedFunctionNames);
313     IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
314                    Style.KeepEmptyLinesAtTheStartOfBlocks);
315     IO.mapOptional("MacroBlockBegin", Style.MacroBlockBegin);
316     IO.mapOptional("MacroBlockEnd", Style.MacroBlockEnd);
317     IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
318     IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
319     IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth);
320     IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
321     IO.mapOptional("ObjCSpaceBeforeProtocolList",
322                    Style.ObjCSpaceBeforeProtocolList);
323     IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
324                    Style.PenaltyBreakBeforeFirstCallParameter);
325     IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
326     IO.mapOptional("PenaltyBreakFirstLessLess",
327                    Style.PenaltyBreakFirstLessLess);
328     IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
329     IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
330     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
331                    Style.PenaltyReturnTypeOnItsOwnLine);
332     IO.mapOptional("PointerAlignment", Style.PointerAlignment);
333     IO.mapOptional("ReflowComments", Style.ReflowComments);
334     IO.mapOptional("SortIncludes", Style.SortIncludes);
335     IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
336     IO.mapOptional("SpaceBeforeAssignmentOperators",
337                    Style.SpaceBeforeAssignmentOperators);
338     IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
339     IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
340     IO.mapOptional("SpacesBeforeTrailingComments",
341                    Style.SpacesBeforeTrailingComments);
342     IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
343     IO.mapOptional("SpacesInContainerLiterals",
344                    Style.SpacesInContainerLiterals);
345     IO.mapOptional("SpacesInCStyleCastParentheses",
346                    Style.SpacesInCStyleCastParentheses);
347     IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
348     IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets);
349     IO.mapOptional("Standard", Style.Standard);
350     IO.mapOptional("TabWidth", Style.TabWidth);
351     IO.mapOptional("UseTab", Style.UseTab);
352     IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes);
353   }
354 };
355 
356 template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> {
357   static void mapping(IO &IO, FormatStyle::BraceWrappingFlags &Wrapping) {
358     IO.mapOptional("AfterClass", Wrapping.AfterClass);
359     IO.mapOptional("AfterControlStatement", Wrapping.AfterControlStatement);
360     IO.mapOptional("AfterEnum", Wrapping.AfterEnum);
361     IO.mapOptional("AfterFunction", Wrapping.AfterFunction);
362     IO.mapOptional("AfterNamespace", Wrapping.AfterNamespace);
363     IO.mapOptional("AfterObjCDeclaration", Wrapping.AfterObjCDeclaration);
364     IO.mapOptional("AfterStruct", Wrapping.AfterStruct);
365     IO.mapOptional("AfterUnion", Wrapping.AfterUnion);
366     IO.mapOptional("BeforeCatch", Wrapping.BeforeCatch);
367     IO.mapOptional("BeforeElse", Wrapping.BeforeElse);
368     IO.mapOptional("IndentBraces", Wrapping.IndentBraces);
369   }
370 };
371 
372 template <> struct MappingTraits<FormatStyle::IncludeCategory> {
373   static void mapping(IO &IO, FormatStyle::IncludeCategory &Category) {
374     IO.mapOptional("Regex", Category.Regex);
375     IO.mapOptional("Priority", Category.Priority);
376   }
377 };
378 
379 // Allows to read vector<FormatStyle> while keeping default values.
380 // IO.getContext() should contain a pointer to the FormatStyle structure, that
381 // will be used to get default values for missing keys.
382 // If the first element has no Language specified, it will be treated as the
383 // default one for the following elements.
384 template <> struct DocumentListTraits<std::vector<FormatStyle>> {
385   static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
386     return Seq.size();
387   }
388   static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
389                               size_t Index) {
390     if (Index >= Seq.size()) {
391       assert(Index == Seq.size());
392       FormatStyle Template;
393       if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
394         Template = Seq[0];
395       } else {
396         Template = *((const FormatStyle *)IO.getContext());
397         Template.Language = FormatStyle::LK_None;
398       }
399       Seq.resize(Index + 1, Template);
400     }
401     return Seq[Index];
402   }
403 };
404 } // namespace yaml
405 } // namespace llvm
406 
407 namespace clang {
408 namespace format {
409 
410 const std::error_category &getParseCategory() {
411   static ParseErrorCategory C;
412   return C;
413 }
414 std::error_code make_error_code(ParseError e) {
415   return std::error_code(static_cast<int>(e), getParseCategory());
416 }
417 
418 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT {
419   return "clang-format.parse_error";
420 }
421 
422 std::string ParseErrorCategory::message(int EV) const {
423   switch (static_cast<ParseError>(EV)) {
424   case ParseError::Success:
425     return "Success";
426   case ParseError::Error:
427     return "Invalid argument";
428   case ParseError::Unsuitable:
429     return "Unsuitable";
430   }
431   llvm_unreachable("unexpected parse error");
432 }
433 
434 static FormatStyle expandPresets(const FormatStyle &Style) {
435   if (Style.BreakBeforeBraces == FormatStyle::BS_Custom)
436     return Style;
437   FormatStyle Expanded = Style;
438   Expanded.BraceWrapping = {false, false, false, false, false, false,
439                             false, false, false, false, false};
440   switch (Style.BreakBeforeBraces) {
441   case FormatStyle::BS_Linux:
442     Expanded.BraceWrapping.AfterClass = true;
443     Expanded.BraceWrapping.AfterFunction = true;
444     Expanded.BraceWrapping.AfterNamespace = true;
445     break;
446   case FormatStyle::BS_Mozilla:
447     Expanded.BraceWrapping.AfterClass = true;
448     Expanded.BraceWrapping.AfterEnum = true;
449     Expanded.BraceWrapping.AfterFunction = true;
450     Expanded.BraceWrapping.AfterStruct = true;
451     Expanded.BraceWrapping.AfterUnion = true;
452     break;
453   case FormatStyle::BS_Stroustrup:
454     Expanded.BraceWrapping.AfterFunction = true;
455     Expanded.BraceWrapping.BeforeCatch = true;
456     Expanded.BraceWrapping.BeforeElse = true;
457     break;
458   case FormatStyle::BS_Allman:
459     Expanded.BraceWrapping.AfterClass = true;
460     Expanded.BraceWrapping.AfterControlStatement = true;
461     Expanded.BraceWrapping.AfterEnum = true;
462     Expanded.BraceWrapping.AfterFunction = true;
463     Expanded.BraceWrapping.AfterNamespace = true;
464     Expanded.BraceWrapping.AfterObjCDeclaration = true;
465     Expanded.BraceWrapping.AfterStruct = true;
466     Expanded.BraceWrapping.BeforeCatch = true;
467     Expanded.BraceWrapping.BeforeElse = true;
468     break;
469   case FormatStyle::BS_GNU:
470     Expanded.BraceWrapping = {true, true, true, true, true, true,
471                               true, true, true, true, true};
472     break;
473   case FormatStyle::BS_WebKit:
474     Expanded.BraceWrapping.AfterFunction = true;
475     break;
476   default:
477     break;
478   }
479   return Expanded;
480 }
481 
482 FormatStyle getLLVMStyle() {
483   FormatStyle LLVMStyle;
484   LLVMStyle.Language = FormatStyle::LK_Cpp;
485   LLVMStyle.AccessModifierOffset = -2;
486   LLVMStyle.AlignEscapedNewlinesLeft = false;
487   LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align;
488   LLVMStyle.AlignOperands = true;
489   LLVMStyle.AlignTrailingComments = true;
490   LLVMStyle.AlignConsecutiveAssignments = false;
491   LLVMStyle.AlignConsecutiveDeclarations = false;
492   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
493   LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
494   LLVMStyle.AllowShortBlocksOnASingleLine = false;
495   LLVMStyle.AllowShortCaseLabelsOnASingleLine = false;
496   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
497   LLVMStyle.AllowShortLoopsOnASingleLine = false;
498   LLVMStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_None;
499   LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None;
500   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
501   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
502   LLVMStyle.BinPackParameters = true;
503   LLVMStyle.BinPackArguments = true;
504   LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;
505   LLVMStyle.BreakBeforeTernaryOperators = true;
506   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
507   LLVMStyle.BraceWrapping = {false, false, false, false, false, false,
508                              false, false, false, false, false};
509   LLVMStyle.BreakAfterJavaFieldAnnotations = false;
510   LLVMStyle.BreakConstructorInitializersBeforeComma = false;
511   LLVMStyle.BreakStringLiterals = true;
512   LLVMStyle.ColumnLimit = 80;
513   LLVMStyle.CommentPragmas = "^ IWYU pragma:";
514   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
515   LLVMStyle.ConstructorInitializerIndentWidth = 4;
516   LLVMStyle.ContinuationIndentWidth = 4;
517   LLVMStyle.Cpp11BracedListStyle = true;
518   LLVMStyle.DerivePointerAlignment = false;
519   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
520   LLVMStyle.ForEachMacros.push_back("foreach");
521   LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
522   LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH");
523   LLVMStyle.IncludeCategories = {{"^\"(llvm|llvm-c|clang|clang-c)/", 2},
524                                  {"^(<|\"(gtest|isl|json)/)", 3},
525                                  {".*", 1}};
526   LLVMStyle.IncludeIsMainRegex = "$";
527   LLVMStyle.IndentCaseLabels = false;
528   LLVMStyle.IndentWrappedFunctionNames = false;
529   LLVMStyle.IndentWidth = 2;
530   LLVMStyle.TabWidth = 8;
531   LLVMStyle.MaxEmptyLinesToKeep = 1;
532   LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
533   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
534   LLVMStyle.ObjCBlockIndentWidth = 2;
535   LLVMStyle.ObjCSpaceAfterProperty = false;
536   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
537   LLVMStyle.PointerAlignment = FormatStyle::PAS_Right;
538   LLVMStyle.SpacesBeforeTrailingComments = 1;
539   LLVMStyle.Standard = FormatStyle::LS_Cpp11;
540   LLVMStyle.UseTab = FormatStyle::UT_Never;
541   LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;
542   LLVMStyle.ReflowComments = true;
543   LLVMStyle.SpacesInParentheses = false;
544   LLVMStyle.SpacesInSquareBrackets = false;
545   LLVMStyle.SpaceInEmptyParentheses = false;
546   LLVMStyle.SpacesInContainerLiterals = true;
547   LLVMStyle.SpacesInCStyleCastParentheses = false;
548   LLVMStyle.SpaceAfterCStyleCast = false;
549   LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
550   LLVMStyle.SpaceBeforeAssignmentOperators = true;
551   LLVMStyle.SpacesInAngles = false;
552 
553   LLVMStyle.PenaltyBreakComment = 300;
554   LLVMStyle.PenaltyBreakFirstLessLess = 120;
555   LLVMStyle.PenaltyBreakString = 1000;
556   LLVMStyle.PenaltyExcessCharacter = 1000000;
557   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
558   LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
559 
560   LLVMStyle.DisableFormat = false;
561   LLVMStyle.SortIncludes = true;
562 
563   return LLVMStyle;
564 }
565 
566 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
567   FormatStyle GoogleStyle = getLLVMStyle();
568   GoogleStyle.Language = Language;
569 
570   GoogleStyle.AccessModifierOffset = -1;
571   GoogleStyle.AlignEscapedNewlinesLeft = true;
572   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
573   GoogleStyle.AllowShortLoopsOnASingleLine = true;
574   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
575   GoogleStyle.AlwaysBreakTemplateDeclarations = true;
576   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
577   GoogleStyle.DerivePointerAlignment = true;
578   GoogleStyle.IncludeCategories = {{"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}};
579   GoogleStyle.IncludeIsMainRegex = "([-_](test|unittest))?$";
580   GoogleStyle.IndentCaseLabels = true;
581   GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
582   GoogleStyle.ObjCSpaceAfterProperty = false;
583   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
584   GoogleStyle.PointerAlignment = FormatStyle::PAS_Left;
585   GoogleStyle.SpacesBeforeTrailingComments = 2;
586   GoogleStyle.Standard = FormatStyle::LS_Auto;
587 
588   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
589   GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
590 
591   if (Language == FormatStyle::LK_Java) {
592     GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
593     GoogleStyle.AlignOperands = false;
594     GoogleStyle.AlignTrailingComments = false;
595     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
596     GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
597     GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
598     GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment;
599     GoogleStyle.ColumnLimit = 100;
600     GoogleStyle.SpaceAfterCStyleCast = true;
601     GoogleStyle.SpacesBeforeTrailingComments = 1;
602   } else if (Language == FormatStyle::LK_JavaScript) {
603     GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak;
604     GoogleStyle.AlignOperands = false;
605     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
606     GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
607     GoogleStyle.BreakBeforeTernaryOperators = false;
608     GoogleStyle.CommentPragmas = "@(export|return|see|visibility) ";
609     GoogleStyle.MaxEmptyLinesToKeep = 3;
610     GoogleStyle.SpacesInContainerLiterals = false;
611     GoogleStyle.JavaScriptQuotes = FormatStyle::JSQS_Single;
612   } else if (Language == FormatStyle::LK_Proto) {
613     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
614     GoogleStyle.SpacesInContainerLiterals = false;
615   }
616 
617   return GoogleStyle;
618 }
619 
620 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
621   FormatStyle ChromiumStyle = getGoogleStyle(Language);
622   if (Language == FormatStyle::LK_Java) {
623     ChromiumStyle.AllowShortIfStatementsOnASingleLine = true;
624     ChromiumStyle.BreakAfterJavaFieldAnnotations = true;
625     ChromiumStyle.ContinuationIndentWidth = 8;
626     ChromiumStyle.IndentWidth = 4;
627   } else {
628     ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
629     ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
630     ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
631     ChromiumStyle.AllowShortLoopsOnASingleLine = false;
632     ChromiumStyle.BinPackParameters = false;
633     ChromiumStyle.DerivePointerAlignment = false;
634   }
635   ChromiumStyle.SortIncludes = false;
636   return ChromiumStyle;
637 }
638 
639 FormatStyle getMozillaStyle() {
640   FormatStyle MozillaStyle = getLLVMStyle();
641   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
642   MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
643   MozillaStyle.AlwaysBreakAfterReturnType =
644       FormatStyle::RTBS_TopLevelDefinitions;
645   MozillaStyle.AlwaysBreakAfterDefinitionReturnType =
646       FormatStyle::DRTBS_TopLevel;
647   MozillaStyle.AlwaysBreakTemplateDeclarations = true;
648   MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla;
649   MozillaStyle.BreakConstructorInitializersBeforeComma = true;
650   MozillaStyle.ConstructorInitializerIndentWidth = 2;
651   MozillaStyle.ContinuationIndentWidth = 2;
652   MozillaStyle.Cpp11BracedListStyle = false;
653   MozillaStyle.IndentCaseLabels = true;
654   MozillaStyle.ObjCSpaceAfterProperty = true;
655   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
656   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
657   MozillaStyle.PointerAlignment = FormatStyle::PAS_Left;
658   return MozillaStyle;
659 }
660 
661 FormatStyle getWebKitStyle() {
662   FormatStyle Style = getLLVMStyle();
663   Style.AccessModifierOffset = -4;
664   Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
665   Style.AlignOperands = false;
666   Style.AlignTrailingComments = false;
667   Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
668   Style.BreakBeforeBraces = FormatStyle::BS_WebKit;
669   Style.BreakConstructorInitializersBeforeComma = true;
670   Style.Cpp11BracedListStyle = false;
671   Style.ColumnLimit = 0;
672   Style.IndentWidth = 4;
673   Style.NamespaceIndentation = FormatStyle::NI_Inner;
674   Style.ObjCBlockIndentWidth = 4;
675   Style.ObjCSpaceAfterProperty = true;
676   Style.PointerAlignment = FormatStyle::PAS_Left;
677   Style.Standard = FormatStyle::LS_Cpp03;
678   return Style;
679 }
680 
681 FormatStyle getGNUStyle() {
682   FormatStyle Style = getLLVMStyle();
683   Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All;
684   Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions;
685   Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
686   Style.BreakBeforeBraces = FormatStyle::BS_GNU;
687   Style.BreakBeforeTernaryOperators = true;
688   Style.Cpp11BracedListStyle = false;
689   Style.ColumnLimit = 79;
690   Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
691   Style.Standard = FormatStyle::LS_Cpp03;
692   return Style;
693 }
694 
695 FormatStyle getNoStyle() {
696   FormatStyle NoStyle = getLLVMStyle();
697   NoStyle.DisableFormat = true;
698   NoStyle.SortIncludes = false;
699   return NoStyle;
700 }
701 
702 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
703                         FormatStyle *Style) {
704   if (Name.equals_lower("llvm")) {
705     *Style = getLLVMStyle();
706   } else if (Name.equals_lower("chromium")) {
707     *Style = getChromiumStyle(Language);
708   } else if (Name.equals_lower("mozilla")) {
709     *Style = getMozillaStyle();
710   } else if (Name.equals_lower("google")) {
711     *Style = getGoogleStyle(Language);
712   } else if (Name.equals_lower("webkit")) {
713     *Style = getWebKitStyle();
714   } else if (Name.equals_lower("gnu")) {
715     *Style = getGNUStyle();
716   } else if (Name.equals_lower("none")) {
717     *Style = getNoStyle();
718   } else {
719     return false;
720   }
721 
722   Style->Language = Language;
723   return true;
724 }
725 
726 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
727   assert(Style);
728   FormatStyle::LanguageKind Language = Style->Language;
729   assert(Language != FormatStyle::LK_None);
730   if (Text.trim().empty())
731     return make_error_code(ParseError::Error);
732 
733   std::vector<FormatStyle> Styles;
734   llvm::yaml::Input Input(Text);
735   // DocumentListTraits<vector<FormatStyle>> uses the context to get default
736   // values for the fields, keys for which are missing from the configuration.
737   // Mapping also uses the context to get the language to find the correct
738   // base style.
739   Input.setContext(Style);
740   Input >> Styles;
741   if (Input.error())
742     return Input.error();
743 
744   for (unsigned i = 0; i < Styles.size(); ++i) {
745     // Ensures that only the first configuration can skip the Language option.
746     if (Styles[i].Language == FormatStyle::LK_None && i != 0)
747       return make_error_code(ParseError::Error);
748     // Ensure that each language is configured at most once.
749     for (unsigned j = 0; j < i; ++j) {
750       if (Styles[i].Language == Styles[j].Language) {
751         DEBUG(llvm::dbgs()
752               << "Duplicate languages in the config file on positions " << j
753               << " and " << i << "\n");
754         return make_error_code(ParseError::Error);
755       }
756     }
757   }
758   // Look for a suitable configuration starting from the end, so we can
759   // find the configuration for the specific language first, and the default
760   // configuration (which can only be at slot 0) after it.
761   for (int i = Styles.size() - 1; i >= 0; --i) {
762     if (Styles[i].Language == Language ||
763         Styles[i].Language == FormatStyle::LK_None) {
764       *Style = Styles[i];
765       Style->Language = Language;
766       return make_error_code(ParseError::Success);
767     }
768   }
769   return make_error_code(ParseError::Unsuitable);
770 }
771 
772 std::string configurationAsText(const FormatStyle &Style) {
773   std::string Text;
774   llvm::raw_string_ostream Stream(Text);
775   llvm::yaml::Output Output(Stream);
776   // We use the same mapping method for input and output, so we need a non-const
777   // reference here.
778   FormatStyle NonConstStyle = expandPresets(Style);
779   Output << NonConstStyle;
780   return Stream.str();
781 }
782 
783 namespace {
784 
785 class FormatTokenLexer {
786 public:
787   FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
788                    const FormatStyle &Style, encoding::Encoding Encoding)
789       : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
790         LessStashed(false), Column(0), TrailingWhitespace(0),
791         SourceMgr(SourceMgr), ID(ID), Style(Style),
792         IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
793         Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
794         MacroBlockBeginRegex(Style.MacroBlockBegin),
795         MacroBlockEndRegex(Style.MacroBlockEnd) {
796     Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
797                         getFormattingLangOpts(Style)));
798     Lex->SetKeepWhitespaceMode(true);
799 
800     for (const std::string &ForEachMacro : Style.ForEachMacros)
801       ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
802     std::sort(ForEachMacros.begin(), ForEachMacros.end());
803   }
804 
805   ArrayRef<FormatToken *> lex() {
806     assert(Tokens.empty());
807     assert(FirstInLineIndex == 0);
808     do {
809       Tokens.push_back(getNextToken());
810       if (Style.Language == FormatStyle::LK_JavaScript)
811         tryParseJSRegexLiteral();
812       tryMergePreviousTokens();
813       if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
814         FirstInLineIndex = Tokens.size() - 1;
815     } while (Tokens.back()->Tok.isNot(tok::eof));
816     return Tokens;
817   }
818 
819   const AdditionalKeywords &getKeywords() { return Keywords; }
820 
821 private:
822   void tryMergePreviousTokens() {
823     if (tryMerge_TMacro())
824       return;
825     if (tryMergeConflictMarkers())
826       return;
827     if (tryMergeLessLess())
828       return;
829 
830     if (Style.Language == FormatStyle::LK_JavaScript) {
831       if (tryMergeTemplateString())
832         return;
833 
834       static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
835       static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
836                                                      tok::equal};
837       static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
838                                                     tok::greaterequal};
839       static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
840       // FIXME: Investigate what token type gives the correct operator priority.
841       if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
842         return;
843       if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
844         return;
845       if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
846         return;
847       if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
848         return;
849     }
850   }
851 
852   bool tryMergeLessLess() {
853     // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
854     if (Tokens.size() < 3)
855       return false;
856 
857     bool FourthTokenIsLess = false;
858     if (Tokens.size() > 3)
859       FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
860 
861     auto First = Tokens.end() - 3;
862     if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
863         First[0]->isNot(tok::less) || FourthTokenIsLess)
864       return false;
865 
866     // Only merge if there currently is no whitespace between the two "<".
867     if (First[1]->WhitespaceRange.getBegin() !=
868         First[1]->WhitespaceRange.getEnd())
869       return false;
870 
871     First[0]->Tok.setKind(tok::lessless);
872     First[0]->TokenText = "<<";
873     First[0]->ColumnWidth += 1;
874     Tokens.erase(Tokens.end() - 2);
875     return true;
876   }
877 
878   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) {
879     if (Tokens.size() < Kinds.size())
880       return false;
881 
882     SmallVectorImpl<FormatToken *>::const_iterator First =
883         Tokens.end() - Kinds.size();
884     if (!First[0]->is(Kinds[0]))
885       return false;
886     unsigned AddLength = 0;
887     for (unsigned i = 1; i < Kinds.size(); ++i) {
888       if (!First[i]->is(Kinds[i]) ||
889           First[i]->WhitespaceRange.getBegin() !=
890               First[i]->WhitespaceRange.getEnd())
891         return false;
892       AddLength += First[i]->TokenText.size();
893     }
894     Tokens.resize(Tokens.size() - Kinds.size() + 1);
895     First[0]->TokenText = StringRef(First[0]->TokenText.data(),
896                                     First[0]->TokenText.size() + AddLength);
897     First[0]->ColumnWidth += AddLength;
898     First[0]->Type = NewType;
899     return true;
900   }
901 
902   // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
903   bool precedesOperand(FormatToken *Tok) {
904     // NB: This is not entirely correct, as an r_paren can introduce an operand
905     // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough
906     // corner case to not matter in practice, though.
907     return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
908                         tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
909                         tok::colon, tok::question, tok::tilde) ||
910            Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
911                         tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
912                         tok::kw_typeof, Keywords.kw_instanceof,
913                         Keywords.kw_in) ||
914            Tok->isBinaryOperator();
915   }
916 
917   bool canPrecedeRegexLiteral(FormatToken *Prev) {
918     if (!Prev)
919       return true;
920 
921     // Regex literals can only follow after prefix unary operators, not after
922     // postfix unary operators. If the '++' is followed by a non-operand
923     // introducing token, the slash here is the operand and not the start of a
924     // regex.
925     if (Prev->isOneOf(tok::plusplus, tok::minusminus))
926       return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]));
927 
928     // The previous token must introduce an operand location where regex
929     // literals can occur.
930     if (!precedesOperand(Prev))
931       return false;
932 
933     return true;
934   }
935 
936   // Tries to parse a JavaScript Regex literal starting at the current token,
937   // if that begins with a slash and is in a location where JavaScript allows
938   // regex literals. Changes the current token to a regex literal and updates
939   // its text if successful.
940   void tryParseJSRegexLiteral() {
941     FormatToken *RegexToken = Tokens.back();
942     if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
943       return;
944 
945     FormatToken *Prev = nullptr;
946     for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
947       // NB: Because previous pointers are not initialized yet, this cannot use
948       // Token.getPreviousNonComment.
949       if ((*I)->isNot(tok::comment)) {
950         Prev = *I;
951         break;
952       }
953     }
954 
955     if (!canPrecedeRegexLiteral(Prev))
956       return;
957 
958     // 'Manually' lex ahead in the current file buffer.
959     const char *Offset = Lex->getBufferLocation();
960     const char *RegexBegin = Offset - RegexToken->TokenText.size();
961     StringRef Buffer = Lex->getBuffer();
962     bool InCharacterClass = false;
963     bool HaveClosingSlash = false;
964     for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
965       // Regular expressions are terminated with a '/', which can only be
966       // escaped using '\' or a character class between '[' and ']'.
967       // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.5.
968       switch (*Offset) {
969       case '\\':
970         // Skip the escaped character.
971         ++Offset;
972         break;
973       case '[':
974         InCharacterClass = true;
975         break;
976       case ']':
977         InCharacterClass = false;
978         break;
979       case '/':
980         if (!InCharacterClass)
981           HaveClosingSlash = true;
982         break;
983       }
984     }
985 
986     RegexToken->Type = TT_RegexLiteral;
987     // Treat regex literals like other string_literals.
988     RegexToken->Tok.setKind(tok::string_literal);
989     RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
990     RegexToken->ColumnWidth = RegexToken->TokenText.size();
991 
992     resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
993   }
994 
995   bool tryMergeTemplateString() {
996     if (Tokens.size() < 2)
997       return false;
998 
999     FormatToken *EndBacktick = Tokens.back();
1000     // Backticks get lexed as tok::unknown tokens. If a template string contains
1001     // a comment start, it gets lexed as a tok::comment, or tok::unknown if
1002     // unterminated.
1003     if (!EndBacktick->isOneOf(tok::comment, tok::string_literal,
1004                               tok::char_constant, tok::unknown))
1005       return false;
1006     size_t CommentBacktickPos = EndBacktick->TokenText.find('`');
1007     // Unknown token that's not actually a backtick, or a comment that doesn't
1008     // contain a backtick.
1009     if (CommentBacktickPos == StringRef::npos)
1010       return false;
1011 
1012     unsigned TokenCount = 0;
1013     bool IsMultiline = false;
1014     unsigned EndColumnInFirstLine =
1015         EndBacktick->OriginalColumn + EndBacktick->ColumnWidth;
1016     for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) {
1017       ++TokenCount;
1018       if (I[0]->IsMultiline)
1019         IsMultiline = true;
1020 
1021       // If there was a preceding template string, this must be the start of a
1022       // template string, not the end.
1023       if (I[0]->is(TT_TemplateString))
1024         return false;
1025 
1026       if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") {
1027         // Keep track of the rhs offset of the last token to wrap across lines -
1028         // its the rhs offset of the first line of the template string, used to
1029         // determine its width.
1030         if (I[0]->IsMultiline)
1031           EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth;
1032         // If the token has newlines, the token before it (if it exists) is the
1033         // rhs end of the previous line.
1034         if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) {
1035           EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth;
1036           IsMultiline = true;
1037         }
1038         continue;
1039       }
1040 
1041       Tokens.resize(Tokens.size() - TokenCount);
1042       Tokens.back()->Type = TT_TemplateString;
1043       const char *EndOffset =
1044           EndBacktick->TokenText.data() + 1 + CommentBacktickPos;
1045       if (CommentBacktickPos != 0) {
1046         // If the backtick was not the first character (e.g. in a comment),
1047         // re-lex after the backtick position.
1048         SourceLocation Loc = EndBacktick->Tok.getLocation();
1049         resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1);
1050       }
1051       Tokens.back()->TokenText =
1052           StringRef(Tokens.back()->TokenText.data(),
1053                     EndOffset - Tokens.back()->TokenText.data());
1054 
1055       unsigned EndOriginalColumn = EndBacktick->OriginalColumn;
1056       if (EndOriginalColumn == 0) {
1057         SourceLocation Loc = EndBacktick->Tok.getLocation();
1058         EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc);
1059       }
1060       // If the ` is further down within the token (e.g. in a comment).
1061       EndOriginalColumn += CommentBacktickPos;
1062 
1063       if (IsMultiline) {
1064         // ColumnWidth is from backtick to last token in line.
1065         // LastLineColumnWidth is 0 to backtick.
1066         // x = `some content
1067         //     until here`;
1068         Tokens.back()->ColumnWidth =
1069             EndColumnInFirstLine - Tokens.back()->OriginalColumn;
1070         // +1 for the ` itself.
1071         Tokens.back()->LastLineColumnWidth = EndOriginalColumn + 1;
1072         Tokens.back()->IsMultiline = true;
1073       } else {
1074         // Token simply spans from start to end, +1 for the ` itself.
1075         Tokens.back()->ColumnWidth =
1076             EndOriginalColumn - Tokens.back()->OriginalColumn + 1;
1077       }
1078       return true;
1079     }
1080     return false;
1081   }
1082 
1083   bool tryMerge_TMacro() {
1084     if (Tokens.size() < 4)
1085       return false;
1086     FormatToken *Last = Tokens.back();
1087     if (!Last->is(tok::r_paren))
1088       return false;
1089 
1090     FormatToken *String = Tokens[Tokens.size() - 2];
1091     if (!String->is(tok::string_literal) || String->IsMultiline)
1092       return false;
1093 
1094     if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
1095       return false;
1096 
1097     FormatToken *Macro = Tokens[Tokens.size() - 4];
1098     if (Macro->TokenText != "_T")
1099       return false;
1100 
1101     const char *Start = Macro->TokenText.data();
1102     const char *End = Last->TokenText.data() + Last->TokenText.size();
1103     String->TokenText = StringRef(Start, End - Start);
1104     String->IsFirst = Macro->IsFirst;
1105     String->LastNewlineOffset = Macro->LastNewlineOffset;
1106     String->WhitespaceRange = Macro->WhitespaceRange;
1107     String->OriginalColumn = Macro->OriginalColumn;
1108     String->ColumnWidth = encoding::columnWidthWithTabs(
1109         String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
1110     String->NewlinesBefore = Macro->NewlinesBefore;
1111     String->HasUnescapedNewline = Macro->HasUnescapedNewline;
1112 
1113     Tokens.pop_back();
1114     Tokens.pop_back();
1115     Tokens.pop_back();
1116     Tokens.back() = String;
1117     return true;
1118   }
1119 
1120   bool tryMergeConflictMarkers() {
1121     if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
1122       return false;
1123 
1124     // Conflict lines look like:
1125     // <marker> <text from the vcs>
1126     // For example:
1127     // >>>>>>> /file/in/file/system at revision 1234
1128     //
1129     // We merge all tokens in a line that starts with a conflict marker
1130     // into a single token with a special token type that the unwrapped line
1131     // parser will use to correctly rebuild the underlying code.
1132 
1133     FileID ID;
1134     // Get the position of the first token in the line.
1135     unsigned FirstInLineOffset;
1136     std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
1137         Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
1138     StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
1139     // Calculate the offset of the start of the current line.
1140     auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
1141     if (LineOffset == StringRef::npos) {
1142       LineOffset = 0;
1143     } else {
1144       ++LineOffset;
1145     }
1146 
1147     auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
1148     StringRef LineStart;
1149     if (FirstSpace == StringRef::npos) {
1150       LineStart = Buffer.substr(LineOffset);
1151     } else {
1152       LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
1153     }
1154 
1155     TokenType Type = TT_Unknown;
1156     if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
1157       Type = TT_ConflictStart;
1158     } else if (LineStart == "|||||||" || LineStart == "=======" ||
1159                LineStart == "====") {
1160       Type = TT_ConflictAlternative;
1161     } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
1162       Type = TT_ConflictEnd;
1163     }
1164 
1165     if (Type != TT_Unknown) {
1166       FormatToken *Next = Tokens.back();
1167 
1168       Tokens.resize(FirstInLineIndex + 1);
1169       // We do not need to build a complete token here, as we will skip it
1170       // during parsing anyway (as we must not touch whitespace around conflict
1171       // markers).
1172       Tokens.back()->Type = Type;
1173       Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
1174 
1175       Tokens.push_back(Next);
1176       return true;
1177     }
1178 
1179     return false;
1180   }
1181 
1182   FormatToken *getStashedToken() {
1183     // Create a synthesized second '>' or '<' token.
1184     Token Tok = FormatTok->Tok;
1185     StringRef TokenText = FormatTok->TokenText;
1186 
1187     unsigned OriginalColumn = FormatTok->OriginalColumn;
1188     FormatTok = new (Allocator.Allocate()) FormatToken;
1189     FormatTok->Tok = Tok;
1190     SourceLocation TokLocation =
1191         FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
1192     FormatTok->Tok.setLocation(TokLocation);
1193     FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
1194     FormatTok->TokenText = TokenText;
1195     FormatTok->ColumnWidth = 1;
1196     FormatTok->OriginalColumn = OriginalColumn + 1;
1197 
1198     return FormatTok;
1199   }
1200 
1201   FormatToken *getNextToken() {
1202     if (GreaterStashed) {
1203       GreaterStashed = false;
1204       return getStashedToken();
1205     }
1206     if (LessStashed) {
1207       LessStashed = false;
1208       return getStashedToken();
1209     }
1210 
1211     FormatTok = new (Allocator.Allocate()) FormatToken;
1212     readRawToken(*FormatTok);
1213     SourceLocation WhitespaceStart =
1214         FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
1215     FormatTok->IsFirst = IsFirstToken;
1216     IsFirstToken = false;
1217 
1218     // Consume and record whitespace until we find a significant token.
1219     unsigned WhitespaceLength = TrailingWhitespace;
1220     while (FormatTok->Tok.is(tok::unknown)) {
1221       StringRef Text = FormatTok->TokenText;
1222       auto EscapesNewline = [&](int pos) {
1223         // A '\r' here is just part of '\r\n'. Skip it.
1224         if (pos >= 0 && Text[pos] == '\r')
1225           --pos;
1226         // See whether there is an odd number of '\' before this.
1227         unsigned count = 0;
1228         for (; pos >= 0; --pos, ++count)
1229           if (Text[pos] != '\\')
1230             break;
1231         return count & 1;
1232       };
1233       // FIXME: This miscounts tok:unknown tokens that are not just
1234       // whitespace, e.g. a '`' character.
1235       for (int i = 0, e = Text.size(); i != e; ++i) {
1236         switch (Text[i]) {
1237         case '\n':
1238           ++FormatTok->NewlinesBefore;
1239           FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
1240           FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1241           Column = 0;
1242           break;
1243         case '\r':
1244           FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1245           Column = 0;
1246           break;
1247         case '\f':
1248         case '\v':
1249           Column = 0;
1250           break;
1251         case ' ':
1252           ++Column;
1253           break;
1254         case '\t':
1255           Column += Style.TabWidth - Column % Style.TabWidth;
1256           break;
1257         case '\\':
1258           if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
1259             FormatTok->Type = TT_ImplicitStringLiteral;
1260           break;
1261         default:
1262           FormatTok->Type = TT_ImplicitStringLiteral;
1263           break;
1264         }
1265         if (FormatTok->Type == TT_ImplicitStringLiteral)
1266           break;
1267       }
1268 
1269       if (FormatTok->is(TT_ImplicitStringLiteral))
1270         break;
1271       WhitespaceLength += FormatTok->Tok.getLength();
1272 
1273       readRawToken(*FormatTok);
1274     }
1275 
1276     // In case the token starts with escaped newlines, we want to
1277     // take them into account as whitespace - this pattern is quite frequent
1278     // in macro definitions.
1279     // FIXME: Add a more explicit test.
1280     while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
1281            FormatTok->TokenText[1] == '\n') {
1282       ++FormatTok->NewlinesBefore;
1283       WhitespaceLength += 2;
1284       FormatTok->LastNewlineOffset = 2;
1285       Column = 0;
1286       FormatTok->TokenText = FormatTok->TokenText.substr(2);
1287     }
1288 
1289     FormatTok->WhitespaceRange = SourceRange(
1290         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1291 
1292     FormatTok->OriginalColumn = Column;
1293 
1294     TrailingWhitespace = 0;
1295     if (FormatTok->Tok.is(tok::comment)) {
1296       // FIXME: Add the trimmed whitespace to Column.
1297       StringRef UntrimmedText = FormatTok->TokenText;
1298       FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
1299       TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
1300     } else if (FormatTok->Tok.is(tok::raw_identifier)) {
1301       IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
1302       FormatTok->Tok.setIdentifierInfo(&Info);
1303       FormatTok->Tok.setKind(Info.getTokenID());
1304       if (Style.Language == FormatStyle::LK_Java &&
1305           FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
1306                              tok::kw_operator)) {
1307         FormatTok->Tok.setKind(tok::identifier);
1308         FormatTok->Tok.setIdentifierInfo(nullptr);
1309       } else if (Style.Language == FormatStyle::LK_JavaScript &&
1310                  FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
1311                                     tok::kw_operator)) {
1312         FormatTok->Tok.setKind(tok::identifier);
1313         FormatTok->Tok.setIdentifierInfo(nullptr);
1314       }
1315     } else if (FormatTok->Tok.is(tok::greatergreater)) {
1316       FormatTok->Tok.setKind(tok::greater);
1317       FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1318       GreaterStashed = true;
1319     } else if (FormatTok->Tok.is(tok::lessless)) {
1320       FormatTok->Tok.setKind(tok::less);
1321       FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1322       LessStashed = true;
1323     }
1324 
1325     // Now FormatTok is the next non-whitespace token.
1326 
1327     StringRef Text = FormatTok->TokenText;
1328     size_t FirstNewlinePos = Text.find('\n');
1329     if (FirstNewlinePos == StringRef::npos) {
1330       // FIXME: ColumnWidth actually depends on the start column, we need to
1331       // take this into account when the token is moved.
1332       FormatTok->ColumnWidth =
1333           encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
1334       Column += FormatTok->ColumnWidth;
1335     } else {
1336       FormatTok->IsMultiline = true;
1337       // FIXME: ColumnWidth actually depends on the start column, we need to
1338       // take this into account when the token is moved.
1339       FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
1340           Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
1341 
1342       // The last line of the token always starts in column 0.
1343       // Thus, the length can be precomputed even in the presence of tabs.
1344       FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
1345           Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
1346           Encoding);
1347       Column = FormatTok->LastLineColumnWidth;
1348     }
1349 
1350     if (Style.Language == FormatStyle::LK_Cpp) {
1351       if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
1352             Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
1353                 tok::pp_define) &&
1354           std::find(ForEachMacros.begin(), ForEachMacros.end(),
1355                     FormatTok->Tok.getIdentifierInfo()) !=
1356               ForEachMacros.end()) {
1357         FormatTok->Type = TT_ForEachMacro;
1358       } else if (FormatTok->is(tok::identifier)) {
1359         if (MacroBlockBeginRegex.match(Text)) {
1360           FormatTok->Type = TT_MacroBlockBegin;
1361         } else if (MacroBlockEndRegex.match(Text)) {
1362           FormatTok->Type = TT_MacroBlockEnd;
1363         }
1364       }
1365     }
1366 
1367     return FormatTok;
1368   }
1369 
1370   FormatToken *FormatTok;
1371   bool IsFirstToken;
1372   bool GreaterStashed, LessStashed;
1373   unsigned Column;
1374   unsigned TrailingWhitespace;
1375   std::unique_ptr<Lexer> Lex;
1376   const SourceManager &SourceMgr;
1377   FileID ID;
1378   const FormatStyle &Style;
1379   IdentifierTable IdentTable;
1380   AdditionalKeywords Keywords;
1381   encoding::Encoding Encoding;
1382   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
1383   // Index (in 'Tokens') of the last token that starts a new line.
1384   unsigned FirstInLineIndex;
1385   SmallVector<FormatToken *, 16> Tokens;
1386   SmallVector<IdentifierInfo *, 8> ForEachMacros;
1387 
1388   bool FormattingDisabled;
1389 
1390   llvm::Regex MacroBlockBeginRegex;
1391   llvm::Regex MacroBlockEndRegex;
1392 
1393   void readRawToken(FormatToken &Tok) {
1394     Lex->LexFromRawLexer(Tok.Tok);
1395     Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1396                               Tok.Tok.getLength());
1397     // For formatting, treat unterminated string literals like normal string
1398     // literals.
1399     if (Tok.is(tok::unknown)) {
1400       if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
1401         Tok.Tok.setKind(tok::string_literal);
1402         Tok.IsUnterminatedLiteral = true;
1403       } else if (Style.Language == FormatStyle::LK_JavaScript &&
1404                  Tok.TokenText == "''") {
1405         Tok.Tok.setKind(tok::string_literal);
1406       }
1407     }
1408 
1409     if (Style.Language == FormatStyle::LK_JavaScript &&
1410         Tok.is(tok::char_constant)) {
1411       Tok.Tok.setKind(tok::string_literal);
1412     }
1413 
1414     if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
1415                                  Tok.TokenText == "/* clang-format on */")) {
1416       FormattingDisabled = false;
1417     }
1418 
1419     Tok.Finalized = FormattingDisabled;
1420 
1421     if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
1422                                  Tok.TokenText == "/* clang-format off */")) {
1423       FormattingDisabled = true;
1424     }
1425   }
1426 
1427   void resetLexer(unsigned Offset) {
1428     StringRef Buffer = SourceMgr.getBufferData(ID);
1429     Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
1430                         getFormattingLangOpts(Style), Buffer.begin(),
1431                         Buffer.begin() + Offset, Buffer.end()));
1432     Lex->SetKeepWhitespaceMode(true);
1433     TrailingWhitespace = 0;
1434   }
1435 };
1436 
1437 static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1438   switch (Language) {
1439   case FormatStyle::LK_Cpp:
1440     return "C++";
1441   case FormatStyle::LK_Java:
1442     return "Java";
1443   case FormatStyle::LK_JavaScript:
1444     return "JavaScript";
1445   case FormatStyle::LK_Proto:
1446     return "Proto";
1447   default:
1448     return "Unknown";
1449   }
1450 }
1451 
1452 class Environment {
1453 public:
1454   Environment(SourceManager &SM, FileID ID, ArrayRef<CharSourceRange> Ranges)
1455       : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM) {}
1456 
1457   Environment(FileID ID, std::unique_ptr<FileManager> FileMgr,
1458               std::unique_ptr<SourceManager> VirtualSM,
1459               std::unique_ptr<DiagnosticsEngine> Diagnostics,
1460               const std::vector<CharSourceRange> &CharRanges)
1461       : ID(ID), CharRanges(CharRanges.begin(), CharRanges.end()),
1462         SM(*VirtualSM), FileMgr(std::move(FileMgr)),
1463         VirtualSM(std::move(VirtualSM)), Diagnostics(std::move(Diagnostics)) {}
1464 
1465   // This sets up an virtual file system with file \p FileName containing \p
1466   // Code.
1467   static std::unique_ptr<Environment>
1468   CreateVirtualEnvironment(StringRef Code, StringRef FileName,
1469                            ArrayRef<tooling::Range> Ranges) {
1470     // This is referenced by `FileMgr` and will be released by `FileMgr` when it
1471     // is deleted.
1472     IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
1473         new vfs::InMemoryFileSystem);
1474     // This is passed to `SM` as reference, so the pointer has to be referenced
1475     // in `Environment` so that `FileMgr` can out-live this function scope.
1476     std::unique_ptr<FileManager> FileMgr(
1477         new FileManager(FileSystemOptions(), InMemoryFileSystem));
1478     // This is passed to `SM` as reference, so the pointer has to be referenced
1479     // by `Environment` due to the same reason above.
1480     std::unique_ptr<DiagnosticsEngine> Diagnostics(new DiagnosticsEngine(
1481         IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1482         new DiagnosticOptions));
1483     // This will be stored as reference, so the pointer has to be stored in
1484     // due to the same reason above.
1485     std::unique_ptr<SourceManager> VirtualSM(
1486         new SourceManager(*Diagnostics, *FileMgr));
1487     InMemoryFileSystem->addFile(
1488         FileName, 0, llvm::MemoryBuffer::getMemBuffer(
1489                          Code, FileName, /*RequiresNullTerminator=*/false));
1490     FileID ID = VirtualSM->createFileID(
1491         FileMgr->getFile(FileName), SourceLocation(), clang::SrcMgr::C_User);
1492     assert(ID.isValid());
1493     SourceLocation StartOfFile = VirtualSM->getLocForStartOfFile(ID);
1494     std::vector<CharSourceRange> CharRanges;
1495     for (const tooling::Range &Range : Ranges) {
1496       SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
1497       SourceLocation End = Start.getLocWithOffset(Range.getLength());
1498       CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1499     }
1500     return llvm::make_unique<Environment>(ID, std::move(FileMgr),
1501                                           std::move(VirtualSM),
1502                                           std::move(Diagnostics), CharRanges);
1503   }
1504 
1505   FileID getFileID() const { return ID; }
1506 
1507   StringRef getFileName() const { return FileName; }
1508 
1509   ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; }
1510 
1511   const SourceManager &getSourceManager() const { return SM; }
1512 
1513 private:
1514   FileID ID;
1515   StringRef FileName;
1516   SmallVector<CharSourceRange, 8> CharRanges;
1517   SourceManager &SM;
1518 
1519   // The order of these fields are important - they should be in the same order
1520   // as they are created in `CreateVirtualEnvironment` so that they can be
1521   // deleted in the reverse order as they are created.
1522   std::unique_ptr<FileManager> FileMgr;
1523   std::unique_ptr<SourceManager> VirtualSM;
1524   std::unique_ptr<DiagnosticsEngine> Diagnostics;
1525 };
1526 
1527 class TokenAnalyzer : public UnwrappedLineConsumer {
1528 public:
1529   TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
1530       : Style(Style), Env(Env),
1531         AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()),
1532         UnwrappedLines(1),
1533         Encoding(encoding::detectEncoding(
1534             Env.getSourceManager().getBufferData(Env.getFileID()))) {
1535     DEBUG(llvm::dbgs() << "File encoding: "
1536                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1537                                                                : "unknown")
1538                        << "\n");
1539     DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
1540                        << "\n");
1541   }
1542 
1543   tooling::Replacements process() {
1544     tooling::Replacements Result;
1545     FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), Style,
1546                             Encoding);
1547 
1548     UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
1549                                *this);
1550     Parser.parse();
1551     assert(UnwrappedLines.rbegin()->empty());
1552     for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1553          ++Run) {
1554       DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1555       SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1556 
1557       TokenAnnotator Annotator(Style, Tokens.getKeywords());
1558       for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1559         AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1560         Annotator.annotate(*AnnotatedLines.back());
1561       }
1562 
1563       tooling::Replacements RunResult =
1564           analyze(Annotator, AnnotatedLines, Tokens, Result);
1565 
1566       DEBUG({
1567         llvm::dbgs() << "Replacements for run " << Run << ":\n";
1568         for (tooling::Replacements::iterator I = RunResult.begin(),
1569                                              E = RunResult.end();
1570              I != E; ++I) {
1571           llvm::dbgs() << I->toString() << "\n";
1572         }
1573       });
1574       for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1575         delete AnnotatedLines[i];
1576       }
1577       Result.insert(RunResult.begin(), RunResult.end());
1578     }
1579     return Result;
1580   }
1581 
1582 protected:
1583   virtual tooling::Replacements
1584   analyze(TokenAnnotator &Annotator,
1585           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1586           FormatTokenLexer &Tokens, tooling::Replacements &Result) = 0;
1587 
1588   void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
1589     assert(!UnwrappedLines.empty());
1590     UnwrappedLines.back().push_back(TheLine);
1591   }
1592 
1593   void finishRun() override {
1594     UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1595   }
1596 
1597   FormatStyle Style;
1598   // Stores Style, FileID and SourceManager etc.
1599   const Environment &Env;
1600   // AffectedRangeMgr stores ranges to be fixed.
1601   AffectedRangeManager AffectedRangeMgr;
1602   SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
1603   encoding::Encoding Encoding;
1604 };
1605 
1606 class Formatter : public TokenAnalyzer {
1607 public:
1608   Formatter(const Environment &Env, const FormatStyle &Style,
1609             bool *IncompleteFormat)
1610       : TokenAnalyzer(Env, Style), IncompleteFormat(IncompleteFormat) {}
1611 
1612   tooling::Replacements
1613   analyze(TokenAnnotator &Annotator,
1614           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1615           FormatTokenLexer &Tokens, tooling::Replacements &Result) override {
1616     deriveLocalStyle(AnnotatedLines);
1617     AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
1618                                           AnnotatedLines.end());
1619 
1620     if (Style.Language == FormatStyle::LK_JavaScript &&
1621         Style.JavaScriptQuotes != FormatStyle::JSQS_Leave)
1622       requoteJSStringLiteral(AnnotatedLines, Result);
1623 
1624     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1625       Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1626     }
1627 
1628     Annotator.setCommentLineLevels(AnnotatedLines);
1629 
1630     WhitespaceManager Whitespaces(
1631         Env.getSourceManager(), Style,
1632         inputUsesCRLF(Env.getSourceManager().getBufferData(Env.getFileID())));
1633     ContinuationIndenter Indenter(Style, Tokens.getKeywords(),
1634                                   Env.getSourceManager(), Whitespaces, Encoding,
1635                                   BinPackInconclusiveFunctions);
1636     UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),
1637                            IncompleteFormat)
1638         .format(AnnotatedLines);
1639     return Whitespaces.generateReplacements();
1640   }
1641 
1642 private:
1643   // If the last token is a double/single-quoted string literal, generates a
1644   // replacement with a single/double quoted string literal, re-escaping the
1645   // contents in the process.
1646   void requoteJSStringLiteral(SmallVectorImpl<AnnotatedLine *> &Lines,
1647                               tooling::Replacements &Result) {
1648     for (AnnotatedLine *Line : Lines) {
1649       requoteJSStringLiteral(Line->Children, Result);
1650       if (!Line->Affected)
1651         continue;
1652       for (FormatToken *FormatTok = Line->First; FormatTok;
1653            FormatTok = FormatTok->Next) {
1654         StringRef Input = FormatTok->TokenText;
1655         if (!FormatTok->isStringLiteral() ||
1656             // NB: testing for not starting with a double quote to avoid
1657             // breaking
1658             // `template strings`.
1659             (Style.JavaScriptQuotes == FormatStyle::JSQS_Single &&
1660              !Input.startswith("\"")) ||
1661             (Style.JavaScriptQuotes == FormatStyle::JSQS_Double &&
1662              !Input.startswith("\'")))
1663           continue;
1664 
1665         // Change start and end quote.
1666         bool IsSingle = Style.JavaScriptQuotes == FormatStyle::JSQS_Single;
1667         SourceLocation Start = FormatTok->Tok.getLocation();
1668         auto Replace = [&](SourceLocation Start, unsigned Length,
1669                            StringRef ReplacementText) {
1670           Result.insert(tooling::Replacement(Env.getSourceManager(), Start,
1671                                              Length, ReplacementText));
1672         };
1673         Replace(Start, 1, IsSingle ? "'" : "\"");
1674         Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1,
1675                 IsSingle ? "'" : "\"");
1676 
1677         // Escape internal quotes.
1678         size_t ColumnWidth = FormatTok->TokenText.size();
1679         bool Escaped = false;
1680         for (size_t i = 1; i < Input.size() - 1; i++) {
1681           switch (Input[i]) {
1682           case '\\':
1683             if (!Escaped && i + 1 < Input.size() &&
1684                 ((IsSingle && Input[i + 1] == '"') ||
1685                  (!IsSingle && Input[i + 1] == '\''))) {
1686               // Remove this \, it's escaping a " or ' that no longer needs
1687               // escaping
1688               ColumnWidth--;
1689               Replace(Start.getLocWithOffset(i), 1, "");
1690               continue;
1691             }
1692             Escaped = !Escaped;
1693             break;
1694           case '\"':
1695           case '\'':
1696             if (!Escaped && IsSingle == (Input[i] == '\'')) {
1697               // Escape the quote.
1698               Replace(Start.getLocWithOffset(i), 0, "\\");
1699               ColumnWidth++;
1700             }
1701             Escaped = false;
1702             break;
1703           default:
1704             Escaped = false;
1705             break;
1706           }
1707         }
1708 
1709         // For formatting, count the number of non-escaped single quotes in them
1710         // and adjust ColumnWidth to take the added escapes into account.
1711         // FIXME(martinprobst): this might conflict with code breaking a long
1712         // string literal (which clang-format doesn't do, yet). For that to
1713         // work, this code would have to modify TokenText directly.
1714         FormatTok->ColumnWidth = ColumnWidth;
1715       }
1716     }
1717   }
1718 
1719   static bool inputUsesCRLF(StringRef Text) {
1720     return Text.count('\r') * 2 > Text.count('\n');
1721   }
1722 
1723   bool
1724   hasCpp03IncompatibleFormat(const SmallVectorImpl<AnnotatedLine *> &Lines) {
1725     for (const AnnotatedLine *Line : Lines) {
1726       if (hasCpp03IncompatibleFormat(Line->Children))
1727         return true;
1728       for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) {
1729         if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1730           if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener))
1731             return true;
1732           if (Tok->is(TT_TemplateCloser) &&
1733               Tok->Previous->is(TT_TemplateCloser))
1734             return true;
1735         }
1736       }
1737     }
1738     return false;
1739   }
1740 
1741   int countVariableAlignments(const SmallVectorImpl<AnnotatedLine *> &Lines) {
1742     int AlignmentDiff = 0;
1743     for (const AnnotatedLine *Line : Lines) {
1744       AlignmentDiff += countVariableAlignments(Line->Children);
1745       for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) {
1746         if (!Tok->is(TT_PointerOrReference))
1747           continue;
1748         bool SpaceBefore =
1749             Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1750         bool SpaceAfter = Tok->Next->WhitespaceRange.getBegin() !=
1751                           Tok->Next->WhitespaceRange.getEnd();
1752         if (SpaceBefore && !SpaceAfter)
1753           ++AlignmentDiff;
1754         if (!SpaceBefore && SpaceAfter)
1755           --AlignmentDiff;
1756       }
1757     }
1758     return AlignmentDiff;
1759   }
1760 
1761   void
1762   deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1763     bool HasBinPackedFunction = false;
1764     bool HasOnePerLineFunction = false;
1765     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1766       if (!AnnotatedLines[i]->First->Next)
1767         continue;
1768       FormatToken *Tok = AnnotatedLines[i]->First->Next;
1769       while (Tok->Next) {
1770         if (Tok->PackingKind == PPK_BinPacked)
1771           HasBinPackedFunction = true;
1772         if (Tok->PackingKind == PPK_OnePerLine)
1773           HasOnePerLineFunction = true;
1774 
1775         Tok = Tok->Next;
1776       }
1777     }
1778     if (Style.DerivePointerAlignment)
1779       Style.PointerAlignment = countVariableAlignments(AnnotatedLines) <= 0
1780                                    ? FormatStyle::PAS_Left
1781                                    : FormatStyle::PAS_Right;
1782     if (Style.Standard == FormatStyle::LS_Auto)
1783       Style.Standard = hasCpp03IncompatibleFormat(AnnotatedLines)
1784                            ? FormatStyle::LS_Cpp11
1785                            : FormatStyle::LS_Cpp03;
1786     BinPackInconclusiveFunctions =
1787         HasBinPackedFunction || !HasOnePerLineFunction;
1788   }
1789 
1790   bool BinPackInconclusiveFunctions;
1791   bool *IncompleteFormat;
1792 };
1793 
1794 // This class clean up the erroneous/redundant code around the given ranges in
1795 // file.
1796 class Cleaner : public TokenAnalyzer {
1797 public:
1798   Cleaner(const Environment &Env, const FormatStyle &Style)
1799       : TokenAnalyzer(Env, Style),
1800         DeletedTokens(FormatTokenLess(Env.getSourceManager())) {}
1801 
1802   // FIXME: eliminate unused parameters.
1803   tooling::Replacements
1804   analyze(TokenAnnotator &Annotator,
1805           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1806           FormatTokenLexer &Tokens, tooling::Replacements &Result) override {
1807     // FIXME: in the current implementation the granularity of affected range
1808     // is an annotated line. However, this is not sufficient. Furthermore,
1809     // redundant code introduced by replacements does not necessarily
1810     // intercept with ranges of replacements that result in the redundancy.
1811     // To determine if some redundant code is actually introduced by
1812     // replacements(e.g. deletions), we need to come up with a more
1813     // sophisticated way of computing affected ranges.
1814     AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
1815                                           AnnotatedLines.end());
1816 
1817     checkEmptyNamespace(AnnotatedLines);
1818 
1819     return generateFixes();
1820   }
1821 
1822 private:
1823   bool containsOnlyComments(const AnnotatedLine &Line) {
1824     for (FormatToken *Tok = Line.First; Tok != nullptr; Tok = Tok->Next) {
1825       if (Tok->isNot(tok::comment))
1826         return false;
1827     }
1828     return true;
1829   }
1830 
1831   // Iterate through all lines and remove any empty (nested) namespaces.
1832   void checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1833     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1834       auto &Line = *AnnotatedLines[i];
1835       if (Line.startsWith(tok::kw_namespace) ||
1836           Line.startsWith(tok::kw_inline, tok::kw_namespace)) {
1837         checkEmptyNamespace(AnnotatedLines, i, i);
1838       }
1839     }
1840 
1841     for (auto Line : DeletedLines) {
1842       FormatToken *Tok = AnnotatedLines[Line]->First;
1843       while (Tok) {
1844         deleteToken(Tok);
1845         Tok = Tok->Next;
1846       }
1847     }
1848   }
1849 
1850   // The function checks if the namespace, which starts from \p CurrentLine, and
1851   // its nested namespaces are empty and delete them if they are empty. It also
1852   // sets \p NewLine to the last line checked.
1853   // Returns true if the current namespace is empty.
1854   bool checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1855                            unsigned CurrentLine, unsigned &NewLine) {
1856     unsigned InitLine = CurrentLine, End = AnnotatedLines.size();
1857     if (Style.BraceWrapping.AfterNamespace) {
1858       // If the left brace is in a new line, we should consume it first so that
1859       // it does not make the namespace non-empty.
1860       // FIXME: error handling if there is no left brace.
1861       if (!AnnotatedLines[++CurrentLine]->startsWith(tok::l_brace)) {
1862         NewLine = CurrentLine;
1863         return false;
1864       }
1865     } else if (!AnnotatedLines[CurrentLine]->endsWith(tok::l_brace)) {
1866       return false;
1867     }
1868     while (++CurrentLine < End) {
1869       if (AnnotatedLines[CurrentLine]->startsWith(tok::r_brace))
1870         break;
1871 
1872       if (AnnotatedLines[CurrentLine]->startsWith(tok::kw_namespace) ||
1873           AnnotatedLines[CurrentLine]->startsWith(tok::kw_inline,
1874                                                   tok::kw_namespace)) {
1875         if (!checkEmptyNamespace(AnnotatedLines, CurrentLine, NewLine))
1876           return false;
1877         CurrentLine = NewLine;
1878         continue;
1879       }
1880 
1881       if (containsOnlyComments(*AnnotatedLines[CurrentLine]))
1882         continue;
1883 
1884       // If there is anything other than comments or nested namespaces in the
1885       // current namespace, the namespace cannot be empty.
1886       NewLine = CurrentLine;
1887       return false;
1888     }
1889 
1890     NewLine = CurrentLine;
1891     if (CurrentLine >= End)
1892       return false;
1893 
1894     // Check if the empty namespace is actually affected by changed ranges.
1895     if (!AffectedRangeMgr.affectsCharSourceRange(CharSourceRange::getCharRange(
1896             AnnotatedLines[InitLine]->First->Tok.getLocation(),
1897             AnnotatedLines[CurrentLine]->Last->Tok.getEndLoc())))
1898       return false;
1899 
1900     for (unsigned i = InitLine; i <= CurrentLine; ++i) {
1901       DeletedLines.insert(i);
1902     }
1903 
1904     return true;
1905   }
1906 
1907   // Delete the given token.
1908   inline void deleteToken(FormatToken *Tok) {
1909     if (Tok)
1910       DeletedTokens.insert(Tok);
1911   }
1912 
1913   tooling::Replacements generateFixes() {
1914     tooling::Replacements Fixes;
1915     std::vector<FormatToken *> Tokens;
1916     std::copy(DeletedTokens.begin(), DeletedTokens.end(),
1917               std::back_inserter(Tokens));
1918 
1919     // Merge multiple continuous token deletions into one big deletion so that
1920     // the number of replacements can be reduced. This makes computing affected
1921     // ranges more efficient when we run reformat on the changed code.
1922     unsigned Idx = 0;
1923     while (Idx < Tokens.size()) {
1924       unsigned St = Idx, End = Idx;
1925       while ((End + 1) < Tokens.size() &&
1926              Tokens[End]->Next == Tokens[End + 1]) {
1927         End++;
1928       }
1929       auto SR = CharSourceRange::getCharRange(Tokens[St]->Tok.getLocation(),
1930                                               Tokens[End]->Tok.getEndLoc());
1931       Fixes.insert(tooling::Replacement(Env.getSourceManager(), SR, ""));
1932       Idx = End + 1;
1933     }
1934 
1935     return Fixes;
1936   }
1937 
1938   // Class for less-than inequality comparason for the set `RedundantTokens`.
1939   // We store tokens in the order they appear in the translation unit so that
1940   // we do not need to sort them in `generateFixes()`.
1941   struct FormatTokenLess {
1942     FormatTokenLess(const SourceManager &SM) : SM(SM) {}
1943 
1944     bool operator()(const FormatToken *LHS, const FormatToken *RHS) {
1945       return SM.isBeforeInTranslationUnit(LHS->Tok.getLocation(),
1946                                           RHS->Tok.getLocation());
1947     }
1948     const SourceManager &SM;
1949   };
1950 
1951   // Tokens to be deleted.
1952   std::set<FormatToken *, FormatTokenLess> DeletedTokens;
1953   // The line numbers of lines to be deleted.
1954   std::set<unsigned> DeletedLines;
1955 };
1956 
1957 struct IncludeDirective {
1958   StringRef Filename;
1959   StringRef Text;
1960   unsigned Offset;
1961   int Category;
1962 };
1963 
1964 } // end anonymous namespace
1965 
1966 // Determines whether 'Ranges' intersects with ('Start', 'End').
1967 static bool affectsRange(ArrayRef<tooling::Range> Ranges, unsigned Start,
1968                          unsigned End) {
1969   for (auto Range : Ranges) {
1970     if (Range.getOffset() < End &&
1971         Range.getOffset() + Range.getLength() > Start)
1972       return true;
1973   }
1974   return false;
1975 }
1976 
1977 // Sorts a block of includes given by 'Includes' alphabetically adding the
1978 // necessary replacement to 'Replaces'. 'Includes' must be in strict source
1979 // order.
1980 static void sortIncludes(const FormatStyle &Style,
1981                          const SmallVectorImpl<IncludeDirective> &Includes,
1982                          ArrayRef<tooling::Range> Ranges, StringRef FileName,
1983                          tooling::Replacements &Replaces, unsigned *Cursor) {
1984   if (!affectsRange(Ranges, Includes.front().Offset,
1985                     Includes.back().Offset + Includes.back().Text.size()))
1986     return;
1987   SmallVector<unsigned, 16> Indices;
1988   for (unsigned i = 0, e = Includes.size(); i != e; ++i)
1989     Indices.push_back(i);
1990   std::stable_sort(
1991       Indices.begin(), Indices.end(), [&](unsigned LHSI, unsigned RHSI) {
1992         return std::tie(Includes[LHSI].Category, Includes[LHSI].Filename) <
1993                std::tie(Includes[RHSI].Category, Includes[RHSI].Filename);
1994       });
1995 
1996   // If the #includes are out of order, we generate a single replacement fixing
1997   // the entire block. Otherwise, no replacement is generated.
1998   bool OutOfOrder = false;
1999   for (unsigned i = 1, e = Indices.size(); i != e; ++i) {
2000     if (Indices[i] != i) {
2001       OutOfOrder = true;
2002       break;
2003     }
2004   }
2005   if (!OutOfOrder)
2006     return;
2007 
2008   std::string result;
2009   bool CursorMoved = false;
2010   for (unsigned Index : Indices) {
2011     if (!result.empty())
2012       result += "\n";
2013     result += Includes[Index].Text;
2014 
2015     if (Cursor && !CursorMoved) {
2016       unsigned Start = Includes[Index].Offset;
2017       unsigned End = Start + Includes[Index].Text.size();
2018       if (*Cursor >= Start && *Cursor < End) {
2019         *Cursor = Includes.front().Offset + result.size() + *Cursor - End;
2020         CursorMoved = true;
2021       }
2022     }
2023   }
2024 
2025   // Sorting #includes shouldn't change their total number of characters.
2026   // This would otherwise mess up 'Ranges'.
2027   assert(result.size() ==
2028          Includes.back().Offset + Includes.back().Text.size() -
2029              Includes.front().Offset);
2030 
2031   Replaces.insert(tooling::Replacement(FileName, Includes.front().Offset,
2032                                        result.size(), result));
2033 }
2034 
2035 tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
2036                                    ArrayRef<tooling::Range> Ranges,
2037                                    StringRef FileName, unsigned *Cursor) {
2038   tooling::Replacements Replaces;
2039   if (!Style.SortIncludes)
2040     return Replaces;
2041 
2042   unsigned Prev = 0;
2043   unsigned SearchFrom = 0;
2044   llvm::Regex IncludeRegex(
2045       R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))");
2046   SmallVector<StringRef, 4> Matches;
2047   SmallVector<IncludeDirective, 16> IncludesInBlock;
2048 
2049   // In compiled files, consider the first #include to be the main #include of
2050   // the file if it is not a system #include. This ensures that the header
2051   // doesn't have hidden dependencies
2052   // (http://llvm.org/docs/CodingStandards.html#include-style).
2053   //
2054   // FIXME: Do some sanity checking, e.g. edit distance of the base name, to fix
2055   // cases where the first #include is unlikely to be the main header.
2056   bool IsSource = FileName.endswith(".c") || FileName.endswith(".cc") ||
2057                   FileName.endswith(".cpp") || FileName.endswith(".c++") ||
2058                   FileName.endswith(".cxx") || FileName.endswith(".m") ||
2059                   FileName.endswith(".mm");
2060   StringRef FileStem = llvm::sys::path::stem(FileName);
2061   bool FirstIncludeBlock = true;
2062   bool MainIncludeFound = false;
2063 
2064   // Create pre-compiled regular expressions for the #include categories.
2065   SmallVector<llvm::Regex, 4> CategoryRegexs;
2066   for (const auto &Category : Style.IncludeCategories)
2067     CategoryRegexs.emplace_back(Category.Regex);
2068 
2069   bool FormattingOff = false;
2070 
2071   for (;;) {
2072     auto Pos = Code.find('\n', SearchFrom);
2073     StringRef Line =
2074         Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev);
2075 
2076     StringRef Trimmed = Line.trim();
2077     if (Trimmed == "// clang-format off")
2078       FormattingOff = true;
2079     else if (Trimmed == "// clang-format on")
2080       FormattingOff = false;
2081 
2082     if (!FormattingOff && !Line.endswith("\\")) {
2083       if (IncludeRegex.match(Line, &Matches)) {
2084         StringRef IncludeName = Matches[2];
2085         int Category = INT_MAX;
2086         for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) {
2087           if (CategoryRegexs[i].match(IncludeName)) {
2088             Category = Style.IncludeCategories[i].Priority;
2089             break;
2090           }
2091         }
2092         if (IsSource && !MainIncludeFound && Category > 0 &&
2093             FirstIncludeBlock && IncludeName.startswith("\"")) {
2094           StringRef HeaderStem =
2095               llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1));
2096           if (FileStem.startswith(HeaderStem)) {
2097             llvm::Regex MainIncludeRegex(
2098                 (HeaderStem + Style.IncludeIsMainRegex).str());
2099             if (MainIncludeRegex.match(FileStem)) {
2100               Category = 0;
2101               MainIncludeFound = true;
2102             }
2103           }
2104         }
2105         IncludesInBlock.push_back({IncludeName, Line, Prev, Category});
2106       } else if (!IncludesInBlock.empty()) {
2107         sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,
2108                      Cursor);
2109         IncludesInBlock.clear();
2110         FirstIncludeBlock = false;
2111       }
2112       Prev = Pos + 1;
2113     }
2114     if (Pos == StringRef::npos || Pos + 1 == Code.size())
2115       break;
2116     SearchFrom = Pos + 1;
2117   }
2118   if (!IncludesInBlock.empty())
2119     sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, Cursor);
2120   return Replaces;
2121 }
2122 
2123 template <typename T>
2124 static tooling::Replacements
2125 processReplacements(T ProcessFunc, StringRef Code,
2126                     const tooling::Replacements &Replaces,
2127                     const FormatStyle &Style) {
2128   if (Replaces.empty())
2129     return tooling::Replacements();
2130 
2131   std::string NewCode = applyAllReplacements(Code, Replaces);
2132   std::vector<tooling::Range> ChangedRanges =
2133       tooling::calculateChangedRanges(Replaces);
2134   StringRef FileName = Replaces.begin()->getFilePath();
2135 
2136   tooling::Replacements FormatReplaces =
2137       ProcessFunc(Style, NewCode, ChangedRanges, FileName);
2138 
2139   return mergeReplacements(Replaces, FormatReplaces);
2140 }
2141 
2142 tooling::Replacements formatReplacements(StringRef Code,
2143                                          const tooling::Replacements &Replaces,
2144                                          const FormatStyle &Style) {
2145   // We need to use lambda function here since there are two versions of
2146   // `reformat`.
2147   auto Reformat = [](const FormatStyle &Style, StringRef Code,
2148                      std::vector<tooling::Range> Ranges,
2149                      StringRef FileName) -> tooling::Replacements {
2150     return reformat(Style, Code, Ranges, FileName);
2151   };
2152   return processReplacements(Reformat, Code, Replaces, Style);
2153 }
2154 
2155 tooling::Replacements
2156 cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces,
2157                           const FormatStyle &Style) {
2158   // We need to use lambda function here since there are two versions of
2159   // `cleanup`.
2160   auto Cleanup = [](const FormatStyle &Style, StringRef Code,
2161                     std::vector<tooling::Range> Ranges,
2162                     StringRef FileName) -> tooling::Replacements {
2163     return cleanup(Style, Code, Ranges, FileName);
2164   };
2165   return processReplacements(Cleanup, Code, Replaces, Style);
2166 }
2167 
2168 tooling::Replacements reformat(const FormatStyle &Style, SourceManager &SM,
2169                                FileID ID, ArrayRef<CharSourceRange> Ranges,
2170                                bool *IncompleteFormat) {
2171   FormatStyle Expanded = expandPresets(Style);
2172   if (Expanded.DisableFormat)
2173     return tooling::Replacements();
2174 
2175   Environment Env(SM, ID, Ranges);
2176   Formatter Format(Env, Expanded, IncompleteFormat);
2177   return Format.process();
2178 }
2179 
2180 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
2181                                ArrayRef<tooling::Range> Ranges,
2182                                StringRef FileName, bool *IncompleteFormat) {
2183   FormatStyle Expanded = expandPresets(Style);
2184   if (Expanded.DisableFormat)
2185     return tooling::Replacements();
2186 
2187   std::unique_ptr<Environment> Env =
2188       Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
2189   Formatter Format(*Env, Expanded, IncompleteFormat);
2190   return Format.process();
2191 }
2192 
2193 tooling::Replacements cleanup(const FormatStyle &Style, SourceManager &SM,
2194                               FileID ID, ArrayRef<CharSourceRange> Ranges) {
2195   Environment Env(SM, ID, Ranges);
2196   Cleaner Clean(Env, Style);
2197   return Clean.process();
2198 }
2199 
2200 tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,
2201                               ArrayRef<tooling::Range> Ranges,
2202                               StringRef FileName) {
2203   std::unique_ptr<Environment> Env =
2204       Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
2205   Cleaner Clean(*Env, Style);
2206   return Clean.process();
2207 }
2208 
2209 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
2210   LangOptions LangOpts;
2211   LangOpts.CPlusPlus = 1;
2212   LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
2213   LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
2214   LangOpts.LineComment = 1;
2215   bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp;
2216   LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;
2217   LangOpts.Bool = 1;
2218   LangOpts.ObjC1 = 1;
2219   LangOpts.ObjC2 = 1;
2220   LangOpts.MicrosoftExt = 1;    // To get kw___try, kw___finally.
2221   LangOpts.DeclSpecKeyword = 1; // To get __declspec.
2222   return LangOpts;
2223 }
2224 
2225 const char *StyleOptionHelpDescription =
2226     "Coding style, currently supports:\n"
2227     "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
2228     "Use -style=file to load style configuration from\n"
2229     ".clang-format file located in one of the parent\n"
2230     "directories of the source file (or current\n"
2231     "directory for stdin).\n"
2232     "Use -style=\"{key: value, ...}\" to set specific\n"
2233     "parameters, e.g.:\n"
2234     "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
2235 
2236 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
2237   if (FileName.endswith(".java"))
2238     return FormatStyle::LK_Java;
2239   if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts"))
2240     return FormatStyle::LK_JavaScript; // JavaScript or TypeScript.
2241   if (FileName.endswith_lower(".proto") ||
2242       FileName.endswith_lower(".protodevel"))
2243     return FormatStyle::LK_Proto;
2244   if (FileName.endswith_lower(".td"))
2245     return FormatStyle::LK_TableGen;
2246   return FormatStyle::LK_Cpp;
2247 }
2248 
2249 FormatStyle getStyle(StringRef StyleName, StringRef FileName,
2250                      StringRef FallbackStyle, vfs::FileSystem *FS) {
2251   if (!FS) {
2252     FS = vfs::getRealFileSystem().get();
2253   }
2254   FormatStyle Style = getLLVMStyle();
2255   Style.Language = getLanguageByFileName(FileName);
2256   if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
2257     llvm::errs() << "Invalid fallback style \"" << FallbackStyle
2258                  << "\" using LLVM style\n";
2259     return Style;
2260   }
2261 
2262   if (StyleName.startswith("{")) {
2263     // Parse YAML/JSON style from the command line.
2264     if (std::error_code ec = parseConfiguration(StyleName, &Style)) {
2265       llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
2266                    << FallbackStyle << " style\n";
2267     }
2268     return Style;
2269   }
2270 
2271   if (!StyleName.equals_lower("file")) {
2272     if (!getPredefinedStyle(StyleName, Style.Language, &Style))
2273       llvm::errs() << "Invalid value for -style, using " << FallbackStyle
2274                    << " style\n";
2275     return Style;
2276   }
2277 
2278   // Look for .clang-format/_clang-format file in the file's parent directories.
2279   SmallString<128> UnsuitableConfigFiles;
2280   SmallString<128> Path(FileName);
2281   llvm::sys::fs::make_absolute(Path);
2282   for (StringRef Directory = Path; !Directory.empty();
2283        Directory = llvm::sys::path::parent_path(Directory)) {
2284 
2285     auto Status = FS->status(Directory);
2286     if (!Status ||
2287         Status->getType() != llvm::sys::fs::file_type::directory_file) {
2288       continue;
2289     }
2290 
2291     SmallString<128> ConfigFile(Directory);
2292 
2293     llvm::sys::path::append(ConfigFile, ".clang-format");
2294     DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
2295 
2296     Status = FS->status(ConfigFile.str());
2297     bool IsFile =
2298         Status && (Status->getType() == llvm::sys::fs::file_type::regular_file);
2299     if (!IsFile) {
2300       // Try _clang-format too, since dotfiles are not commonly used on Windows.
2301       ConfigFile = Directory;
2302       llvm::sys::path::append(ConfigFile, "_clang-format");
2303       DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
2304       Status = FS->status(ConfigFile.str());
2305       IsFile = Status &&
2306                (Status->getType() == llvm::sys::fs::file_type::regular_file);
2307     }
2308 
2309     if (IsFile) {
2310       llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
2311           FS->getBufferForFile(ConfigFile.str());
2312       if (std::error_code EC = Text.getError()) {
2313         llvm::errs() << EC.message() << "\n";
2314         break;
2315       }
2316       if (std::error_code ec =
2317               parseConfiguration(Text.get()->getBuffer(), &Style)) {
2318         if (ec == ParseError::Unsuitable) {
2319           if (!UnsuitableConfigFiles.empty())
2320             UnsuitableConfigFiles.append(", ");
2321           UnsuitableConfigFiles.append(ConfigFile);
2322           continue;
2323         }
2324         llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
2325                      << "\n";
2326         break;
2327       }
2328       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
2329       return Style;
2330     }
2331   }
2332   if (!UnsuitableConfigFiles.empty()) {
2333     llvm::errs() << "Configuration file(s) do(es) not support "
2334                  << getLanguageName(Style.Language) << ": "
2335                  << UnsuitableConfigFiles << "\n";
2336   }
2337   return Style;
2338 }
2339 
2340 } // namespace format
2341 } // namespace clang
2342