1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "clang/Format/Format.h"
17 #include "AffectedRangeManager.h"
18 #include "ContinuationIndenter.h"
19 #include "FormatInternal.h"
20 #include "FormatTokenLexer.h"
21 #include "NamespaceEndCommentsFixer.h"
22 #include "SortJavaScriptImports.h"
23 #include "TokenAnalyzer.h"
24 #include "TokenAnnotator.h"
25 #include "UnwrappedLineFormatter.h"
26 #include "UnwrappedLineParser.h"
27 #include "UsingDeclarationsSorter.h"
28 #include "WhitespaceManager.h"
29 #include "clang/Basic/Diagnostic.h"
30 #include "clang/Basic/DiagnosticOptions.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/VirtualFileSystem.h"
33 #include "clang/Lex/Lexer.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/Support/Allocator.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/Path.h"
39 #include "llvm/Support/Regex.h"
40 #include "llvm/Support/YAMLTraits.h"
41 #include <algorithm>
42 #include <memory>
43 #include <string>
44 
45 #define DEBUG_TYPE "format-formatter"
46 
47 using clang::format::FormatStyle;
48 
49 LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory)
50 LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::RawStringFormat)
51 
52 namespace llvm {
53 namespace yaml {
54 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
55   static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
56     IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
57     IO.enumCase(Value, "Java", FormatStyle::LK_Java);
58     IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
59     IO.enumCase(Value, "ObjC", FormatStyle::LK_ObjC);
60     IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
61     IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen);
62     IO.enumCase(Value, "TextProto", FormatStyle::LK_TextProto);
63   }
64 };
65 
66 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
67   static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
68     IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
69     IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
70     IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
71     IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
72     IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
73   }
74 };
75 
76 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
77   static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
78     IO.enumCase(Value, "Never", FormatStyle::UT_Never);
79     IO.enumCase(Value, "false", FormatStyle::UT_Never);
80     IO.enumCase(Value, "Always", FormatStyle::UT_Always);
81     IO.enumCase(Value, "true", FormatStyle::UT_Always);
82     IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
83     IO.enumCase(Value, "ForContinuationAndIndentation",
84                 FormatStyle::UT_ForContinuationAndIndentation);
85   }
86 };
87 
88 template <> struct ScalarEnumerationTraits<FormatStyle::JavaScriptQuoteStyle> {
89   static void enumeration(IO &IO, FormatStyle::JavaScriptQuoteStyle &Value) {
90     IO.enumCase(Value, "Leave", FormatStyle::JSQS_Leave);
91     IO.enumCase(Value, "Single", FormatStyle::JSQS_Single);
92     IO.enumCase(Value, "Double", FormatStyle::JSQS_Double);
93   }
94 };
95 
96 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
97   static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
98     IO.enumCase(Value, "None", FormatStyle::SFS_None);
99     IO.enumCase(Value, "false", FormatStyle::SFS_None);
100     IO.enumCase(Value, "All", FormatStyle::SFS_All);
101     IO.enumCase(Value, "true", FormatStyle::SFS_All);
102     IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline);
103     IO.enumCase(Value, "InlineOnly", FormatStyle::SFS_InlineOnly);
104     IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty);
105   }
106 };
107 
108 template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> {
109   static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) {
110     IO.enumCase(Value, "All", FormatStyle::BOS_All);
111     IO.enumCase(Value, "true", FormatStyle::BOS_All);
112     IO.enumCase(Value, "None", FormatStyle::BOS_None);
113     IO.enumCase(Value, "false", FormatStyle::BOS_None);
114     IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment);
115   }
116 };
117 
118 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
119   static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
120     IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
121     IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
122     IO.enumCase(Value, "Mozilla", FormatStyle::BS_Mozilla);
123     IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
124     IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
125     IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
126     IO.enumCase(Value, "WebKit", FormatStyle::BS_WebKit);
127     IO.enumCase(Value, "Custom", FormatStyle::BS_Custom);
128   }
129 };
130 
131 template <>
132 struct ScalarEnumerationTraits<FormatStyle::BreakConstructorInitializersStyle> {
133   static void
134   enumeration(IO &IO, FormatStyle::BreakConstructorInitializersStyle &Value) {
135     IO.enumCase(Value, "BeforeColon", FormatStyle::BCIS_BeforeColon);
136     IO.enumCase(Value, "BeforeComma", FormatStyle::BCIS_BeforeComma);
137     IO.enumCase(Value, "AfterColon", FormatStyle::BCIS_AfterColon);
138   }
139 };
140 
141 template <>
142 struct ScalarEnumerationTraits<FormatStyle::PPDirectiveIndentStyle> {
143   static void enumeration(IO &IO, FormatStyle::PPDirectiveIndentStyle &Value) {
144     IO.enumCase(Value, "None", FormatStyle::PPDIS_None);
145     IO.enumCase(Value, "AfterHash", FormatStyle::PPDIS_AfterHash);
146   }
147 };
148 
149 template <>
150 struct ScalarEnumerationTraits<FormatStyle::ReturnTypeBreakingStyle> {
151   static void enumeration(IO &IO, FormatStyle::ReturnTypeBreakingStyle &Value) {
152     IO.enumCase(Value, "None", FormatStyle::RTBS_None);
153     IO.enumCase(Value, "All", FormatStyle::RTBS_All);
154     IO.enumCase(Value, "TopLevel", FormatStyle::RTBS_TopLevel);
155     IO.enumCase(Value, "TopLevelDefinitions",
156                 FormatStyle::RTBS_TopLevelDefinitions);
157     IO.enumCase(Value, "AllDefinitions", FormatStyle::RTBS_AllDefinitions);
158   }
159 };
160 
161 template <>
162 struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> {
163   static void
164   enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) {
165     IO.enumCase(Value, "None", FormatStyle::DRTBS_None);
166     IO.enumCase(Value, "All", FormatStyle::DRTBS_All);
167     IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel);
168 
169     // For backward compatibility.
170     IO.enumCase(Value, "false", FormatStyle::DRTBS_None);
171     IO.enumCase(Value, "true", FormatStyle::DRTBS_All);
172   }
173 };
174 
175 template <>
176 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
177   static void enumeration(IO &IO,
178                           FormatStyle::NamespaceIndentationKind &Value) {
179     IO.enumCase(Value, "None", FormatStyle::NI_None);
180     IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
181     IO.enumCase(Value, "All", FormatStyle::NI_All);
182   }
183 };
184 
185 template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> {
186   static void enumeration(IO &IO, FormatStyle::BracketAlignmentStyle &Value) {
187     IO.enumCase(Value, "Align", FormatStyle::BAS_Align);
188     IO.enumCase(Value, "DontAlign", FormatStyle::BAS_DontAlign);
189     IO.enumCase(Value, "AlwaysBreak", FormatStyle::BAS_AlwaysBreak);
190 
191     // For backward compatibility.
192     IO.enumCase(Value, "true", FormatStyle::BAS_Align);
193     IO.enumCase(Value, "false", FormatStyle::BAS_DontAlign);
194   }
195 };
196 
197 template <>
198 struct ScalarEnumerationTraits<FormatStyle::EscapedNewlineAlignmentStyle> {
199   static void enumeration(IO &IO,
200                           FormatStyle::EscapedNewlineAlignmentStyle &Value) {
201     IO.enumCase(Value, "DontAlign", FormatStyle::ENAS_DontAlign);
202     IO.enumCase(Value, "Left", FormatStyle::ENAS_Left);
203     IO.enumCase(Value, "Right", FormatStyle::ENAS_Right);
204 
205     // For backward compatibility.
206     IO.enumCase(Value, "true", FormatStyle::ENAS_Left);
207     IO.enumCase(Value, "false", FormatStyle::ENAS_Right);
208   }
209 };
210 
211 template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> {
212   static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) {
213     IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle);
214     IO.enumCase(Value, "Left", FormatStyle::PAS_Left);
215     IO.enumCase(Value, "Right", FormatStyle::PAS_Right);
216 
217     // For backward compatibility.
218     IO.enumCase(Value, "true", FormatStyle::PAS_Left);
219     IO.enumCase(Value, "false", FormatStyle::PAS_Right);
220   }
221 };
222 
223 template <>
224 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
225   static void enumeration(IO &IO,
226                           FormatStyle::SpaceBeforeParensOptions &Value) {
227     IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
228     IO.enumCase(Value, "ControlStatements",
229                 FormatStyle::SBPO_ControlStatements);
230     IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
231 
232     // For backward compatibility.
233     IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
234     IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
235   }
236 };
237 
238 template <> struct MappingTraits<FormatStyle> {
239   static void mapping(IO &IO, FormatStyle &Style) {
240     // When reading, read the language first, we need it for getPredefinedStyle.
241     IO.mapOptional("Language", Style.Language);
242 
243     if (IO.outputting()) {
244       StringRef StylesArray[] = {"LLVM",    "Google", "Chromium",
245                                  "Mozilla", "WebKit", "GNU"};
246       ArrayRef<StringRef> Styles(StylesArray);
247       for (size_t i = 0, e = Styles.size(); i < e; ++i) {
248         StringRef StyleName(Styles[i]);
249         FormatStyle PredefinedStyle;
250         if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
251             Style == PredefinedStyle) {
252           IO.mapOptional("# BasedOnStyle", StyleName);
253           break;
254         }
255       }
256     } else {
257       StringRef BasedOnStyle;
258       IO.mapOptional("BasedOnStyle", BasedOnStyle);
259       if (!BasedOnStyle.empty()) {
260         FormatStyle::LanguageKind OldLanguage = Style.Language;
261         FormatStyle::LanguageKind Language =
262             ((FormatStyle *)IO.getContext())->Language;
263         if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
264           IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
265           return;
266         }
267         Style.Language = OldLanguage;
268       }
269     }
270 
271     // For backward compatibility.
272     if (!IO.outputting()) {
273       IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlines);
274       IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment);
275       IO.mapOptional("IndentFunctionDeclarationAfterType",
276                      Style.IndentWrappedFunctionNames);
277       IO.mapOptional("PointerBindsToType", Style.PointerAlignment);
278       IO.mapOptional("SpaceAfterControlStatementKeyword",
279                      Style.SpaceBeforeParens);
280     }
281 
282     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
283     IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
284     IO.mapOptional("AlignConsecutiveAssignments",
285                    Style.AlignConsecutiveAssignments);
286     IO.mapOptional("AlignConsecutiveDeclarations",
287                    Style.AlignConsecutiveDeclarations);
288     IO.mapOptional("AlignEscapedNewlines", Style.AlignEscapedNewlines);
289     IO.mapOptional("AlignOperands", Style.AlignOperands);
290     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
291     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
292                    Style.AllowAllParametersOfDeclarationOnNextLine);
293     IO.mapOptional("AllowShortBlocksOnASingleLine",
294                    Style.AllowShortBlocksOnASingleLine);
295     IO.mapOptional("AllowShortCaseLabelsOnASingleLine",
296                    Style.AllowShortCaseLabelsOnASingleLine);
297     IO.mapOptional("AllowShortFunctionsOnASingleLine",
298                    Style.AllowShortFunctionsOnASingleLine);
299     IO.mapOptional("AllowShortIfStatementsOnASingleLine",
300                    Style.AllowShortIfStatementsOnASingleLine);
301     IO.mapOptional("AllowShortLoopsOnASingleLine",
302                    Style.AllowShortLoopsOnASingleLine);
303     IO.mapOptional("AlwaysBreakAfterDefinitionReturnType",
304                    Style.AlwaysBreakAfterDefinitionReturnType);
305     IO.mapOptional("AlwaysBreakAfterReturnType",
306                    Style.AlwaysBreakAfterReturnType);
307     // If AlwaysBreakAfterDefinitionReturnType was specified but
308     // AlwaysBreakAfterReturnType was not, initialize the latter from the
309     // former for backwards compatibility.
310     if (Style.AlwaysBreakAfterDefinitionReturnType != FormatStyle::DRTBS_None &&
311         Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_None) {
312       if (Style.AlwaysBreakAfterDefinitionReturnType == FormatStyle::DRTBS_All)
313         Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions;
314       else if (Style.AlwaysBreakAfterDefinitionReturnType ==
315                FormatStyle::DRTBS_TopLevel)
316         Style.AlwaysBreakAfterReturnType =
317             FormatStyle::RTBS_TopLevelDefinitions;
318     }
319 
320     IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
321                    Style.AlwaysBreakBeforeMultilineStrings);
322     IO.mapOptional("AlwaysBreakTemplateDeclarations",
323                    Style.AlwaysBreakTemplateDeclarations);
324     IO.mapOptional("BinPackArguments", Style.BinPackArguments);
325     IO.mapOptional("BinPackParameters", Style.BinPackParameters);
326     IO.mapOptional("BraceWrapping", Style.BraceWrapping);
327     IO.mapOptional("BreakBeforeBinaryOperators",
328                    Style.BreakBeforeBinaryOperators);
329     IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
330     IO.mapOptional("BreakBeforeInheritanceComma",
331                    Style.BreakBeforeInheritanceComma);
332     IO.mapOptional("BreakBeforeTernaryOperators",
333                    Style.BreakBeforeTernaryOperators);
334 
335     bool BreakConstructorInitializersBeforeComma = false;
336     IO.mapOptional("BreakConstructorInitializersBeforeComma",
337                    BreakConstructorInitializersBeforeComma);
338     IO.mapOptional("BreakConstructorInitializers",
339                    Style.BreakConstructorInitializers);
340     // If BreakConstructorInitializersBeforeComma was specified but
341     // BreakConstructorInitializers was not, initialize the latter from the
342     // former for backwards compatibility.
343     if (BreakConstructorInitializersBeforeComma &&
344         Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon)
345       Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma;
346 
347     IO.mapOptional("BreakAfterJavaFieldAnnotations",
348                    Style.BreakAfterJavaFieldAnnotations);
349     IO.mapOptional("BreakStringLiterals", Style.BreakStringLiterals);
350     IO.mapOptional("ColumnLimit", Style.ColumnLimit);
351     IO.mapOptional("CommentPragmas", Style.CommentPragmas);
352     IO.mapOptional("CompactNamespaces", Style.CompactNamespaces);
353     IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
354                    Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
355     IO.mapOptional("ConstructorInitializerIndentWidth",
356                    Style.ConstructorInitializerIndentWidth);
357     IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
358     IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
359     IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment);
360     IO.mapOptional("DisableFormat", Style.DisableFormat);
361     IO.mapOptional("ExperimentalAutoDetectBinPacking",
362                    Style.ExperimentalAutoDetectBinPacking);
363     IO.mapOptional("FixNamespaceComments", Style.FixNamespaceComments);
364     IO.mapOptional("ForEachMacros", Style.ForEachMacros);
365     IO.mapOptional("IncludeBlocks", Style.IncludeBlocks);
366     IO.mapOptional("IncludeCategories", Style.IncludeCategories);
367     IO.mapOptional("IncludeIsMainRegex", Style.IncludeIsMainRegex);
368     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
369     IO.mapOptional("IndentPPDirectives", Style.IndentPPDirectives);
370     IO.mapOptional("IndentWidth", Style.IndentWidth);
371     IO.mapOptional("IndentWrappedFunctionNames",
372                    Style.IndentWrappedFunctionNames);
373     IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes);
374     IO.mapOptional("JavaScriptWrapImports", Style.JavaScriptWrapImports);
375     IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
376                    Style.KeepEmptyLinesAtTheStartOfBlocks);
377     IO.mapOptional("MacroBlockBegin", Style.MacroBlockBegin);
378     IO.mapOptional("MacroBlockEnd", Style.MacroBlockEnd);
379     IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
380     IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
381     IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth);
382     IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
383     IO.mapOptional("ObjCSpaceBeforeProtocolList",
384                    Style.ObjCSpaceBeforeProtocolList);
385     IO.mapOptional("PenaltyBreakAssignment", Style.PenaltyBreakAssignment);
386     IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
387                    Style.PenaltyBreakBeforeFirstCallParameter);
388     IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
389     IO.mapOptional("PenaltyBreakFirstLessLess",
390                    Style.PenaltyBreakFirstLessLess);
391     IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
392     IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
393     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
394                    Style.PenaltyReturnTypeOnItsOwnLine);
395     IO.mapOptional("PointerAlignment", Style.PointerAlignment);
396     IO.mapOptional("RawStringFormats", Style.RawStringFormats);
397     IO.mapOptional("ReflowComments", Style.ReflowComments);
398     IO.mapOptional("SortIncludes", Style.SortIncludes);
399     IO.mapOptional("SortUsingDeclarations", Style.SortUsingDeclarations);
400     IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
401     IO.mapOptional("SpaceAfterTemplateKeyword",
402                    Style.SpaceAfterTemplateKeyword);
403     IO.mapOptional("SpaceBeforeAssignmentOperators",
404                    Style.SpaceBeforeAssignmentOperators);
405     IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
406     IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
407     IO.mapOptional("SpacesBeforeTrailingComments",
408                    Style.SpacesBeforeTrailingComments);
409     IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
410     IO.mapOptional("SpacesInContainerLiterals",
411                    Style.SpacesInContainerLiterals);
412     IO.mapOptional("SpacesInCStyleCastParentheses",
413                    Style.SpacesInCStyleCastParentheses);
414     IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
415     IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets);
416     IO.mapOptional("Standard", Style.Standard);
417     IO.mapOptional("TabWidth", Style.TabWidth);
418     IO.mapOptional("UseTab", Style.UseTab);
419   }
420 };
421 
422 template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> {
423   static void mapping(IO &IO, FormatStyle::BraceWrappingFlags &Wrapping) {
424     IO.mapOptional("AfterClass", Wrapping.AfterClass);
425     IO.mapOptional("AfterControlStatement", Wrapping.AfterControlStatement);
426     IO.mapOptional("AfterEnum", Wrapping.AfterEnum);
427     IO.mapOptional("AfterFunction", Wrapping.AfterFunction);
428     IO.mapOptional("AfterNamespace", Wrapping.AfterNamespace);
429     IO.mapOptional("AfterObjCDeclaration", Wrapping.AfterObjCDeclaration);
430     IO.mapOptional("AfterStruct", Wrapping.AfterStruct);
431     IO.mapOptional("AfterUnion", Wrapping.AfterUnion);
432     IO.mapOptional("AfterExternBlock", Wrapping.AfterExternBlock);
433     IO.mapOptional("BeforeCatch", Wrapping.BeforeCatch);
434     IO.mapOptional("BeforeElse", Wrapping.BeforeElse);
435     IO.mapOptional("IndentBraces", Wrapping.IndentBraces);
436     IO.mapOptional("SplitEmptyFunction", Wrapping.SplitEmptyFunction);
437     IO.mapOptional("SplitEmptyRecord", Wrapping.SplitEmptyRecord);
438     IO.mapOptional("SplitEmptyNamespace", Wrapping.SplitEmptyNamespace);
439   }
440 };
441 
442 template <> struct MappingTraits<FormatStyle::IncludeCategory> {
443   static void mapping(IO &IO, FormatStyle::IncludeCategory &Category) {
444     IO.mapOptional("Regex", Category.Regex);
445     IO.mapOptional("Priority", Category.Priority);
446   }
447 };
448 
449 template <> struct ScalarEnumerationTraits<FormatStyle::IncludeBlocksStyle> {
450   static void enumeration(IO &IO, FormatStyle::IncludeBlocksStyle &Value) {
451     IO.enumCase(Value, "Preserve", FormatStyle::IBS_Preserve);
452     IO.enumCase(Value, "Merge", FormatStyle::IBS_Merge);
453     IO.enumCase(Value, "Regroup", FormatStyle::IBS_Regroup);
454   }
455 };
456 
457 template <> struct MappingTraits<FormatStyle::RawStringFormat> {
458   static void mapping(IO &IO, FormatStyle::RawStringFormat &Format) {
459     IO.mapOptional("Language", Format.Language);
460     IO.mapOptional("Delimiters", Format.Delimiters);
461     IO.mapOptional("EnclosingFunctions", Format.EnclosingFunctions);
462     IO.mapOptional("BasedOnStyle", Format.BasedOnStyle);
463   }
464 };
465 
466 // Allows to read vector<FormatStyle> while keeping default values.
467 // IO.getContext() should contain a pointer to the FormatStyle structure, that
468 // will be used to get default values for missing keys.
469 // If the first element has no Language specified, it will be treated as the
470 // default one for the following elements.
471 template <> struct DocumentListTraits<std::vector<FormatStyle>> {
472   static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
473     return Seq.size();
474   }
475   static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
476                               size_t Index) {
477     if (Index >= Seq.size()) {
478       assert(Index == Seq.size());
479       FormatStyle Template;
480       if (!Seq.empty() && Seq[0].Language == FormatStyle::LK_None) {
481         Template = Seq[0];
482       } else {
483         Template = *((const FormatStyle *)IO.getContext());
484         Template.Language = FormatStyle::LK_None;
485       }
486       Seq.resize(Index + 1, Template);
487     }
488     return Seq[Index];
489   }
490 };
491 } // namespace yaml
492 } // namespace llvm
493 
494 namespace clang {
495 namespace format {
496 
497 const std::error_category &getParseCategory() {
498   static ParseErrorCategory C;
499   return C;
500 }
501 std::error_code make_error_code(ParseError e) {
502   return std::error_code(static_cast<int>(e), getParseCategory());
503 }
504 
505 inline llvm::Error make_string_error(const llvm::Twine &Message) {
506   return llvm::make_error<llvm::StringError>(Message,
507                                              llvm::inconvertibleErrorCode());
508 }
509 
510 const char *ParseErrorCategory::name() const noexcept {
511   return "clang-format.parse_error";
512 }
513 
514 std::string ParseErrorCategory::message(int EV) const {
515   switch (static_cast<ParseError>(EV)) {
516   case ParseError::Success:
517     return "Success";
518   case ParseError::Error:
519     return "Invalid argument";
520   case ParseError::Unsuitable:
521     return "Unsuitable";
522   }
523   llvm_unreachable("unexpected parse error");
524 }
525 
526 static FormatStyle expandPresets(const FormatStyle &Style) {
527   if (Style.BreakBeforeBraces == FormatStyle::BS_Custom)
528     return Style;
529   FormatStyle Expanded = Style;
530   Expanded.BraceWrapping = {false, false, false, false, false,
531                             false, false, false, false, false,
532                             false, false, true,  true,  true};
533   switch (Style.BreakBeforeBraces) {
534   case FormatStyle::BS_Linux:
535     Expanded.BraceWrapping.AfterClass = true;
536     Expanded.BraceWrapping.AfterFunction = true;
537     Expanded.BraceWrapping.AfterNamespace = true;
538     break;
539   case FormatStyle::BS_Mozilla:
540     Expanded.BraceWrapping.AfterClass = true;
541     Expanded.BraceWrapping.AfterEnum = true;
542     Expanded.BraceWrapping.AfterFunction = true;
543     Expanded.BraceWrapping.AfterStruct = true;
544     Expanded.BraceWrapping.AfterUnion = true;
545     Expanded.BraceWrapping.AfterExternBlock = true;
546     Expanded.BraceWrapping.SplitEmptyFunction = true;
547     Expanded.BraceWrapping.SplitEmptyRecord = false;
548     break;
549   case FormatStyle::BS_Stroustrup:
550     Expanded.BraceWrapping.AfterFunction = true;
551     Expanded.BraceWrapping.BeforeCatch = true;
552     Expanded.BraceWrapping.BeforeElse = true;
553     break;
554   case FormatStyle::BS_Allman:
555     Expanded.BraceWrapping.AfterClass = true;
556     Expanded.BraceWrapping.AfterControlStatement = true;
557     Expanded.BraceWrapping.AfterEnum = true;
558     Expanded.BraceWrapping.AfterFunction = true;
559     Expanded.BraceWrapping.AfterNamespace = true;
560     Expanded.BraceWrapping.AfterObjCDeclaration = true;
561     Expanded.BraceWrapping.AfterStruct = true;
562     Expanded.BraceWrapping.AfterExternBlock = true;
563     Expanded.BraceWrapping.BeforeCatch = true;
564     Expanded.BraceWrapping.BeforeElse = true;
565     break;
566   case FormatStyle::BS_GNU:
567     Expanded.BraceWrapping = {true, true, true, true, true, true, true, true,
568                               true, true, true, true, true, true, true};
569     break;
570   case FormatStyle::BS_WebKit:
571     Expanded.BraceWrapping.AfterFunction = true;
572     break;
573   default:
574     break;
575   }
576   return Expanded;
577 }
578 
579 FormatStyle getLLVMStyle() {
580   FormatStyle LLVMStyle;
581   LLVMStyle.Language = FormatStyle::LK_Cpp;
582   LLVMStyle.AccessModifierOffset = -2;
583   LLVMStyle.AlignEscapedNewlines = FormatStyle::ENAS_Right;
584   LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align;
585   LLVMStyle.AlignOperands = true;
586   LLVMStyle.AlignTrailingComments = true;
587   LLVMStyle.AlignConsecutiveAssignments = false;
588   LLVMStyle.AlignConsecutiveDeclarations = false;
589   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
590   LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
591   LLVMStyle.AllowShortBlocksOnASingleLine = false;
592   LLVMStyle.AllowShortCaseLabelsOnASingleLine = false;
593   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
594   LLVMStyle.AllowShortLoopsOnASingleLine = false;
595   LLVMStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_None;
596   LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None;
597   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
598   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
599   LLVMStyle.BinPackArguments = true;
600   LLVMStyle.BinPackParameters = true;
601   LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;
602   LLVMStyle.BreakBeforeTernaryOperators = true;
603   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
604   LLVMStyle.BraceWrapping = {false, false, false, false, false,
605                              false, false, false, false, false,
606                              false, false, true,  true,  true};
607   LLVMStyle.BreakAfterJavaFieldAnnotations = false;
608   LLVMStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeColon;
609   LLVMStyle.BreakBeforeInheritanceComma = false;
610   LLVMStyle.BreakStringLiterals = true;
611   LLVMStyle.ColumnLimit = 80;
612   LLVMStyle.CommentPragmas = "^ IWYU pragma:";
613   LLVMStyle.CompactNamespaces = false;
614   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
615   LLVMStyle.ConstructorInitializerIndentWidth = 4;
616   LLVMStyle.ContinuationIndentWidth = 4;
617   LLVMStyle.Cpp11BracedListStyle = true;
618   LLVMStyle.DerivePointerAlignment = false;
619   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
620   LLVMStyle.FixNamespaceComments = true;
621   LLVMStyle.ForEachMacros.push_back("foreach");
622   LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
623   LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH");
624   LLVMStyle.IncludeCategories = {{"^\"(llvm|llvm-c|clang|clang-c)/", 2},
625                                  {"^(<|\"(gtest|gmock|isl|json)/)", 3},
626                                  {".*", 1}};
627   LLVMStyle.IncludeIsMainRegex = "(Test)?$";
628   LLVMStyle.IncludeBlocks = FormatStyle::IBS_Preserve;
629   LLVMStyle.IndentCaseLabels = false;
630   LLVMStyle.IndentPPDirectives = FormatStyle::PPDIS_None;
631   LLVMStyle.IndentWrappedFunctionNames = false;
632   LLVMStyle.IndentWidth = 2;
633   LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;
634   LLVMStyle.JavaScriptWrapImports = true;
635   LLVMStyle.TabWidth = 8;
636   LLVMStyle.MaxEmptyLinesToKeep = 1;
637   LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
638   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
639   LLVMStyle.ObjCBlockIndentWidth = 2;
640   LLVMStyle.ObjCSpaceAfterProperty = false;
641   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
642   LLVMStyle.PointerAlignment = FormatStyle::PAS_Right;
643   LLVMStyle.SpacesBeforeTrailingComments = 1;
644   LLVMStyle.Standard = FormatStyle::LS_Cpp11;
645   LLVMStyle.UseTab = FormatStyle::UT_Never;
646   LLVMStyle.ReflowComments = true;
647   LLVMStyle.SpacesInParentheses = false;
648   LLVMStyle.SpacesInSquareBrackets = false;
649   LLVMStyle.SpaceInEmptyParentheses = false;
650   LLVMStyle.SpacesInContainerLiterals = true;
651   LLVMStyle.SpacesInCStyleCastParentheses = false;
652   LLVMStyle.SpaceAfterCStyleCast = false;
653   LLVMStyle.SpaceAfterTemplateKeyword = true;
654   LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
655   LLVMStyle.SpaceBeforeAssignmentOperators = true;
656   LLVMStyle.SpacesInAngles = false;
657 
658   LLVMStyle.PenaltyBreakAssignment = prec::Assignment;
659   LLVMStyle.PenaltyBreakComment = 300;
660   LLVMStyle.PenaltyBreakFirstLessLess = 120;
661   LLVMStyle.PenaltyBreakString = 1000;
662   LLVMStyle.PenaltyExcessCharacter = 1000000;
663   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
664   LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
665 
666   LLVMStyle.DisableFormat = false;
667   LLVMStyle.SortIncludes = true;
668   LLVMStyle.SortUsingDeclarations = true;
669 
670   return LLVMStyle;
671 }
672 
673 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
674   if (Language == FormatStyle::LK_TextProto) {
675     FormatStyle GoogleStyle = getGoogleStyle(FormatStyle::LK_Proto);
676     GoogleStyle.Language = FormatStyle::LK_TextProto;
677     return GoogleStyle;
678   }
679 
680   FormatStyle GoogleStyle = getLLVMStyle();
681   GoogleStyle.Language = Language;
682 
683   GoogleStyle.AccessModifierOffset = -1;
684   GoogleStyle.AlignEscapedNewlines = FormatStyle::ENAS_Left;
685   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
686   GoogleStyle.AllowShortLoopsOnASingleLine = true;
687   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
688   GoogleStyle.AlwaysBreakTemplateDeclarations = true;
689   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
690   GoogleStyle.DerivePointerAlignment = true;
691   GoogleStyle.IncludeCategories = {
692       {"^<ext/.*\\.h>", 2}, {"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}};
693   GoogleStyle.IncludeIsMainRegex = "([-_](test|unittest))?$";
694   GoogleStyle.IndentCaseLabels = true;
695   GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
696   GoogleStyle.ObjCSpaceAfterProperty = false;
697   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
698   GoogleStyle.PointerAlignment = FormatStyle::PAS_Left;
699   GoogleStyle.RawStringFormats = {{
700       FormatStyle::LK_TextProto,
701       /*Delimiters=*/
702       {
703           "pb",
704           "PB",
705           "proto",
706           "PROTO",
707           "textproto",
708           "TEXTPROTO",
709       },
710       /*EnclosingFunctionNames=*/
711        {
712            "EqualsProto",
713            "PARSE_TEXT_PROTO",
714            "ParseTextProto",
715        },
716       /*BasedOnStyle=*/"google",
717   }};
718   GoogleStyle.SpacesBeforeTrailingComments = 2;
719   GoogleStyle.Standard = FormatStyle::LS_Auto;
720 
721   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
722   GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
723 
724   if (Language == FormatStyle::LK_Java) {
725     GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
726     GoogleStyle.AlignOperands = false;
727     GoogleStyle.AlignTrailingComments = false;
728     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
729     GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
730     GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
731     GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment;
732     GoogleStyle.ColumnLimit = 100;
733     GoogleStyle.SpaceAfterCStyleCast = true;
734     GoogleStyle.SpacesBeforeTrailingComments = 1;
735   } else if (Language == FormatStyle::LK_JavaScript) {
736     GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak;
737     GoogleStyle.AlignOperands = false;
738     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
739     GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
740     GoogleStyle.BreakBeforeTernaryOperators = false;
741     // taze:, triple slash directives (`/// <...`), @tag followed by { for a lot
742     // of JSDoc tags, and @see, which is commonly followed by overlong URLs.
743     GoogleStyle.CommentPragmas =
744         "(taze:|^/[ \t]*<|(@[A-Za-z_0-9-]+[ \\t]*{)|@see)";
745     GoogleStyle.MaxEmptyLinesToKeep = 3;
746     GoogleStyle.NamespaceIndentation = FormatStyle::NI_All;
747     GoogleStyle.SpacesInContainerLiterals = false;
748     GoogleStyle.JavaScriptQuotes = FormatStyle::JSQS_Single;
749     GoogleStyle.JavaScriptWrapImports = false;
750   } else if (Language == FormatStyle::LK_Proto) {
751     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
752     GoogleStyle.SpacesInContainerLiterals = false;
753   } else if (Language == FormatStyle::LK_ObjC) {
754     GoogleStyle.ColumnLimit = 100;
755   }
756 
757   return GoogleStyle;
758 }
759 
760 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
761   FormatStyle ChromiumStyle = getGoogleStyle(Language);
762   if (Language == FormatStyle::LK_Java) {
763     ChromiumStyle.AllowShortIfStatementsOnASingleLine = true;
764     ChromiumStyle.BreakAfterJavaFieldAnnotations = true;
765     ChromiumStyle.ContinuationIndentWidth = 8;
766     ChromiumStyle.IndentWidth = 4;
767   } else if (Language == FormatStyle::LK_JavaScript) {
768     ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
769     ChromiumStyle.AllowShortLoopsOnASingleLine = false;
770   } else {
771     ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
772     ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
773     ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
774     ChromiumStyle.AllowShortLoopsOnASingleLine = false;
775     ChromiumStyle.BinPackParameters = false;
776     ChromiumStyle.DerivePointerAlignment = false;
777     if (Language == FormatStyle::LK_ObjC)
778       ChromiumStyle.ColumnLimit = 80;
779   }
780   return ChromiumStyle;
781 }
782 
783 FormatStyle getMozillaStyle() {
784   FormatStyle MozillaStyle = getLLVMStyle();
785   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
786   MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
787   MozillaStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_TopLevel;
788   MozillaStyle.AlwaysBreakAfterDefinitionReturnType =
789       FormatStyle::DRTBS_TopLevel;
790   MozillaStyle.AlwaysBreakTemplateDeclarations = true;
791   MozillaStyle.BinPackParameters = false;
792   MozillaStyle.BinPackArguments = false;
793   MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla;
794   MozillaStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma;
795   MozillaStyle.BreakBeforeInheritanceComma = true;
796   MozillaStyle.ConstructorInitializerIndentWidth = 2;
797   MozillaStyle.ContinuationIndentWidth = 2;
798   MozillaStyle.Cpp11BracedListStyle = false;
799   MozillaStyle.FixNamespaceComments = false;
800   MozillaStyle.IndentCaseLabels = true;
801   MozillaStyle.ObjCSpaceAfterProperty = true;
802   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
803   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
804   MozillaStyle.PointerAlignment = FormatStyle::PAS_Left;
805   MozillaStyle.SpaceAfterTemplateKeyword = false;
806   return MozillaStyle;
807 }
808 
809 FormatStyle getWebKitStyle() {
810   FormatStyle Style = getLLVMStyle();
811   Style.AccessModifierOffset = -4;
812   Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
813   Style.AlignOperands = false;
814   Style.AlignTrailingComments = false;
815   Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
816   Style.BreakBeforeBraces = FormatStyle::BS_WebKit;
817   Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma;
818   Style.Cpp11BracedListStyle = false;
819   Style.ColumnLimit = 0;
820   Style.FixNamespaceComments = false;
821   Style.IndentWidth = 4;
822   Style.NamespaceIndentation = FormatStyle::NI_Inner;
823   Style.ObjCBlockIndentWidth = 4;
824   Style.ObjCSpaceAfterProperty = true;
825   Style.PointerAlignment = FormatStyle::PAS_Left;
826   return Style;
827 }
828 
829 FormatStyle getGNUStyle() {
830   FormatStyle Style = getLLVMStyle();
831   Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All;
832   Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions;
833   Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
834   Style.BreakBeforeBraces = FormatStyle::BS_GNU;
835   Style.BreakBeforeTernaryOperators = true;
836   Style.Cpp11BracedListStyle = false;
837   Style.ColumnLimit = 79;
838   Style.FixNamespaceComments = false;
839   Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
840   Style.Standard = FormatStyle::LS_Cpp03;
841   return Style;
842 }
843 
844 FormatStyle getNoStyle() {
845   FormatStyle NoStyle = getLLVMStyle();
846   NoStyle.DisableFormat = true;
847   NoStyle.SortIncludes = false;
848   NoStyle.SortUsingDeclarations = false;
849   return NoStyle;
850 }
851 
852 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
853                         FormatStyle *Style) {
854   if (Name.equals_lower("llvm")) {
855     *Style = getLLVMStyle();
856   } else if (Name.equals_lower("chromium")) {
857     *Style = getChromiumStyle(Language);
858   } else if (Name.equals_lower("mozilla")) {
859     *Style = getMozillaStyle();
860   } else if (Name.equals_lower("google")) {
861     *Style = getGoogleStyle(Language);
862   } else if (Name.equals_lower("webkit")) {
863     *Style = getWebKitStyle();
864   } else if (Name.equals_lower("gnu")) {
865     *Style = getGNUStyle();
866   } else if (Name.equals_lower("none")) {
867     *Style = getNoStyle();
868   } else {
869     return false;
870   }
871 
872   Style->Language = Language;
873   return true;
874 }
875 
876 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
877   assert(Style);
878   FormatStyle::LanguageKind Language = Style->Language;
879   assert(Language != FormatStyle::LK_None);
880   if (Text.trim().empty())
881     return make_error_code(ParseError::Error);
882   Style->StyleSet.Clear();
883   std::vector<FormatStyle> Styles;
884   llvm::yaml::Input Input(Text);
885   // DocumentListTraits<vector<FormatStyle>> uses the context to get default
886   // values for the fields, keys for which are missing from the configuration.
887   // Mapping also uses the context to get the language to find the correct
888   // base style.
889   Input.setContext(Style);
890   Input >> Styles;
891   if (Input.error())
892     return Input.error();
893 
894   for (unsigned i = 0; i < Styles.size(); ++i) {
895     // Ensures that only the first configuration can skip the Language option.
896     if (Styles[i].Language == FormatStyle::LK_None && i != 0)
897       return make_error_code(ParseError::Error);
898     // Ensure that each language is configured at most once.
899     for (unsigned j = 0; j < i; ++j) {
900       if (Styles[i].Language == Styles[j].Language) {
901         DEBUG(llvm::dbgs()
902               << "Duplicate languages in the config file on positions " << j
903               << " and " << i << "\n");
904         return make_error_code(ParseError::Error);
905       }
906     }
907   }
908   // Look for a suitable configuration starting from the end, so we can
909   // find the configuration for the specific language first, and the default
910   // configuration (which can only be at slot 0) after it.
911   FormatStyle::FormatStyleSet StyleSet;
912   bool LanguageFound = false;
913   for (int i = Styles.size() - 1; i >= 0; --i) {
914     if (Styles[i].Language != FormatStyle::LK_None)
915       StyleSet.Add(Styles[i]);
916     if (Styles[i].Language == Language)
917       LanguageFound = true;
918   }
919   if (!LanguageFound) {
920     if (Styles.empty() || Styles[0].Language != FormatStyle::LK_None)
921       return make_error_code(ParseError::Unsuitable);
922     FormatStyle DefaultStyle = Styles[0];
923     DefaultStyle.Language = Language;
924     StyleSet.Add(std::move(DefaultStyle));
925   }
926   *Style = *StyleSet.Get(Language);
927   return make_error_code(ParseError::Success);
928 }
929 
930 std::string configurationAsText(const FormatStyle &Style) {
931   std::string Text;
932   llvm::raw_string_ostream Stream(Text);
933   llvm::yaml::Output Output(Stream);
934   // We use the same mapping method for input and output, so we need a non-const
935   // reference here.
936   FormatStyle NonConstStyle = expandPresets(Style);
937   Output << NonConstStyle;
938   return Stream.str();
939 }
940 
941 llvm::Optional<FormatStyle>
942 FormatStyle::FormatStyleSet::Get(FormatStyle::LanguageKind Language) const {
943   if (!Styles)
944     return None;
945   auto It = Styles->find(Language);
946   if (It == Styles->end())
947     return None;
948   FormatStyle Style = It->second;
949   Style.StyleSet = *this;
950   return Style;
951 }
952 
953 void FormatStyle::FormatStyleSet::Add(FormatStyle Style) {
954   assert(Style.Language != LK_None &&
955          "Cannot add a style for LK_None to a StyleSet");
956   assert(
957       !Style.StyleSet.Styles &&
958       "Cannot add a style associated with an existing StyleSet to a StyleSet");
959   if (!Styles)
960     Styles = std::make_shared<MapType>();
961   (*Styles)[Style.Language] = std::move(Style);
962 }
963 
964 void FormatStyle::FormatStyleSet::Clear() {
965   Styles.reset();
966 }
967 
968 llvm::Optional<FormatStyle>
969 FormatStyle::GetLanguageStyle(FormatStyle::LanguageKind Language) const {
970   return StyleSet.Get(Language);
971 }
972 
973 namespace {
974 
975 class JavaScriptRequoter : public TokenAnalyzer {
976 public:
977   JavaScriptRequoter(const Environment &Env, const FormatStyle &Style)
978       : TokenAnalyzer(Env, Style) {}
979 
980   std::pair<tooling::Replacements, unsigned>
981   analyze(TokenAnnotator &Annotator,
982           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
983           FormatTokenLexer &Tokens) override {
984     AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
985                                           AnnotatedLines.end());
986     tooling::Replacements Result;
987     requoteJSStringLiteral(AnnotatedLines, Result);
988     return {Result, 0};
989   }
990 
991 private:
992   // Replaces double/single-quoted string literal as appropriate, re-escaping
993   // the contents in the process.
994   void requoteJSStringLiteral(SmallVectorImpl<AnnotatedLine *> &Lines,
995                               tooling::Replacements &Result) {
996     for (AnnotatedLine *Line : Lines) {
997       requoteJSStringLiteral(Line->Children, Result);
998       if (!Line->Affected)
999         continue;
1000       for (FormatToken *FormatTok = Line->First; FormatTok;
1001            FormatTok = FormatTok->Next) {
1002         StringRef Input = FormatTok->TokenText;
1003         if (FormatTok->Finalized || !FormatTok->isStringLiteral() ||
1004             // NB: testing for not starting with a double quote to avoid
1005             // breaking `template strings`.
1006             (Style.JavaScriptQuotes == FormatStyle::JSQS_Single &&
1007              !Input.startswith("\"")) ||
1008             (Style.JavaScriptQuotes == FormatStyle::JSQS_Double &&
1009              !Input.startswith("\'")))
1010           continue;
1011 
1012         // Change start and end quote.
1013         bool IsSingle = Style.JavaScriptQuotes == FormatStyle::JSQS_Single;
1014         SourceLocation Start = FormatTok->Tok.getLocation();
1015         auto Replace = [&](SourceLocation Start, unsigned Length,
1016                            StringRef ReplacementText) {
1017           auto Err = Result.add(tooling::Replacement(
1018               Env.getSourceManager(), Start, Length, ReplacementText));
1019           // FIXME: handle error. For now, print error message and skip the
1020           // replacement for release version.
1021           if (Err) {
1022             llvm::errs() << llvm::toString(std::move(Err)) << "\n";
1023             assert(false);
1024           }
1025         };
1026         Replace(Start, 1, IsSingle ? "'" : "\"");
1027         Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1,
1028                 IsSingle ? "'" : "\"");
1029 
1030         // Escape internal quotes.
1031         bool Escaped = false;
1032         for (size_t i = 1; i < Input.size() - 1; i++) {
1033           switch (Input[i]) {
1034           case '\\':
1035             if (!Escaped && i + 1 < Input.size() &&
1036                 ((IsSingle && Input[i + 1] == '"') ||
1037                  (!IsSingle && Input[i + 1] == '\''))) {
1038               // Remove this \, it's escaping a " or ' that no longer needs
1039               // escaping
1040               Replace(Start.getLocWithOffset(i), 1, "");
1041               continue;
1042             }
1043             Escaped = !Escaped;
1044             break;
1045           case '\"':
1046           case '\'':
1047             if (!Escaped && IsSingle == (Input[i] == '\'')) {
1048               // Escape the quote.
1049               Replace(Start.getLocWithOffset(i), 0, "\\");
1050             }
1051             Escaped = false;
1052             break;
1053           default:
1054             Escaped = false;
1055             break;
1056           }
1057         }
1058       }
1059     }
1060   }
1061 };
1062 
1063 class Formatter : public TokenAnalyzer {
1064 public:
1065   Formatter(const Environment &Env, const FormatStyle &Style,
1066             FormattingAttemptStatus *Status)
1067       : TokenAnalyzer(Env, Style), Status(Status) {}
1068 
1069   std::pair<tooling::Replacements, unsigned>
1070   analyze(TokenAnnotator &Annotator,
1071           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1072           FormatTokenLexer &Tokens) override {
1073     tooling::Replacements Result;
1074     deriveLocalStyle(AnnotatedLines);
1075     AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
1076                                           AnnotatedLines.end());
1077     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1078       Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1079     }
1080     Annotator.setCommentLineLevels(AnnotatedLines);
1081 
1082     WhitespaceManager Whitespaces(
1083         Env.getSourceManager(), Style,
1084         inputUsesCRLF(Env.getSourceManager().getBufferData(Env.getFileID())));
1085     ContinuationIndenter Indenter(Style, Tokens.getKeywords(),
1086                                   Env.getSourceManager(), Whitespaces, Encoding,
1087                                   BinPackInconclusiveFunctions);
1088     unsigned Penalty =
1089         UnwrappedLineFormatter(&Indenter, &Whitespaces, Style,
1090                                Tokens.getKeywords(), Env.getSourceManager(),
1091                                Status)
1092             .format(AnnotatedLines, /*DryRun=*/false,
1093                     /*AdditionalIndent=*/0,
1094                     /*FixBadIndentation=*/false,
1095                     /*FirstStartColumn=*/Env.getFirstStartColumn(),
1096                     /*NextStartColumn=*/Env.getNextStartColumn(),
1097                     /*LastStartColumn=*/Env.getLastStartColumn());
1098     for (const auto &R : Whitespaces.generateReplacements())
1099       if (Result.add(R))
1100         return std::make_pair(Result, 0);
1101     return std::make_pair(Result, Penalty);
1102   }
1103 
1104 private:
1105   static bool inputUsesCRLF(StringRef Text) {
1106     return Text.count('\r') * 2 > Text.count('\n');
1107   }
1108 
1109   bool
1110   hasCpp03IncompatibleFormat(const SmallVectorImpl<AnnotatedLine *> &Lines) {
1111     for (const AnnotatedLine *Line : Lines) {
1112       if (hasCpp03IncompatibleFormat(Line->Children))
1113         return true;
1114       for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) {
1115         if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1116           if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener))
1117             return true;
1118           if (Tok->is(TT_TemplateCloser) &&
1119               Tok->Previous->is(TT_TemplateCloser))
1120             return true;
1121         }
1122       }
1123     }
1124     return false;
1125   }
1126 
1127   int countVariableAlignments(const SmallVectorImpl<AnnotatedLine *> &Lines) {
1128     int AlignmentDiff = 0;
1129     for (const AnnotatedLine *Line : Lines) {
1130       AlignmentDiff += countVariableAlignments(Line->Children);
1131       for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) {
1132         if (!Tok->is(TT_PointerOrReference))
1133           continue;
1134         bool SpaceBefore =
1135             Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1136         bool SpaceAfter = Tok->Next->WhitespaceRange.getBegin() !=
1137                           Tok->Next->WhitespaceRange.getEnd();
1138         if (SpaceBefore && !SpaceAfter)
1139           ++AlignmentDiff;
1140         if (!SpaceBefore && SpaceAfter)
1141           --AlignmentDiff;
1142       }
1143     }
1144     return AlignmentDiff;
1145   }
1146 
1147   void
1148   deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1149     bool HasBinPackedFunction = false;
1150     bool HasOnePerLineFunction = false;
1151     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1152       if (!AnnotatedLines[i]->First->Next)
1153         continue;
1154       FormatToken *Tok = AnnotatedLines[i]->First->Next;
1155       while (Tok->Next) {
1156         if (Tok->PackingKind == PPK_BinPacked)
1157           HasBinPackedFunction = true;
1158         if (Tok->PackingKind == PPK_OnePerLine)
1159           HasOnePerLineFunction = true;
1160 
1161         Tok = Tok->Next;
1162       }
1163     }
1164     if (Style.DerivePointerAlignment)
1165       Style.PointerAlignment = countVariableAlignments(AnnotatedLines) <= 0
1166                                    ? FormatStyle::PAS_Left
1167                                    : FormatStyle::PAS_Right;
1168     if (Style.Standard == FormatStyle::LS_Auto)
1169       Style.Standard = hasCpp03IncompatibleFormat(AnnotatedLines)
1170                            ? FormatStyle::LS_Cpp11
1171                            : FormatStyle::LS_Cpp03;
1172     BinPackInconclusiveFunctions =
1173         HasBinPackedFunction || !HasOnePerLineFunction;
1174   }
1175 
1176   bool BinPackInconclusiveFunctions;
1177   FormattingAttemptStatus *Status;
1178 };
1179 
1180 // This class clean up the erroneous/redundant code around the given ranges in
1181 // file.
1182 class Cleaner : public TokenAnalyzer {
1183 public:
1184   Cleaner(const Environment &Env, const FormatStyle &Style)
1185       : TokenAnalyzer(Env, Style),
1186         DeletedTokens(FormatTokenLess(Env.getSourceManager())) {}
1187 
1188   // FIXME: eliminate unused parameters.
1189   std::pair<tooling::Replacements, unsigned>
1190   analyze(TokenAnnotator &Annotator,
1191           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1192           FormatTokenLexer &Tokens) override {
1193     // FIXME: in the current implementation the granularity of affected range
1194     // is an annotated line. However, this is not sufficient. Furthermore,
1195     // redundant code introduced by replacements does not necessarily
1196     // intercept with ranges of replacements that result in the redundancy.
1197     // To determine if some redundant code is actually introduced by
1198     // replacements(e.g. deletions), we need to come up with a more
1199     // sophisticated way of computing affected ranges.
1200     AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
1201                                           AnnotatedLines.end());
1202 
1203     checkEmptyNamespace(AnnotatedLines);
1204 
1205     for (auto &Line : AnnotatedLines) {
1206       if (Line->Affected) {
1207         cleanupRight(Line->First, tok::comma, tok::comma);
1208         cleanupRight(Line->First, TT_CtorInitializerColon, tok::comma);
1209         cleanupRight(Line->First, tok::l_paren, tok::comma);
1210         cleanupLeft(Line->First, tok::comma, tok::r_paren);
1211         cleanupLeft(Line->First, TT_CtorInitializerComma, tok::l_brace);
1212         cleanupLeft(Line->First, TT_CtorInitializerColon, tok::l_brace);
1213         cleanupLeft(Line->First, TT_CtorInitializerColon, tok::equal);
1214       }
1215     }
1216 
1217     return {generateFixes(), 0};
1218   }
1219 
1220 private:
1221   bool containsOnlyComments(const AnnotatedLine &Line) {
1222     for (FormatToken *Tok = Line.First; Tok != nullptr; Tok = Tok->Next) {
1223       if (Tok->isNot(tok::comment))
1224         return false;
1225     }
1226     return true;
1227   }
1228 
1229   // Iterate through all lines and remove any empty (nested) namespaces.
1230   void checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1231     std::set<unsigned> DeletedLines;
1232     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1233       auto &Line = *AnnotatedLines[i];
1234       if (Line.startsWith(tok::kw_namespace) ||
1235           Line.startsWith(tok::kw_inline, tok::kw_namespace)) {
1236         checkEmptyNamespace(AnnotatedLines, i, i, DeletedLines);
1237       }
1238     }
1239 
1240     for (auto Line : DeletedLines) {
1241       FormatToken *Tok = AnnotatedLines[Line]->First;
1242       while (Tok) {
1243         deleteToken(Tok);
1244         Tok = Tok->Next;
1245       }
1246     }
1247   }
1248 
1249   // The function checks if the namespace, which starts from \p CurrentLine, and
1250   // its nested namespaces are empty and delete them if they are empty. It also
1251   // sets \p NewLine to the last line checked.
1252   // Returns true if the current namespace is empty.
1253   bool checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1254                            unsigned CurrentLine, unsigned &NewLine,
1255                            std::set<unsigned> &DeletedLines) {
1256     unsigned InitLine = CurrentLine, End = AnnotatedLines.size();
1257     if (Style.BraceWrapping.AfterNamespace) {
1258       // If the left brace is in a new line, we should consume it first so that
1259       // it does not make the namespace non-empty.
1260       // FIXME: error handling if there is no left brace.
1261       if (!AnnotatedLines[++CurrentLine]->startsWith(tok::l_brace)) {
1262         NewLine = CurrentLine;
1263         return false;
1264       }
1265     } else if (!AnnotatedLines[CurrentLine]->endsWith(tok::l_brace)) {
1266       return false;
1267     }
1268     while (++CurrentLine < End) {
1269       if (AnnotatedLines[CurrentLine]->startsWith(tok::r_brace))
1270         break;
1271 
1272       if (AnnotatedLines[CurrentLine]->startsWith(tok::kw_namespace) ||
1273           AnnotatedLines[CurrentLine]->startsWith(tok::kw_inline,
1274                                                   tok::kw_namespace)) {
1275         if (!checkEmptyNamespace(AnnotatedLines, CurrentLine, NewLine,
1276                                  DeletedLines))
1277           return false;
1278         CurrentLine = NewLine;
1279         continue;
1280       }
1281 
1282       if (containsOnlyComments(*AnnotatedLines[CurrentLine]))
1283         continue;
1284 
1285       // If there is anything other than comments or nested namespaces in the
1286       // current namespace, the namespace cannot be empty.
1287       NewLine = CurrentLine;
1288       return false;
1289     }
1290 
1291     NewLine = CurrentLine;
1292     if (CurrentLine >= End)
1293       return false;
1294 
1295     // Check if the empty namespace is actually affected by changed ranges.
1296     if (!AffectedRangeMgr.affectsCharSourceRange(CharSourceRange::getCharRange(
1297             AnnotatedLines[InitLine]->First->Tok.getLocation(),
1298             AnnotatedLines[CurrentLine]->Last->Tok.getEndLoc())))
1299       return false;
1300 
1301     for (unsigned i = InitLine; i <= CurrentLine; ++i) {
1302       DeletedLines.insert(i);
1303     }
1304 
1305     return true;
1306   }
1307 
1308   // Checks pairs {start, start->next},..., {end->previous, end} and deletes one
1309   // of the token in the pair if the left token has \p LK token kind and the
1310   // right token has \p RK token kind. If \p DeleteLeft is true, the left token
1311   // is deleted on match; otherwise, the right token is deleted.
1312   template <typename LeftKind, typename RightKind>
1313   void cleanupPair(FormatToken *Start, LeftKind LK, RightKind RK,
1314                    bool DeleteLeft) {
1315     auto NextNotDeleted = [this](const FormatToken &Tok) -> FormatToken * {
1316       for (auto *Res = Tok.Next; Res; Res = Res->Next)
1317         if (!Res->is(tok::comment) &&
1318             DeletedTokens.find(Res) == DeletedTokens.end())
1319           return Res;
1320       return nullptr;
1321     };
1322     for (auto *Left = Start; Left;) {
1323       auto *Right = NextNotDeleted(*Left);
1324       if (!Right)
1325         break;
1326       if (Left->is(LK) && Right->is(RK)) {
1327         deleteToken(DeleteLeft ? Left : Right);
1328         for (auto *Tok = Left->Next; Tok && Tok != Right; Tok = Tok->Next)
1329           deleteToken(Tok);
1330         // If the right token is deleted, we should keep the left token
1331         // unchanged and pair it with the new right token.
1332         if (!DeleteLeft)
1333           continue;
1334       }
1335       Left = Right;
1336     }
1337   }
1338 
1339   template <typename LeftKind, typename RightKind>
1340   void cleanupLeft(FormatToken *Start, LeftKind LK, RightKind RK) {
1341     cleanupPair(Start, LK, RK, /*DeleteLeft=*/true);
1342   }
1343 
1344   template <typename LeftKind, typename RightKind>
1345   void cleanupRight(FormatToken *Start, LeftKind LK, RightKind RK) {
1346     cleanupPair(Start, LK, RK, /*DeleteLeft=*/false);
1347   }
1348 
1349   // Delete the given token.
1350   inline void deleteToken(FormatToken *Tok) {
1351     if (Tok)
1352       DeletedTokens.insert(Tok);
1353   }
1354 
1355   tooling::Replacements generateFixes() {
1356     tooling::Replacements Fixes;
1357     std::vector<FormatToken *> Tokens;
1358     std::copy(DeletedTokens.begin(), DeletedTokens.end(),
1359               std::back_inserter(Tokens));
1360 
1361     // Merge multiple continuous token deletions into one big deletion so that
1362     // the number of replacements can be reduced. This makes computing affected
1363     // ranges more efficient when we run reformat on the changed code.
1364     unsigned Idx = 0;
1365     while (Idx < Tokens.size()) {
1366       unsigned St = Idx, End = Idx;
1367       while ((End + 1) < Tokens.size() &&
1368              Tokens[End]->Next == Tokens[End + 1]) {
1369         End++;
1370       }
1371       auto SR = CharSourceRange::getCharRange(Tokens[St]->Tok.getLocation(),
1372                                               Tokens[End]->Tok.getEndLoc());
1373       auto Err =
1374           Fixes.add(tooling::Replacement(Env.getSourceManager(), SR, ""));
1375       // FIXME: better error handling. for now just print error message and skip
1376       // for the release version.
1377       if (Err) {
1378         llvm::errs() << llvm::toString(std::move(Err)) << "\n";
1379         assert(false && "Fixes must not conflict!");
1380       }
1381       Idx = End + 1;
1382     }
1383 
1384     return Fixes;
1385   }
1386 
1387   // Class for less-than inequality comparason for the set `RedundantTokens`.
1388   // We store tokens in the order they appear in the translation unit so that
1389   // we do not need to sort them in `generateFixes()`.
1390   struct FormatTokenLess {
1391     FormatTokenLess(const SourceManager &SM) : SM(SM) {}
1392 
1393     bool operator()(const FormatToken *LHS, const FormatToken *RHS) const {
1394       return SM.isBeforeInTranslationUnit(LHS->Tok.getLocation(),
1395                                           RHS->Tok.getLocation());
1396     }
1397     const SourceManager &SM;
1398   };
1399 
1400   // Tokens to be deleted.
1401   std::set<FormatToken *, FormatTokenLess> DeletedTokens;
1402 };
1403 
1404 class ObjCHeaderStyleGuesser : public TokenAnalyzer {
1405 public:
1406   ObjCHeaderStyleGuesser(const Environment &Env, const FormatStyle &Style)
1407       : TokenAnalyzer(Env, Style), IsObjC(false) {}
1408 
1409   std::pair<tooling::Replacements, unsigned>
1410   analyze(TokenAnnotator &Annotator,
1411           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1412           FormatTokenLexer &Tokens) override {
1413     assert(Style.Language == FormatStyle::LK_Cpp);
1414     IsObjC = guessIsObjC(AnnotatedLines, Tokens.getKeywords());
1415     tooling::Replacements Result;
1416     return {Result, 0};
1417   }
1418 
1419   bool isObjC() { return IsObjC; }
1420 
1421 private:
1422   static bool guessIsObjC(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1423                           const AdditionalKeywords &Keywords) {
1424     // Keep this array sorted, since we are binary searching over it.
1425     static constexpr llvm::StringLiteral FoundationIdentifiers[] = {
1426         "CGFloat",
1427         "NSAffineTransform",
1428         "NSArray",
1429         "NSAttributedString",
1430         "NSCache",
1431         "NSCharacterSet",
1432         "NSCountedSet",
1433         "NSData",
1434         "NSDataDetector",
1435         "NSDecimal",
1436         "NSDecimalNumber",
1437         "NSDictionary",
1438         "NSEdgeInsets",
1439         "NSHashTable",
1440         "NSIndexPath",
1441         "NSIndexSet",
1442         "NSInteger",
1443         "NSLocale",
1444         "NSMapTable",
1445         "NSMutableArray",
1446         "NSMutableAttributedString",
1447         "NSMutableCharacterSet",
1448         "NSMutableData",
1449         "NSMutableDictionary",
1450         "NSMutableIndexSet",
1451         "NSMutableOrderedSet",
1452         "NSMutableSet",
1453         "NSMutableString",
1454         "NSNumber",
1455         "NSNumberFormatter",
1456         "NSOrderedSet",
1457         "NSPoint",
1458         "NSPointerArray",
1459         "NSRange",
1460         "NSRect",
1461         "NSRegularExpression",
1462         "NSSet",
1463         "NSSize",
1464         "NSString",
1465         "NSUInteger",
1466         "NSURL",
1467         "NSURLComponents",
1468         "NSURLQueryItem",
1469         "NSUUID",
1470     };
1471 
1472     for (auto &Line : AnnotatedLines) {
1473       for (FormatToken *FormatTok = Line->First->Next; FormatTok;
1474            FormatTok = FormatTok->Next) {
1475         if ((FormatTok->Previous->is(tok::at) &&
1476              (FormatTok->isObjCAtKeyword(tok::objc_interface) ||
1477               FormatTok->isObjCAtKeyword(tok::objc_implementation) ||
1478               FormatTok->isObjCAtKeyword(tok::objc_protocol) ||
1479               FormatTok->isObjCAtKeyword(tok::objc_end) ||
1480               FormatTok->isOneOf(tok::numeric_constant, tok::l_square,
1481                                  tok::l_brace))) ||
1482             (FormatTok->Tok.isAnyIdentifier() &&
1483              std::binary_search(std::begin(FoundationIdentifiers),
1484                                 std::end(FoundationIdentifiers),
1485                                 FormatTok->TokenText)) ||
1486             FormatTok->is(TT_ObjCStringLiteral) ||
1487             FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1488                                TT_ObjCBlockLBrace, TT_ObjCBlockLParen,
1489                                TT_ObjCDecl, TT_ObjCForIn, TT_ObjCMethodExpr,
1490                                TT_ObjCMethodSpecifier, TT_ObjCProperty)) {
1491           return true;
1492         }
1493       }
1494     }
1495     return false;
1496   }
1497 
1498   bool IsObjC;
1499 };
1500 
1501 struct IncludeDirective {
1502   StringRef Filename;
1503   StringRef Text;
1504   unsigned Offset;
1505   int Category;
1506 };
1507 
1508 } // end anonymous namespace
1509 
1510 // Determines whether 'Ranges' intersects with ('Start', 'End').
1511 static bool affectsRange(ArrayRef<tooling::Range> Ranges, unsigned Start,
1512                          unsigned End) {
1513   for (auto Range : Ranges) {
1514     if (Range.getOffset() < End &&
1515         Range.getOffset() + Range.getLength() > Start)
1516       return true;
1517   }
1518   return false;
1519 }
1520 
1521 // Returns a pair (Index, OffsetToEOL) describing the position of the cursor
1522 // before sorting/deduplicating. Index is the index of the include under the
1523 // cursor in the original set of includes. If this include has duplicates, it is
1524 // the index of the first of the duplicates as the others are going to be
1525 // removed. OffsetToEOL describes the cursor's position relative to the end of
1526 // its current line.
1527 // If `Cursor` is not on any #include, `Index` will be UINT_MAX.
1528 static std::pair<unsigned, unsigned>
1529 FindCursorIndex(const SmallVectorImpl<IncludeDirective> &Includes,
1530                 const SmallVectorImpl<unsigned> &Indices, unsigned Cursor) {
1531   unsigned CursorIndex = UINT_MAX;
1532   unsigned OffsetToEOL = 0;
1533   for (int i = 0, e = Includes.size(); i != e; ++i) {
1534     unsigned Start = Includes[Indices[i]].Offset;
1535     unsigned End = Start + Includes[Indices[i]].Text.size();
1536     if (!(Cursor >= Start && Cursor < End))
1537       continue;
1538     CursorIndex = Indices[i];
1539     OffsetToEOL = End - Cursor;
1540     // Put the cursor on the only remaining #include among the duplicate
1541     // #includes.
1542     while (--i >= 0 && Includes[CursorIndex].Text == Includes[Indices[i]].Text)
1543       CursorIndex = i;
1544     break;
1545   }
1546   return std::make_pair(CursorIndex, OffsetToEOL);
1547 }
1548 
1549 // Sorts and deduplicate a block of includes given by 'Includes' alphabetically
1550 // adding the necessary replacement to 'Replaces'. 'Includes' must be in strict
1551 // source order.
1552 // #include directives with the same text will be deduplicated, and only the
1553 // first #include in the duplicate #includes remains. If the `Cursor` is
1554 // provided and put on a deleted #include, it will be moved to the remaining
1555 // #include in the duplicate #includes.
1556 static void sortCppIncludes(const FormatStyle &Style,
1557                             const SmallVectorImpl<IncludeDirective> &Includes,
1558                             ArrayRef<tooling::Range> Ranges, StringRef FileName,
1559                             tooling::Replacements &Replaces, unsigned *Cursor) {
1560   unsigned IncludesBeginOffset = Includes.front().Offset;
1561   unsigned IncludesEndOffset =
1562       Includes.back().Offset + Includes.back().Text.size();
1563   unsigned IncludesBlockSize = IncludesEndOffset - IncludesBeginOffset;
1564   if (!affectsRange(Ranges, IncludesBeginOffset, IncludesEndOffset))
1565     return;
1566   SmallVector<unsigned, 16> Indices;
1567   for (unsigned i = 0, e = Includes.size(); i != e; ++i)
1568     Indices.push_back(i);
1569   std::stable_sort(
1570       Indices.begin(), Indices.end(), [&](unsigned LHSI, unsigned RHSI) {
1571         return std::tie(Includes[LHSI].Category, Includes[LHSI].Filename) <
1572                std::tie(Includes[RHSI].Category, Includes[RHSI].Filename);
1573       });
1574   // The index of the include on which the cursor will be put after
1575   // sorting/deduplicating.
1576   unsigned CursorIndex;
1577   // The offset from cursor to the end of line.
1578   unsigned CursorToEOLOffset;
1579   if (Cursor)
1580     std::tie(CursorIndex, CursorToEOLOffset) =
1581         FindCursorIndex(Includes, Indices, *Cursor);
1582 
1583   // Deduplicate #includes.
1584   Indices.erase(std::unique(Indices.begin(), Indices.end(),
1585                             [&](unsigned LHSI, unsigned RHSI) {
1586                               return Includes[LHSI].Text == Includes[RHSI].Text;
1587                             }),
1588                 Indices.end());
1589 
1590   int CurrentCategory = Includes.front().Category;
1591 
1592   // If the #includes are out of order, we generate a single replacement fixing
1593   // the entire block. Otherwise, no replacement is generated.
1594   if (Indices.size() == Includes.size() &&
1595       std::is_sorted(Indices.begin(), Indices.end()) &&
1596       Style.IncludeBlocks == FormatStyle::IBS_Preserve)
1597     return;
1598 
1599   std::string result;
1600   for (unsigned Index : Indices) {
1601     if (!result.empty()) {
1602       result += "\n";
1603       if (Style.IncludeBlocks == FormatStyle::IBS_Regroup &&
1604           CurrentCategory != Includes[Index].Category)
1605         result += "\n";
1606     }
1607     result += Includes[Index].Text;
1608     if (Cursor && CursorIndex == Index)
1609       *Cursor = IncludesBeginOffset + result.size() - CursorToEOLOffset;
1610     CurrentCategory = Includes[Index].Category;
1611   }
1612 
1613   auto Err = Replaces.add(tooling::Replacement(
1614       FileName, Includes.front().Offset, IncludesBlockSize, result));
1615   // FIXME: better error handling. For now, just skip the replacement for the
1616   // release version.
1617   if (Err) {
1618     llvm::errs() << llvm::toString(std::move(Err)) << "\n";
1619     assert(false);
1620   }
1621 }
1622 
1623 namespace {
1624 
1625 // This class manages priorities of #include categories and calculates
1626 // priorities for headers.
1627 class IncludeCategoryManager {
1628 public:
1629   IncludeCategoryManager(const FormatStyle &Style, StringRef FileName)
1630       : Style(Style), FileName(FileName) {
1631     FileStem = llvm::sys::path::stem(FileName);
1632     for (const auto &Category : Style.IncludeCategories)
1633       CategoryRegexs.emplace_back(Category.Regex, llvm::Regex::IgnoreCase);
1634     IsMainFile = FileName.endswith(".c") || FileName.endswith(".cc") ||
1635                  FileName.endswith(".cpp") || FileName.endswith(".c++") ||
1636                  FileName.endswith(".cxx") || FileName.endswith(".m") ||
1637                  FileName.endswith(".mm");
1638   }
1639 
1640   // Returns the priority of the category which \p IncludeName belongs to.
1641   // If \p CheckMainHeader is true and \p IncludeName is a main header, returns
1642   // 0. Otherwise, returns the priority of the matching category or INT_MAX.
1643   int getIncludePriority(StringRef IncludeName, bool CheckMainHeader) {
1644     int Ret = INT_MAX;
1645     for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
1646       if (CategoryRegexs[i].match(IncludeName)) {
1647         Ret = Style.IncludeCategories[i].Priority;
1648         break;
1649       }
1650     if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
1651       Ret = 0;
1652     return Ret;
1653   }
1654 
1655 private:
1656   bool isMainHeader(StringRef IncludeName) const {
1657     if (!IncludeName.startswith("\""))
1658       return false;
1659     StringRef HeaderStem =
1660         llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1));
1661     if (FileStem.startswith(HeaderStem) ||
1662         FileStem.startswith_lower(HeaderStem)) {
1663       llvm::Regex MainIncludeRegex(
1664           (HeaderStem + Style.IncludeIsMainRegex).str(),
1665           llvm::Regex::IgnoreCase);
1666       if (MainIncludeRegex.match(FileStem))
1667         return true;
1668     }
1669     return false;
1670   }
1671 
1672   const FormatStyle &Style;
1673   bool IsMainFile;
1674   StringRef FileName;
1675   StringRef FileStem;
1676   SmallVector<llvm::Regex, 4> CategoryRegexs;
1677 };
1678 
1679 const char IncludeRegexPattern[] =
1680     R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
1681 
1682 } // anonymous namespace
1683 
1684 tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
1685                                       ArrayRef<tooling::Range> Ranges,
1686                                       StringRef FileName,
1687                                       tooling::Replacements &Replaces,
1688                                       unsigned *Cursor) {
1689   unsigned Prev = 0;
1690   unsigned SearchFrom = 0;
1691   llvm::Regex IncludeRegex(IncludeRegexPattern);
1692   SmallVector<StringRef, 4> Matches;
1693   SmallVector<IncludeDirective, 16> IncludesInBlock;
1694 
1695   // In compiled files, consider the first #include to be the main #include of
1696   // the file if it is not a system #include. This ensures that the header
1697   // doesn't have hidden dependencies
1698   // (http://llvm.org/docs/CodingStandards.html#include-style).
1699   //
1700   // FIXME: Do some sanity checking, e.g. edit distance of the base name, to fix
1701   // cases where the first #include is unlikely to be the main header.
1702   IncludeCategoryManager Categories(Style, FileName);
1703   bool FirstIncludeBlock = true;
1704   bool MainIncludeFound = false;
1705   bool FormattingOff = false;
1706 
1707   for (;;) {
1708     auto Pos = Code.find('\n', SearchFrom);
1709     StringRef Line =
1710         Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev);
1711 
1712     StringRef Trimmed = Line.trim();
1713     if (Trimmed == "// clang-format off")
1714       FormattingOff = true;
1715     else if (Trimmed == "// clang-format on")
1716       FormattingOff = false;
1717 
1718     const bool EmptyLineSkipped =
1719         Trimmed.empty() && (Style.IncludeBlocks == FormatStyle::IBS_Merge ||
1720                             Style.IncludeBlocks == FormatStyle::IBS_Regroup);
1721 
1722     if (!FormattingOff && !Line.endswith("\\")) {
1723       if (IncludeRegex.match(Line, &Matches)) {
1724         StringRef IncludeName = Matches[2];
1725         int Category = Categories.getIncludePriority(
1726             IncludeName,
1727             /*CheckMainHeader=*/!MainIncludeFound && FirstIncludeBlock);
1728         if (Category == 0)
1729           MainIncludeFound = true;
1730         IncludesInBlock.push_back({IncludeName, Line, Prev, Category});
1731       } else if (!IncludesInBlock.empty() && !EmptyLineSkipped) {
1732         sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,
1733                         Cursor);
1734         IncludesInBlock.clear();
1735         FirstIncludeBlock = false;
1736       }
1737       Prev = Pos + 1;
1738     }
1739     if (Pos == StringRef::npos || Pos + 1 == Code.size())
1740       break;
1741     SearchFrom = Pos + 1;
1742   }
1743   if (!IncludesInBlock.empty())
1744     sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, Cursor);
1745   return Replaces;
1746 }
1747 
1748 bool isMpegTS(StringRef Code) {
1749   // MPEG transport streams use the ".ts" file extension. clang-format should
1750   // not attempt to format those. MPEG TS' frame format starts with 0x47 every
1751   // 189 bytes - detect that and return.
1752   return Code.size() > 188 && Code[0] == 0x47 && Code[188] == 0x47;
1753 }
1754 
1755 bool isLikelyXml(StringRef Code) { return Code.ltrim().startswith("<"); }
1756 
1757 tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
1758                                    ArrayRef<tooling::Range> Ranges,
1759                                    StringRef FileName, unsigned *Cursor) {
1760   tooling::Replacements Replaces;
1761   if (!Style.SortIncludes)
1762     return Replaces;
1763   if (isLikelyXml(Code))
1764     return Replaces;
1765   if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript &&
1766       isMpegTS(Code))
1767     return Replaces;
1768   if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript)
1769     return sortJavaScriptImports(Style, Code, Ranges, FileName);
1770   sortCppIncludes(Style, Code, Ranges, FileName, Replaces, Cursor);
1771   return Replaces;
1772 }
1773 
1774 template <typename T>
1775 static llvm::Expected<tooling::Replacements>
1776 processReplacements(T ProcessFunc, StringRef Code,
1777                     const tooling::Replacements &Replaces,
1778                     const FormatStyle &Style) {
1779   if (Replaces.empty())
1780     return tooling::Replacements();
1781 
1782   auto NewCode = applyAllReplacements(Code, Replaces);
1783   if (!NewCode)
1784     return NewCode.takeError();
1785   std::vector<tooling::Range> ChangedRanges = Replaces.getAffectedRanges();
1786   StringRef FileName = Replaces.begin()->getFilePath();
1787 
1788   tooling::Replacements FormatReplaces =
1789       ProcessFunc(Style, *NewCode, ChangedRanges, FileName);
1790 
1791   return Replaces.merge(FormatReplaces);
1792 }
1793 
1794 llvm::Expected<tooling::Replacements>
1795 formatReplacements(StringRef Code, const tooling::Replacements &Replaces,
1796                    const FormatStyle &Style) {
1797   // We need to use lambda function here since there are two versions of
1798   // `sortIncludes`.
1799   auto SortIncludes = [](const FormatStyle &Style, StringRef Code,
1800                          std::vector<tooling::Range> Ranges,
1801                          StringRef FileName) -> tooling::Replacements {
1802     return sortIncludes(Style, Code, Ranges, FileName);
1803   };
1804   auto SortedReplaces =
1805       processReplacements(SortIncludes, Code, Replaces, Style);
1806   if (!SortedReplaces)
1807     return SortedReplaces.takeError();
1808 
1809   // We need to use lambda function here since there are two versions of
1810   // `reformat`.
1811   auto Reformat = [](const FormatStyle &Style, StringRef Code,
1812                      std::vector<tooling::Range> Ranges,
1813                      StringRef FileName) -> tooling::Replacements {
1814     return reformat(Style, Code, Ranges, FileName);
1815   };
1816   return processReplacements(Reformat, Code, *SortedReplaces, Style);
1817 }
1818 
1819 namespace {
1820 
1821 inline bool isHeaderInsertion(const tooling::Replacement &Replace) {
1822   return Replace.getOffset() == UINT_MAX && Replace.getLength() == 0 &&
1823          llvm::Regex(IncludeRegexPattern).match(Replace.getReplacementText());
1824 }
1825 
1826 inline bool isHeaderDeletion(const tooling::Replacement &Replace) {
1827   return Replace.getOffset() == UINT_MAX && Replace.getLength() == 1;
1828 }
1829 
1830 // Returns the offset after skipping a sequence of tokens, matched by \p
1831 // GetOffsetAfterSequence, from the start of the code.
1832 // \p GetOffsetAfterSequence should be a function that matches a sequence of
1833 // tokens and returns an offset after the sequence.
1834 unsigned getOffsetAfterTokenSequence(
1835     StringRef FileName, StringRef Code, const FormatStyle &Style,
1836     llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)>
1837         GetOffsetAfterSequence) {
1838   std::unique_ptr<Environment> Env =
1839       Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
1840   const SourceManager &SourceMgr = Env->getSourceManager();
1841   Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr,
1842             getFormattingLangOpts(Style));
1843   Token Tok;
1844   // Get the first token.
1845   Lex.LexFromRawLexer(Tok);
1846   return GetOffsetAfterSequence(SourceMgr, Lex, Tok);
1847 }
1848 
1849 // Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
1850 // \p Tok will be the token after this directive; otherwise, it can be any token
1851 // after the given \p Tok (including \p Tok).
1852 bool checkAndConsumeDirectiveWithName(Lexer &Lex, StringRef Name, Token &Tok) {
1853   bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
1854                  Tok.is(tok::raw_identifier) &&
1855                  Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) &&
1856                  Tok.is(tok::raw_identifier);
1857   if (Matched)
1858     Lex.LexFromRawLexer(Tok);
1859   return Matched;
1860 }
1861 
1862 void skipComments(Lexer &Lex, Token &Tok) {
1863   while (Tok.is(tok::comment))
1864     if (Lex.LexFromRawLexer(Tok))
1865       return;
1866 }
1867 
1868 // Returns the offset after header guard directives and any comments
1869 // before/after header guards. If no header guard presents in the code, this
1870 // will returns the offset after skipping all comments from the start of the
1871 // code.
1872 unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
1873                                                StringRef Code,
1874                                                const FormatStyle &Style) {
1875   return getOffsetAfterTokenSequence(
1876       FileName, Code, Style,
1877       [](const SourceManager &SM, Lexer &Lex, Token Tok) {
1878         skipComments(Lex, Tok);
1879         unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
1880         if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
1881           skipComments(Lex, Tok);
1882           if (checkAndConsumeDirectiveWithName(Lex, "define", Tok))
1883             return SM.getFileOffset(Tok.getLocation());
1884         }
1885         return InitialOffset;
1886       });
1887 }
1888 
1889 // Check if a sequence of tokens is like
1890 //    "#include ("header.h" | <header.h>)".
1891 // If it is, \p Tok will be the token after this directive; otherwise, it can be
1892 // any token after the given \p Tok (including \p Tok).
1893 bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) {
1894   auto Matched = [&]() {
1895     Lex.LexFromRawLexer(Tok);
1896     return true;
1897   };
1898   if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
1899       Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") {
1900     if (Lex.LexFromRawLexer(Tok))
1901       return false;
1902     if (Tok.is(tok::string_literal))
1903       return Matched();
1904     if (Tok.is(tok::less)) {
1905       while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) {
1906       }
1907       if (Tok.is(tok::greater))
1908         return Matched();
1909     }
1910   }
1911   return false;
1912 }
1913 
1914 // Returns the offset of the last #include directive after which a new
1915 // #include can be inserted. This ignores #include's after the #include block(s)
1916 // in the beginning of a file to avoid inserting headers into code sections
1917 // where new #include's should not be added by default.
1918 // These code sections include:
1919 //      - raw string literals (containing #include).
1920 //      - #if blocks.
1921 //      - Special #include's among declarations (e.g. functions).
1922 //
1923 // If no #include after which a new #include can be inserted, this returns the
1924 // offset after skipping all comments from the start of the code.
1925 // Inserting after an #include is not allowed if it comes after code that is not
1926 // #include (e.g. pre-processing directive that is not #include, declarations).
1927 unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
1928                                      const FormatStyle &Style) {
1929   return getOffsetAfterTokenSequence(
1930       FileName, Code, Style,
1931       [](const SourceManager &SM, Lexer &Lex, Token Tok) {
1932         skipComments(Lex, Tok);
1933         unsigned MaxOffset = SM.getFileOffset(Tok.getLocation());
1934         while (checkAndConsumeInclusiveDirective(Lex, Tok))
1935           MaxOffset = SM.getFileOffset(Tok.getLocation());
1936         return MaxOffset;
1937       });
1938 }
1939 
1940 bool isDeletedHeader(llvm::StringRef HeaderName,
1941                      const std::set<llvm::StringRef> &HeadersToDelete) {
1942   return HeadersToDelete.count(HeaderName) ||
1943          HeadersToDelete.count(HeaderName.trim("\"<>"));
1944 }
1945 
1946 // FIXME: insert empty lines between newly created blocks.
1947 tooling::Replacements
1948 fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
1949                         const FormatStyle &Style) {
1950   if (!Style.isCpp())
1951     return Replaces;
1952 
1953   tooling::Replacements HeaderInsertions;
1954   std::set<llvm::StringRef> HeadersToDelete;
1955   tooling::Replacements Result;
1956   for (const auto &R : Replaces) {
1957     if (isHeaderInsertion(R)) {
1958       // Replacements from \p Replaces must be conflict-free already, so we can
1959       // simply consume the error.
1960       llvm::consumeError(HeaderInsertions.add(R));
1961     } else if (isHeaderDeletion(R)) {
1962       HeadersToDelete.insert(R.getReplacementText());
1963     } else if (R.getOffset() == UINT_MAX) {
1964       llvm::errs() << "Insertions other than header #include insertion are "
1965                       "not supported! "
1966                    << R.getReplacementText() << "\n";
1967     } else {
1968       llvm::consumeError(Result.add(R));
1969     }
1970   }
1971   if (HeaderInsertions.empty() && HeadersToDelete.empty())
1972     return Replaces;
1973 
1974   llvm::Regex IncludeRegex(IncludeRegexPattern);
1975   llvm::Regex DefineRegex(R"(^[\t\ ]*#[\t\ ]*define[\t\ ]*[^\\]*$)");
1976   SmallVector<StringRef, 4> Matches;
1977 
1978   StringRef FileName = Replaces.begin()->getFilePath();
1979   IncludeCategoryManager Categories(Style, FileName);
1980 
1981   // Record the offset of the end of the last include in each category.
1982   std::map<int, int> CategoryEndOffsets;
1983   // All possible priorities.
1984   // Add 0 for main header and INT_MAX for headers that are not in any category.
1985   std::set<int> Priorities = {0, INT_MAX};
1986   for (const auto &Category : Style.IncludeCategories)
1987     Priorities.insert(Category.Priority);
1988   int FirstIncludeOffset = -1;
1989   // All new headers should be inserted after this offset.
1990   unsigned MinInsertOffset =
1991       getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style);
1992   StringRef TrimmedCode = Code.drop_front(MinInsertOffset);
1993   // Max insertion offset in the original code.
1994   unsigned MaxInsertOffset =
1995       MinInsertOffset +
1996       getMaxHeaderInsertionOffset(FileName, TrimmedCode, Style);
1997   SmallVector<StringRef, 32> Lines;
1998   TrimmedCode.split(Lines, '\n');
1999   unsigned Offset = MinInsertOffset;
2000   unsigned NextLineOffset;
2001   std::set<StringRef> ExistingIncludes;
2002   for (auto Line : Lines) {
2003     NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
2004     if (IncludeRegex.match(Line, &Matches)) {
2005       // The header name with quotes or angle brackets.
2006       StringRef IncludeName = Matches[2];
2007       ExistingIncludes.insert(IncludeName);
2008       // Only record the offset of current #include if we can insert after it.
2009       if (Offset <= MaxInsertOffset) {
2010         int Category = Categories.getIncludePriority(
2011             IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
2012         CategoryEndOffsets[Category] = NextLineOffset;
2013         if (FirstIncludeOffset < 0)
2014           FirstIncludeOffset = Offset;
2015       }
2016       if (isDeletedHeader(IncludeName, HeadersToDelete)) {
2017         // If this is the last line without trailing newline, we need to make
2018         // sure we don't delete across the file boundary.
2019         unsigned Length = std::min(Line.size() + 1, Code.size() - Offset);
2020         llvm::Error Err =
2021             Result.add(tooling::Replacement(FileName, Offset, Length, ""));
2022         if (Err) {
2023           // Ignore the deletion on conflict.
2024           llvm::errs() << "Failed to add header deletion replacement for "
2025                        << IncludeName << ": " << llvm::toString(std::move(Err))
2026                        << "\n";
2027         }
2028       }
2029     }
2030     Offset = NextLineOffset;
2031   }
2032 
2033   // Populate CategoryEndOfssets:
2034   // - Ensure that CategoryEndOffset[Highest] is always populated.
2035   // - If CategoryEndOffset[Priority] isn't set, use the next higher value that
2036   //   is set, up to CategoryEndOffset[Highest].
2037   auto Highest = Priorities.begin();
2038   if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) {
2039     if (FirstIncludeOffset >= 0)
2040       CategoryEndOffsets[*Highest] = FirstIncludeOffset;
2041     else
2042       CategoryEndOffsets[*Highest] = MinInsertOffset;
2043   }
2044   // By this point, CategoryEndOffset[Highest] is always set appropriately:
2045   //  - to an appropriate location before/after existing #includes, or
2046   //  - to right after the header guard, or
2047   //  - to the beginning of the file.
2048   for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
2049     if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
2050       CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)];
2051 
2052   bool NeedNewLineAtEnd = !Code.empty() && Code.back() != '\n';
2053   for (const auto &R : HeaderInsertions) {
2054     auto IncludeDirective = R.getReplacementText();
2055     bool Matched = IncludeRegex.match(IncludeDirective, &Matches);
2056     assert(Matched && "Header insertion replacement must have replacement text "
2057                       "'#include ...'");
2058     (void)Matched;
2059     auto IncludeName = Matches[2];
2060     if (ExistingIncludes.find(IncludeName) != ExistingIncludes.end()) {
2061       DEBUG(llvm::dbgs() << "Skip adding existing include : " << IncludeName
2062                          << "\n");
2063       continue;
2064     }
2065     int Category =
2066         Categories.getIncludePriority(IncludeName, /*CheckMainHeader=*/true);
2067     Offset = CategoryEndOffsets[Category];
2068     std::string NewInclude = !IncludeDirective.endswith("\n")
2069                                  ? (IncludeDirective + "\n").str()
2070                                  : IncludeDirective.str();
2071     // When inserting headers at end of the code, also append '\n' to the code
2072     // if it does not end with '\n'.
2073     if (NeedNewLineAtEnd && Offset == Code.size()) {
2074       NewInclude = "\n" + NewInclude;
2075       NeedNewLineAtEnd = false;
2076     }
2077     auto NewReplace = tooling::Replacement(FileName, Offset, 0, NewInclude);
2078     auto Err = Result.add(NewReplace);
2079     if (Err) {
2080       llvm::consumeError(std::move(Err));
2081       unsigned NewOffset = Result.getShiftedCodePosition(Offset);
2082       NewReplace = tooling::Replacement(FileName, NewOffset, 0, NewInclude);
2083       Result = Result.merge(tooling::Replacements(NewReplace));
2084     }
2085   }
2086   return Result;
2087 }
2088 
2089 } // anonymous namespace
2090 
2091 llvm::Expected<tooling::Replacements>
2092 cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces,
2093                           const FormatStyle &Style) {
2094   // We need to use lambda function here since there are two versions of
2095   // `cleanup`.
2096   auto Cleanup = [](const FormatStyle &Style, StringRef Code,
2097                     std::vector<tooling::Range> Ranges,
2098                     StringRef FileName) -> tooling::Replacements {
2099     return cleanup(Style, Code, Ranges, FileName);
2100   };
2101   // Make header insertion replacements insert new headers into correct blocks.
2102   tooling::Replacements NewReplaces =
2103       fixCppIncludeInsertions(Code, Replaces, Style);
2104   return processReplacements(Cleanup, Code, NewReplaces, Style);
2105 }
2106 
2107 namespace internal {
2108 std::pair<tooling::Replacements, unsigned>
2109 reformat(const FormatStyle &Style, StringRef Code,
2110          ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,
2111          unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName,
2112          FormattingAttemptStatus *Status) {
2113   FormatStyle Expanded = expandPresets(Style);
2114   if (Expanded.DisableFormat)
2115     return {tooling::Replacements(), 0};
2116   if (isLikelyXml(Code))
2117     return {tooling::Replacements(), 0};
2118   if (Expanded.Language == FormatStyle::LK_JavaScript && isMpegTS(Code))
2119     return {tooling::Replacements(), 0};
2120 
2121   typedef std::function<std::pair<tooling::Replacements, unsigned>(
2122       const Environment &)>
2123       AnalyzerPass;
2124   SmallVector<AnalyzerPass, 4> Passes;
2125 
2126   if (Style.Language == FormatStyle::LK_Cpp) {
2127     if (Style.FixNamespaceComments)
2128       Passes.emplace_back([&](const Environment &Env) {
2129         return NamespaceEndCommentsFixer(Env, Expanded).process();
2130       });
2131 
2132     if (Style.SortUsingDeclarations)
2133       Passes.emplace_back([&](const Environment &Env) {
2134         return UsingDeclarationsSorter(Env, Expanded).process();
2135       });
2136   }
2137 
2138   if (Style.Language == FormatStyle::LK_JavaScript &&
2139       Style.JavaScriptQuotes != FormatStyle::JSQS_Leave)
2140     Passes.emplace_back([&](const Environment &Env) {
2141       return JavaScriptRequoter(Env, Expanded).process();
2142     });
2143 
2144   Passes.emplace_back([&](const Environment &Env) {
2145     return Formatter(Env, Expanded, Status).process();
2146   });
2147 
2148   std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment(
2149       Code, FileName, Ranges, FirstStartColumn, NextStartColumn,
2150       LastStartColumn);
2151   llvm::Optional<std::string> CurrentCode = None;
2152   tooling::Replacements Fixes;
2153   unsigned Penalty = 0;
2154   for (size_t I = 0, E = Passes.size(); I < E; ++I) {
2155     std::pair<tooling::Replacements, unsigned> PassFixes = Passes[I](*Env);
2156     auto NewCode = applyAllReplacements(
2157         CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes.first);
2158     if (NewCode) {
2159       Fixes = Fixes.merge(PassFixes.first);
2160       Penalty += PassFixes.second;
2161       if (I + 1 < E) {
2162         CurrentCode = std::move(*NewCode);
2163         Env = Environment::CreateVirtualEnvironment(
2164             *CurrentCode, FileName,
2165             tooling::calculateRangesAfterReplacements(Fixes, Ranges),
2166             FirstStartColumn, NextStartColumn, LastStartColumn);
2167       }
2168     }
2169   }
2170 
2171   return {Fixes, Penalty};
2172 }
2173 } // namespace internal
2174 
2175 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
2176                                ArrayRef<tooling::Range> Ranges,
2177                                StringRef FileName,
2178                                FormattingAttemptStatus *Status) {
2179   return internal::reformat(Style, Code, Ranges,
2180                             /*FirstStartColumn=*/0,
2181                             /*NextStartColumn=*/0,
2182                             /*LastStartColumn=*/0, FileName, Status)
2183       .first;
2184 }
2185 
2186 tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,
2187                               ArrayRef<tooling::Range> Ranges,
2188                               StringRef FileName) {
2189   // cleanups only apply to C++ (they mostly concern ctor commas etc.)
2190   if (Style.Language != FormatStyle::LK_Cpp)
2191     return tooling::Replacements();
2192   std::unique_ptr<Environment> Env =
2193       Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
2194   Cleaner Clean(*Env, Style);
2195   return Clean.process().first;
2196 }
2197 
2198 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
2199                                ArrayRef<tooling::Range> Ranges,
2200                                StringRef FileName, bool *IncompleteFormat) {
2201   FormattingAttemptStatus Status;
2202   auto Result = reformat(Style, Code, Ranges, FileName, &Status);
2203   if (!Status.FormatComplete)
2204     *IncompleteFormat = true;
2205   return Result;
2206 }
2207 
2208 tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style,
2209                                               StringRef Code,
2210                                               ArrayRef<tooling::Range> Ranges,
2211                                               StringRef FileName) {
2212   std::unique_ptr<Environment> Env =
2213       Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
2214   NamespaceEndCommentsFixer Fix(*Env, Style);
2215   return Fix.process().first;
2216 }
2217 
2218 tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,
2219                                             StringRef Code,
2220                                             ArrayRef<tooling::Range> Ranges,
2221                                             StringRef FileName) {
2222   std::unique_ptr<Environment> Env =
2223       Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
2224   UsingDeclarationsSorter Sorter(*Env, Style);
2225   return Sorter.process().first;
2226 }
2227 
2228 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
2229   LangOptions LangOpts;
2230   LangOpts.CPlusPlus = 1;
2231   LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
2232   LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
2233   LangOpts.CPlusPlus17 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
2234   LangOpts.CPlusPlus2a = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
2235   LangOpts.LineComment = 1;
2236   bool AlternativeOperators = Style.isCpp();
2237   LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;
2238   LangOpts.Bool = 1;
2239   LangOpts.ObjC1 = 1;
2240   LangOpts.ObjC2 = 1;
2241   LangOpts.MicrosoftExt = 1;    // To get kw___try, kw___finally.
2242   LangOpts.DeclSpecKeyword = 1; // To get __declspec.
2243   return LangOpts;
2244 }
2245 
2246 const char *StyleOptionHelpDescription =
2247     "Coding style, currently supports:\n"
2248     "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
2249     "Use -style=file to load style configuration from\n"
2250     ".clang-format file located in one of the parent\n"
2251     "directories of the source file (or current\n"
2252     "directory for stdin).\n"
2253     "Use -style=\"{key: value, ...}\" to set specific\n"
2254     "parameters, e.g.:\n"
2255     "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
2256 
2257 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
2258   if (FileName.endswith(".java"))
2259     return FormatStyle::LK_Java;
2260   if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts"))
2261     return FormatStyle::LK_JavaScript; // JavaScript or TypeScript.
2262   if (FileName.endswith(".m") || FileName.endswith(".mm"))
2263     return FormatStyle::LK_ObjC;
2264   if (FileName.endswith_lower(".proto") ||
2265       FileName.endswith_lower(".protodevel"))
2266     return FormatStyle::LK_Proto;
2267   if (FileName.endswith_lower(".textpb") ||
2268       FileName.endswith_lower(".pb.txt") ||
2269       FileName.endswith_lower(".textproto") ||
2270       FileName.endswith_lower(".asciipb"))
2271     return FormatStyle::LK_TextProto;
2272   if (FileName.endswith_lower(".td"))
2273     return FormatStyle::LK_TableGen;
2274   return FormatStyle::LK_Cpp;
2275 }
2276 
2277 llvm::Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName,
2278                                      StringRef FallbackStyleName,
2279                                      StringRef Code, vfs::FileSystem *FS) {
2280   if (!FS) {
2281     FS = vfs::getRealFileSystem().get();
2282   }
2283   FormatStyle Style = getLLVMStyle();
2284   Style.Language = getLanguageByFileName(FileName);
2285 
2286   if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h")) {
2287     std::unique_ptr<Environment> Env =
2288         Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
2289     ObjCHeaderStyleGuesser Guesser(*Env, Style);
2290     Guesser.process();
2291     if (Guesser.isObjC()) {
2292       Style.Language = FormatStyle::LK_ObjC;
2293     }
2294   }
2295 
2296   FormatStyle FallbackStyle = getNoStyle();
2297   if (!getPredefinedStyle(FallbackStyleName, Style.Language, &FallbackStyle))
2298     return make_string_error("Invalid fallback style \"" + FallbackStyleName);
2299 
2300   if (StyleName.startswith("{")) {
2301     // Parse YAML/JSON style from the command line.
2302     if (std::error_code ec = parseConfiguration(StyleName, &Style))
2303       return make_string_error("Error parsing -style: " + ec.message());
2304     return Style;
2305   }
2306 
2307   if (!StyleName.equals_lower("file")) {
2308     if (!getPredefinedStyle(StyleName, Style.Language, &Style))
2309       return make_string_error("Invalid value for -style");
2310     return Style;
2311   }
2312 
2313   // Look for .clang-format/_clang-format file in the file's parent directories.
2314   SmallString<128> UnsuitableConfigFiles;
2315   SmallString<128> Path(FileName);
2316   if (std::error_code EC = FS->makeAbsolute(Path))
2317     return make_string_error(EC.message());
2318 
2319   for (StringRef Directory = Path; !Directory.empty();
2320        Directory = llvm::sys::path::parent_path(Directory)) {
2321 
2322     auto Status = FS->status(Directory);
2323     if (!Status ||
2324         Status->getType() != llvm::sys::fs::file_type::directory_file) {
2325       continue;
2326     }
2327 
2328     SmallString<128> ConfigFile(Directory);
2329 
2330     llvm::sys::path::append(ConfigFile, ".clang-format");
2331     DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
2332 
2333     Status = FS->status(ConfigFile.str());
2334     bool FoundConfigFile =
2335         Status && (Status->getType() == llvm::sys::fs::file_type::regular_file);
2336     if (!FoundConfigFile) {
2337       // Try _clang-format too, since dotfiles are not commonly used on Windows.
2338       ConfigFile = Directory;
2339       llvm::sys::path::append(ConfigFile, "_clang-format");
2340       DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
2341       Status = FS->status(ConfigFile.str());
2342       FoundConfigFile = Status && (Status->getType() ==
2343                                    llvm::sys::fs::file_type::regular_file);
2344     }
2345 
2346     if (FoundConfigFile) {
2347       llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
2348           FS->getBufferForFile(ConfigFile.str());
2349       if (std::error_code EC = Text.getError())
2350         return make_string_error(EC.message());
2351       if (std::error_code ec =
2352               parseConfiguration(Text.get()->getBuffer(), &Style)) {
2353         if (ec == ParseError::Unsuitable) {
2354           if (!UnsuitableConfigFiles.empty())
2355             UnsuitableConfigFiles.append(", ");
2356           UnsuitableConfigFiles.append(ConfigFile);
2357           continue;
2358         }
2359         return make_string_error("Error reading " + ConfigFile + ": " +
2360                                  ec.message());
2361       }
2362       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
2363       return Style;
2364     }
2365   }
2366   if (!UnsuitableConfigFiles.empty())
2367     return make_string_error("Configuration file(s) do(es) not support " +
2368                              getLanguageName(Style.Language) + ": " +
2369                              UnsuitableConfigFiles);
2370   return FallbackStyle;
2371 }
2372 
2373 } // namespace format
2374 } // namespace clang
2375