1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "clang/Format/Format.h"
17 #include "ContinuationIndenter.h"
18 #include "TokenAnnotator.h"
19 #include "UnwrappedLineFormatter.h"
20 #include "UnwrappedLineParser.h"
21 #include "WhitespaceManager.h"
22 #include "clang/Basic/Diagnostic.h"
23 #include "clang/Basic/DiagnosticOptions.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Basic/VirtualFileSystem.h"
26 #include "clang/Lex/Lexer.h"
27 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/Support/Allocator.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/Path.h"
31 #include "llvm/Support/Regex.h"
32 #include "llvm/Support/YAMLTraits.h"
33 #include <queue>
34 #include <string>
35 
36 #define DEBUG_TYPE "format-formatter"
37 
38 using clang::format::FormatStyle;
39 
40 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
41 LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory)
42 
43 namespace llvm {
44 namespace yaml {
45 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
46   static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
47     IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
48     IO.enumCase(Value, "Java", FormatStyle::LK_Java);
49     IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
50     IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
51     IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen);
52   }
53 };
54 
55 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
56   static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
57     IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
58     IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
59     IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
60     IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
61     IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
62   }
63 };
64 
65 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
66   static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
67     IO.enumCase(Value, "Never", FormatStyle::UT_Never);
68     IO.enumCase(Value, "false", FormatStyle::UT_Never);
69     IO.enumCase(Value, "Always", FormatStyle::UT_Always);
70     IO.enumCase(Value, "true", FormatStyle::UT_Always);
71     IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
72     IO.enumCase(Value, "ForContinuationAndIndentation",
73                 FormatStyle::UT_ForContinuationAndIndentation);
74   }
75 };
76 
77 template <> struct ScalarEnumerationTraits<FormatStyle::JavaScriptQuoteStyle> {
78   static void enumeration(IO &IO, FormatStyle::JavaScriptQuoteStyle &Value) {
79     IO.enumCase(Value, "Leave", FormatStyle::JSQS_Leave);
80     IO.enumCase(Value, "Single", FormatStyle::JSQS_Single);
81     IO.enumCase(Value, "Double", FormatStyle::JSQS_Double);
82   }
83 };
84 
85 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
86   static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
87     IO.enumCase(Value, "None", FormatStyle::SFS_None);
88     IO.enumCase(Value, "false", FormatStyle::SFS_None);
89     IO.enumCase(Value, "All", FormatStyle::SFS_All);
90     IO.enumCase(Value, "true", FormatStyle::SFS_All);
91     IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline);
92     IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty);
93   }
94 };
95 
96 template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> {
97   static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) {
98     IO.enumCase(Value, "All", FormatStyle::BOS_All);
99     IO.enumCase(Value, "true", FormatStyle::BOS_All);
100     IO.enumCase(Value, "None", FormatStyle::BOS_None);
101     IO.enumCase(Value, "false", FormatStyle::BOS_None);
102     IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment);
103   }
104 };
105 
106 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
107   static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
108     IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
109     IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
110     IO.enumCase(Value, "Mozilla", FormatStyle::BS_Mozilla);
111     IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
112     IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
113     IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
114     IO.enumCase(Value, "WebKit", FormatStyle::BS_WebKit);
115     IO.enumCase(Value, "Custom", FormatStyle::BS_Custom);
116   }
117 };
118 
119 template <>
120 struct ScalarEnumerationTraits<FormatStyle::ReturnTypeBreakingStyle> {
121   static void enumeration(IO &IO, FormatStyle::ReturnTypeBreakingStyle &Value) {
122     IO.enumCase(Value, "None", FormatStyle::RTBS_None);
123     IO.enumCase(Value, "All", FormatStyle::RTBS_All);
124     IO.enumCase(Value, "TopLevel", FormatStyle::RTBS_TopLevel);
125     IO.enumCase(Value, "TopLevelDefinitions",
126                 FormatStyle::RTBS_TopLevelDefinitions);
127     IO.enumCase(Value, "AllDefinitions", FormatStyle::RTBS_AllDefinitions);
128   }
129 };
130 
131 template <>
132 struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> {
133   static void
134   enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) {
135     IO.enumCase(Value, "None", FormatStyle::DRTBS_None);
136     IO.enumCase(Value, "All", FormatStyle::DRTBS_All);
137     IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel);
138 
139     // For backward compatibility.
140     IO.enumCase(Value, "false", FormatStyle::DRTBS_None);
141     IO.enumCase(Value, "true", FormatStyle::DRTBS_All);
142   }
143 };
144 
145 template <>
146 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
147   static void enumeration(IO &IO,
148                           FormatStyle::NamespaceIndentationKind &Value) {
149     IO.enumCase(Value, "None", FormatStyle::NI_None);
150     IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
151     IO.enumCase(Value, "All", FormatStyle::NI_All);
152   }
153 };
154 
155 template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> {
156   static void enumeration(IO &IO, FormatStyle::BracketAlignmentStyle &Value) {
157     IO.enumCase(Value, "Align", FormatStyle::BAS_Align);
158     IO.enumCase(Value, "DontAlign", FormatStyle::BAS_DontAlign);
159     IO.enumCase(Value, "AlwaysBreak", FormatStyle::BAS_AlwaysBreak);
160 
161     // For backward compatibility.
162     IO.enumCase(Value, "true", FormatStyle::BAS_Align);
163     IO.enumCase(Value, "false", FormatStyle::BAS_DontAlign);
164   }
165 };
166 
167 template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> {
168   static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) {
169     IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle);
170     IO.enumCase(Value, "Left", FormatStyle::PAS_Left);
171     IO.enumCase(Value, "Right", FormatStyle::PAS_Right);
172 
173     // For backward compatibility.
174     IO.enumCase(Value, "true", FormatStyle::PAS_Left);
175     IO.enumCase(Value, "false", FormatStyle::PAS_Right);
176   }
177 };
178 
179 template <>
180 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
181   static void enumeration(IO &IO,
182                           FormatStyle::SpaceBeforeParensOptions &Value) {
183     IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
184     IO.enumCase(Value, "ControlStatements",
185                 FormatStyle::SBPO_ControlStatements);
186     IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
187 
188     // For backward compatibility.
189     IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
190     IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
191   }
192 };
193 
194 template <> struct MappingTraits<FormatStyle> {
195   static void mapping(IO &IO, FormatStyle &Style) {
196     // When reading, read the language first, we need it for getPredefinedStyle.
197     IO.mapOptional("Language", Style.Language);
198 
199     if (IO.outputting()) {
200       StringRef StylesArray[] = {"LLVM",    "Google", "Chromium",
201                                  "Mozilla", "WebKit", "GNU"};
202       ArrayRef<StringRef> Styles(StylesArray);
203       for (size_t i = 0, e = Styles.size(); i < e; ++i) {
204         StringRef StyleName(Styles[i]);
205         FormatStyle PredefinedStyle;
206         if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
207             Style == PredefinedStyle) {
208           IO.mapOptional("# BasedOnStyle", StyleName);
209           break;
210         }
211       }
212     } else {
213       StringRef BasedOnStyle;
214       IO.mapOptional("BasedOnStyle", BasedOnStyle);
215       if (!BasedOnStyle.empty()) {
216         FormatStyle::LanguageKind OldLanguage = Style.Language;
217         FormatStyle::LanguageKind Language =
218             ((FormatStyle *)IO.getContext())->Language;
219         if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
220           IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
221           return;
222         }
223         Style.Language = OldLanguage;
224       }
225     }
226 
227     // For backward compatibility.
228     if (!IO.outputting()) {
229       IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment);
230       IO.mapOptional("IndentFunctionDeclarationAfterType",
231                      Style.IndentWrappedFunctionNames);
232       IO.mapOptional("PointerBindsToType", Style.PointerAlignment);
233       IO.mapOptional("SpaceAfterControlStatementKeyword",
234                      Style.SpaceBeforeParens);
235     }
236 
237     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
238     IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
239     IO.mapOptional("AlignConsecutiveAssignments",
240                    Style.AlignConsecutiveAssignments);
241     IO.mapOptional("AlignConsecutiveDeclarations",
242                    Style.AlignConsecutiveDeclarations);
243     IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
244     IO.mapOptional("AlignOperands", Style.AlignOperands);
245     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
246     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
247                    Style.AllowAllParametersOfDeclarationOnNextLine);
248     IO.mapOptional("AllowShortBlocksOnASingleLine",
249                    Style.AllowShortBlocksOnASingleLine);
250     IO.mapOptional("AllowShortCaseLabelsOnASingleLine",
251                    Style.AllowShortCaseLabelsOnASingleLine);
252     IO.mapOptional("AllowShortFunctionsOnASingleLine",
253                    Style.AllowShortFunctionsOnASingleLine);
254     IO.mapOptional("AllowShortIfStatementsOnASingleLine",
255                    Style.AllowShortIfStatementsOnASingleLine);
256     IO.mapOptional("AllowShortLoopsOnASingleLine",
257                    Style.AllowShortLoopsOnASingleLine);
258     IO.mapOptional("AlwaysBreakAfterDefinitionReturnType",
259                    Style.AlwaysBreakAfterDefinitionReturnType);
260     IO.mapOptional("AlwaysBreakAfterReturnType",
261                    Style.AlwaysBreakAfterReturnType);
262     // If AlwaysBreakAfterDefinitionReturnType was specified but
263     // AlwaysBreakAfterReturnType was not, initialize the latter from the
264     // former for backwards compatibility.
265     if (Style.AlwaysBreakAfterDefinitionReturnType != FormatStyle::DRTBS_None &&
266         Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_None) {
267       if (Style.AlwaysBreakAfterDefinitionReturnType == FormatStyle::DRTBS_All)
268         Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions;
269       else if (Style.AlwaysBreakAfterDefinitionReturnType ==
270                FormatStyle::DRTBS_TopLevel)
271         Style.AlwaysBreakAfterReturnType =
272             FormatStyle::RTBS_TopLevelDefinitions;
273     }
274 
275     IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
276                    Style.AlwaysBreakBeforeMultilineStrings);
277     IO.mapOptional("AlwaysBreakTemplateDeclarations",
278                    Style.AlwaysBreakTemplateDeclarations);
279     IO.mapOptional("BinPackArguments", Style.BinPackArguments);
280     IO.mapOptional("BinPackParameters", Style.BinPackParameters);
281     IO.mapOptional("BraceWrapping", Style.BraceWrapping);
282     IO.mapOptional("BreakBeforeBinaryOperators",
283                    Style.BreakBeforeBinaryOperators);
284     IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
285     IO.mapOptional("BreakBeforeTernaryOperators",
286                    Style.BreakBeforeTernaryOperators);
287     IO.mapOptional("BreakConstructorInitializersBeforeComma",
288                    Style.BreakConstructorInitializersBeforeComma);
289     IO.mapOptional("BreakAfterJavaFieldAnnotations",
290                    Style.BreakAfterJavaFieldAnnotations);
291     IO.mapOptional("BreakStringLiterals", Style.BreakStringLiterals);
292     IO.mapOptional("ColumnLimit", Style.ColumnLimit);
293     IO.mapOptional("CommentPragmas", Style.CommentPragmas);
294     IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
295                    Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
296     IO.mapOptional("ConstructorInitializerIndentWidth",
297                    Style.ConstructorInitializerIndentWidth);
298     IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
299     IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
300     IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment);
301     IO.mapOptional("DisableFormat", Style.DisableFormat);
302     IO.mapOptional("ExperimentalAutoDetectBinPacking",
303                    Style.ExperimentalAutoDetectBinPacking);
304     IO.mapOptional("ForEachMacros", Style.ForEachMacros);
305     IO.mapOptional("IncludeCategories", Style.IncludeCategories);
306     IO.mapOptional("IncludeIsMainRegex", Style.IncludeIsMainRegex);
307     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
308     IO.mapOptional("IndentWidth", Style.IndentWidth);
309     IO.mapOptional("IndentWrappedFunctionNames",
310                    Style.IndentWrappedFunctionNames);
311     IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
312                    Style.KeepEmptyLinesAtTheStartOfBlocks);
313     IO.mapOptional("MacroBlockBegin", Style.MacroBlockBegin);
314     IO.mapOptional("MacroBlockEnd", Style.MacroBlockEnd);
315     IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
316     IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
317     IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth);
318     IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
319     IO.mapOptional("ObjCSpaceBeforeProtocolList",
320                    Style.ObjCSpaceBeforeProtocolList);
321     IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
322                    Style.PenaltyBreakBeforeFirstCallParameter);
323     IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
324     IO.mapOptional("PenaltyBreakFirstLessLess",
325                    Style.PenaltyBreakFirstLessLess);
326     IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
327     IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
328     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
329                    Style.PenaltyReturnTypeOnItsOwnLine);
330     IO.mapOptional("PointerAlignment", Style.PointerAlignment);
331     IO.mapOptional("ReflowComments", Style.ReflowComments);
332     IO.mapOptional("SortIncludes", Style.SortIncludes);
333     IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
334     IO.mapOptional("SpaceBeforeAssignmentOperators",
335                    Style.SpaceBeforeAssignmentOperators);
336     IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
337     IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
338     IO.mapOptional("SpacesBeforeTrailingComments",
339                    Style.SpacesBeforeTrailingComments);
340     IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
341     IO.mapOptional("SpacesInContainerLiterals",
342                    Style.SpacesInContainerLiterals);
343     IO.mapOptional("SpacesInCStyleCastParentheses",
344                    Style.SpacesInCStyleCastParentheses);
345     IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
346     IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets);
347     IO.mapOptional("Standard", Style.Standard);
348     IO.mapOptional("TabWidth", Style.TabWidth);
349     IO.mapOptional("UseTab", Style.UseTab);
350     IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes);
351   }
352 };
353 
354 template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> {
355   static void mapping(IO &IO, FormatStyle::BraceWrappingFlags &Wrapping) {
356     IO.mapOptional("AfterClass", Wrapping.AfterClass);
357     IO.mapOptional("AfterControlStatement", Wrapping.AfterControlStatement);
358     IO.mapOptional("AfterEnum", Wrapping.AfterEnum);
359     IO.mapOptional("AfterFunction", Wrapping.AfterFunction);
360     IO.mapOptional("AfterNamespace", Wrapping.AfterNamespace);
361     IO.mapOptional("AfterObjCDeclaration", Wrapping.AfterObjCDeclaration);
362     IO.mapOptional("AfterStruct", Wrapping.AfterStruct);
363     IO.mapOptional("AfterUnion", Wrapping.AfterUnion);
364     IO.mapOptional("BeforeCatch", Wrapping.BeforeCatch);
365     IO.mapOptional("BeforeElse", Wrapping.BeforeElse);
366     IO.mapOptional("IndentBraces", Wrapping.IndentBraces);
367   }
368 };
369 
370 template <> struct MappingTraits<FormatStyle::IncludeCategory> {
371   static void mapping(IO &IO, FormatStyle::IncludeCategory &Category) {
372     IO.mapOptional("Regex", Category.Regex);
373     IO.mapOptional("Priority", Category.Priority);
374   }
375 };
376 
377 // Allows to read vector<FormatStyle> while keeping default values.
378 // IO.getContext() should contain a pointer to the FormatStyle structure, that
379 // will be used to get default values for missing keys.
380 // If the first element has no Language specified, it will be treated as the
381 // default one for the following elements.
382 template <> struct DocumentListTraits<std::vector<FormatStyle>> {
383   static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
384     return Seq.size();
385   }
386   static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
387                               size_t Index) {
388     if (Index >= Seq.size()) {
389       assert(Index == Seq.size());
390       FormatStyle Template;
391       if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
392         Template = Seq[0];
393       } else {
394         Template = *((const FormatStyle *)IO.getContext());
395         Template.Language = FormatStyle::LK_None;
396       }
397       Seq.resize(Index + 1, Template);
398     }
399     return Seq[Index];
400   }
401 };
402 } // namespace yaml
403 } // namespace llvm
404 
405 namespace clang {
406 namespace format {
407 
408 const std::error_category &getParseCategory() {
409   static ParseErrorCategory C;
410   return C;
411 }
412 std::error_code make_error_code(ParseError e) {
413   return std::error_code(static_cast<int>(e), getParseCategory());
414 }
415 
416 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT {
417   return "clang-format.parse_error";
418 }
419 
420 std::string ParseErrorCategory::message(int EV) const {
421   switch (static_cast<ParseError>(EV)) {
422   case ParseError::Success:
423     return "Success";
424   case ParseError::Error:
425     return "Invalid argument";
426   case ParseError::Unsuitable:
427     return "Unsuitable";
428   }
429   llvm_unreachable("unexpected parse error");
430 }
431 
432 static FormatStyle expandPresets(const FormatStyle &Style) {
433   if (Style.BreakBeforeBraces == FormatStyle::BS_Custom)
434     return Style;
435   FormatStyle Expanded = Style;
436   Expanded.BraceWrapping = {false, false, false, false, false, false,
437                             false, false, false, false, false};
438   switch (Style.BreakBeforeBraces) {
439   case FormatStyle::BS_Linux:
440     Expanded.BraceWrapping.AfterClass = true;
441     Expanded.BraceWrapping.AfterFunction = true;
442     Expanded.BraceWrapping.AfterNamespace = true;
443     break;
444   case FormatStyle::BS_Mozilla:
445     Expanded.BraceWrapping.AfterClass = true;
446     Expanded.BraceWrapping.AfterEnum = true;
447     Expanded.BraceWrapping.AfterFunction = true;
448     Expanded.BraceWrapping.AfterStruct = true;
449     Expanded.BraceWrapping.AfterUnion = true;
450     break;
451   case FormatStyle::BS_Stroustrup:
452     Expanded.BraceWrapping.AfterFunction = true;
453     Expanded.BraceWrapping.BeforeCatch = true;
454     Expanded.BraceWrapping.BeforeElse = true;
455     break;
456   case FormatStyle::BS_Allman:
457     Expanded.BraceWrapping.AfterClass = true;
458     Expanded.BraceWrapping.AfterControlStatement = true;
459     Expanded.BraceWrapping.AfterEnum = true;
460     Expanded.BraceWrapping.AfterFunction = true;
461     Expanded.BraceWrapping.AfterNamespace = true;
462     Expanded.BraceWrapping.AfterObjCDeclaration = true;
463     Expanded.BraceWrapping.AfterStruct = true;
464     Expanded.BraceWrapping.BeforeCatch = true;
465     Expanded.BraceWrapping.BeforeElse = true;
466     break;
467   case FormatStyle::BS_GNU:
468     Expanded.BraceWrapping = {true, true, true, true, true, true,
469                               true, true, true, true, true};
470     break;
471   case FormatStyle::BS_WebKit:
472     Expanded.BraceWrapping.AfterFunction = true;
473     break;
474   default:
475     break;
476   }
477   return Expanded;
478 }
479 
480 FormatStyle getLLVMStyle() {
481   FormatStyle LLVMStyle;
482   LLVMStyle.Language = FormatStyle::LK_Cpp;
483   LLVMStyle.AccessModifierOffset = -2;
484   LLVMStyle.AlignEscapedNewlinesLeft = false;
485   LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align;
486   LLVMStyle.AlignOperands = true;
487   LLVMStyle.AlignTrailingComments = true;
488   LLVMStyle.AlignConsecutiveAssignments = false;
489   LLVMStyle.AlignConsecutiveDeclarations = false;
490   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
491   LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
492   LLVMStyle.AllowShortBlocksOnASingleLine = false;
493   LLVMStyle.AllowShortCaseLabelsOnASingleLine = false;
494   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
495   LLVMStyle.AllowShortLoopsOnASingleLine = false;
496   LLVMStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_None;
497   LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None;
498   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
499   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
500   LLVMStyle.BinPackParameters = true;
501   LLVMStyle.BinPackArguments = true;
502   LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;
503   LLVMStyle.BreakBeforeTernaryOperators = true;
504   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
505   LLVMStyle.BraceWrapping = {false, false, false, false, false, false,
506                              false, false, false, false, false};
507   LLVMStyle.BreakAfterJavaFieldAnnotations = false;
508   LLVMStyle.BreakConstructorInitializersBeforeComma = false;
509   LLVMStyle.BreakStringLiterals = true;
510   LLVMStyle.ColumnLimit = 80;
511   LLVMStyle.CommentPragmas = "^ IWYU pragma:";
512   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
513   LLVMStyle.ConstructorInitializerIndentWidth = 4;
514   LLVMStyle.ContinuationIndentWidth = 4;
515   LLVMStyle.Cpp11BracedListStyle = true;
516   LLVMStyle.DerivePointerAlignment = false;
517   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
518   LLVMStyle.ForEachMacros.push_back("foreach");
519   LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
520   LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH");
521   LLVMStyle.IncludeCategories = {{"^\"(llvm|llvm-c|clang|clang-c)/", 2},
522                                  {"^(<|\"(gtest|isl|json)/)", 3},
523                                  {".*", 1}};
524   LLVMStyle.IncludeIsMainRegex = "$";
525   LLVMStyle.IndentCaseLabels = false;
526   LLVMStyle.IndentWrappedFunctionNames = false;
527   LLVMStyle.IndentWidth = 2;
528   LLVMStyle.TabWidth = 8;
529   LLVMStyle.MaxEmptyLinesToKeep = 1;
530   LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
531   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
532   LLVMStyle.ObjCBlockIndentWidth = 2;
533   LLVMStyle.ObjCSpaceAfterProperty = false;
534   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
535   LLVMStyle.PointerAlignment = FormatStyle::PAS_Right;
536   LLVMStyle.SpacesBeforeTrailingComments = 1;
537   LLVMStyle.Standard = FormatStyle::LS_Cpp11;
538   LLVMStyle.UseTab = FormatStyle::UT_Never;
539   LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;
540   LLVMStyle.ReflowComments = true;
541   LLVMStyle.SpacesInParentheses = false;
542   LLVMStyle.SpacesInSquareBrackets = false;
543   LLVMStyle.SpaceInEmptyParentheses = false;
544   LLVMStyle.SpacesInContainerLiterals = true;
545   LLVMStyle.SpacesInCStyleCastParentheses = false;
546   LLVMStyle.SpaceAfterCStyleCast = false;
547   LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
548   LLVMStyle.SpaceBeforeAssignmentOperators = true;
549   LLVMStyle.SpacesInAngles = false;
550 
551   LLVMStyle.PenaltyBreakComment = 300;
552   LLVMStyle.PenaltyBreakFirstLessLess = 120;
553   LLVMStyle.PenaltyBreakString = 1000;
554   LLVMStyle.PenaltyExcessCharacter = 1000000;
555   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
556   LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
557 
558   LLVMStyle.DisableFormat = false;
559   LLVMStyle.SortIncludes = true;
560 
561   return LLVMStyle;
562 }
563 
564 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
565   FormatStyle GoogleStyle = getLLVMStyle();
566   GoogleStyle.Language = Language;
567 
568   GoogleStyle.AccessModifierOffset = -1;
569   GoogleStyle.AlignEscapedNewlinesLeft = true;
570   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
571   GoogleStyle.AllowShortLoopsOnASingleLine = true;
572   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
573   GoogleStyle.AlwaysBreakTemplateDeclarations = true;
574   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
575   GoogleStyle.DerivePointerAlignment = true;
576   GoogleStyle.IncludeCategories = {{"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}};
577   GoogleStyle.IncludeIsMainRegex = "([-_](test|unittest))?$";
578   GoogleStyle.IndentCaseLabels = true;
579   GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
580   GoogleStyle.ObjCSpaceAfterProperty = false;
581   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
582   GoogleStyle.PointerAlignment = FormatStyle::PAS_Left;
583   GoogleStyle.SpacesBeforeTrailingComments = 2;
584   GoogleStyle.Standard = FormatStyle::LS_Auto;
585 
586   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
587   GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
588 
589   if (Language == FormatStyle::LK_Java) {
590     GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
591     GoogleStyle.AlignOperands = false;
592     GoogleStyle.AlignTrailingComments = false;
593     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
594     GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
595     GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
596     GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment;
597     GoogleStyle.ColumnLimit = 100;
598     GoogleStyle.SpaceAfterCStyleCast = true;
599     GoogleStyle.SpacesBeforeTrailingComments = 1;
600   } else if (Language == FormatStyle::LK_JavaScript) {
601     GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak;
602     GoogleStyle.AlignOperands = false;
603     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
604     GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
605     GoogleStyle.BreakBeforeTernaryOperators = false;
606     GoogleStyle.CommentPragmas = "@(export|return|see|visibility) ";
607     GoogleStyle.MaxEmptyLinesToKeep = 3;
608     GoogleStyle.SpacesInContainerLiterals = false;
609     GoogleStyle.JavaScriptQuotes = FormatStyle::JSQS_Single;
610   } else if (Language == FormatStyle::LK_Proto) {
611     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
612     GoogleStyle.SpacesInContainerLiterals = false;
613   }
614 
615   return GoogleStyle;
616 }
617 
618 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
619   FormatStyle ChromiumStyle = getGoogleStyle(Language);
620   if (Language == FormatStyle::LK_Java) {
621     ChromiumStyle.AllowShortIfStatementsOnASingleLine = true;
622     ChromiumStyle.BreakAfterJavaFieldAnnotations = true;
623     ChromiumStyle.ContinuationIndentWidth = 8;
624     ChromiumStyle.IndentWidth = 4;
625   } else {
626     ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
627     ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
628     ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
629     ChromiumStyle.AllowShortLoopsOnASingleLine = false;
630     ChromiumStyle.BinPackParameters = false;
631     ChromiumStyle.DerivePointerAlignment = false;
632   }
633   ChromiumStyle.SortIncludes = false;
634   return ChromiumStyle;
635 }
636 
637 FormatStyle getMozillaStyle() {
638   FormatStyle MozillaStyle = getLLVMStyle();
639   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
640   MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
641   MozillaStyle.AlwaysBreakAfterReturnType =
642       FormatStyle::RTBS_TopLevelDefinitions;
643   MozillaStyle.AlwaysBreakAfterDefinitionReturnType =
644       FormatStyle::DRTBS_TopLevel;
645   MozillaStyle.AlwaysBreakTemplateDeclarations = true;
646   MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla;
647   MozillaStyle.BreakConstructorInitializersBeforeComma = true;
648   MozillaStyle.ConstructorInitializerIndentWidth = 2;
649   MozillaStyle.ContinuationIndentWidth = 2;
650   MozillaStyle.Cpp11BracedListStyle = false;
651   MozillaStyle.IndentCaseLabels = true;
652   MozillaStyle.ObjCSpaceAfterProperty = true;
653   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
654   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
655   MozillaStyle.PointerAlignment = FormatStyle::PAS_Left;
656   return MozillaStyle;
657 }
658 
659 FormatStyle getWebKitStyle() {
660   FormatStyle Style = getLLVMStyle();
661   Style.AccessModifierOffset = -4;
662   Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
663   Style.AlignOperands = false;
664   Style.AlignTrailingComments = false;
665   Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
666   Style.BreakBeforeBraces = FormatStyle::BS_WebKit;
667   Style.BreakConstructorInitializersBeforeComma = true;
668   Style.Cpp11BracedListStyle = false;
669   Style.ColumnLimit = 0;
670   Style.IndentWidth = 4;
671   Style.NamespaceIndentation = FormatStyle::NI_Inner;
672   Style.ObjCBlockIndentWidth = 4;
673   Style.ObjCSpaceAfterProperty = true;
674   Style.PointerAlignment = FormatStyle::PAS_Left;
675   Style.Standard = FormatStyle::LS_Cpp03;
676   return Style;
677 }
678 
679 FormatStyle getGNUStyle() {
680   FormatStyle Style = getLLVMStyle();
681   Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All;
682   Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions;
683   Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
684   Style.BreakBeforeBraces = FormatStyle::BS_GNU;
685   Style.BreakBeforeTernaryOperators = true;
686   Style.Cpp11BracedListStyle = false;
687   Style.ColumnLimit = 79;
688   Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
689   Style.Standard = FormatStyle::LS_Cpp03;
690   return Style;
691 }
692 
693 FormatStyle getNoStyle() {
694   FormatStyle NoStyle = getLLVMStyle();
695   NoStyle.DisableFormat = true;
696   NoStyle.SortIncludes = false;
697   return NoStyle;
698 }
699 
700 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
701                         FormatStyle *Style) {
702   if (Name.equals_lower("llvm")) {
703     *Style = getLLVMStyle();
704   } else if (Name.equals_lower("chromium")) {
705     *Style = getChromiumStyle(Language);
706   } else if (Name.equals_lower("mozilla")) {
707     *Style = getMozillaStyle();
708   } else if (Name.equals_lower("google")) {
709     *Style = getGoogleStyle(Language);
710   } else if (Name.equals_lower("webkit")) {
711     *Style = getWebKitStyle();
712   } else if (Name.equals_lower("gnu")) {
713     *Style = getGNUStyle();
714   } else if (Name.equals_lower("none")) {
715     *Style = getNoStyle();
716   } else {
717     return false;
718   }
719 
720   Style->Language = Language;
721   return true;
722 }
723 
724 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
725   assert(Style);
726   FormatStyle::LanguageKind Language = Style->Language;
727   assert(Language != FormatStyle::LK_None);
728   if (Text.trim().empty())
729     return make_error_code(ParseError::Error);
730 
731   std::vector<FormatStyle> Styles;
732   llvm::yaml::Input Input(Text);
733   // DocumentListTraits<vector<FormatStyle>> uses the context to get default
734   // values for the fields, keys for which are missing from the configuration.
735   // Mapping also uses the context to get the language to find the correct
736   // base style.
737   Input.setContext(Style);
738   Input >> Styles;
739   if (Input.error())
740     return Input.error();
741 
742   for (unsigned i = 0; i < Styles.size(); ++i) {
743     // Ensures that only the first configuration can skip the Language option.
744     if (Styles[i].Language == FormatStyle::LK_None && i != 0)
745       return make_error_code(ParseError::Error);
746     // Ensure that each language is configured at most once.
747     for (unsigned j = 0; j < i; ++j) {
748       if (Styles[i].Language == Styles[j].Language) {
749         DEBUG(llvm::dbgs()
750               << "Duplicate languages in the config file on positions " << j
751               << " and " << i << "\n");
752         return make_error_code(ParseError::Error);
753       }
754     }
755   }
756   // Look for a suitable configuration starting from the end, so we can
757   // find the configuration for the specific language first, and the default
758   // configuration (which can only be at slot 0) after it.
759   for (int i = Styles.size() - 1; i >= 0; --i) {
760     if (Styles[i].Language == Language ||
761         Styles[i].Language == FormatStyle::LK_None) {
762       *Style = Styles[i];
763       Style->Language = Language;
764       return make_error_code(ParseError::Success);
765     }
766   }
767   return make_error_code(ParseError::Unsuitable);
768 }
769 
770 std::string configurationAsText(const FormatStyle &Style) {
771   std::string Text;
772   llvm::raw_string_ostream Stream(Text);
773   llvm::yaml::Output Output(Stream);
774   // We use the same mapping method for input and output, so we need a non-const
775   // reference here.
776   FormatStyle NonConstStyle = expandPresets(Style);
777   Output << NonConstStyle;
778   return Stream.str();
779 }
780 
781 namespace {
782 
783 class FormatTokenLexer {
784 public:
785   FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style,
786                    encoding::Encoding Encoding)
787       : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
788         LessStashed(false), Column(0), TrailingWhitespace(0),
789         SourceMgr(SourceMgr), ID(ID), Style(Style),
790         IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
791         Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
792         MacroBlockBeginRegex(Style.MacroBlockBegin),
793         MacroBlockEndRegex(Style.MacroBlockEnd) {
794     Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
795                         getFormattingLangOpts(Style)));
796     Lex->SetKeepWhitespaceMode(true);
797 
798     for (const std::string &ForEachMacro : Style.ForEachMacros)
799       ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
800     std::sort(ForEachMacros.begin(), ForEachMacros.end());
801   }
802 
803   ArrayRef<FormatToken *> lex() {
804     assert(Tokens.empty());
805     assert(FirstInLineIndex == 0);
806     do {
807       Tokens.push_back(getNextToken());
808       if (Style.Language == FormatStyle::LK_JavaScript)
809         tryParseJSRegexLiteral();
810       tryMergePreviousTokens();
811       if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
812         FirstInLineIndex = Tokens.size() - 1;
813     } while (Tokens.back()->Tok.isNot(tok::eof));
814     return Tokens;
815   }
816 
817   const AdditionalKeywords &getKeywords() { return Keywords; }
818 
819 private:
820   void tryMergePreviousTokens() {
821     if (tryMerge_TMacro())
822       return;
823     if (tryMergeConflictMarkers())
824       return;
825     if (tryMergeLessLess())
826       return;
827 
828     if (Style.Language == FormatStyle::LK_JavaScript) {
829       if (tryMergeTemplateString())
830         return;
831 
832       static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
833       static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
834                                                      tok::equal};
835       static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
836                                                     tok::greaterequal};
837       static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
838       // FIXME: Investigate what token type gives the correct operator priority.
839       if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
840         return;
841       if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
842         return;
843       if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
844         return;
845       if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
846         return;
847     }
848   }
849 
850   bool tryMergeLessLess() {
851     // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
852     if (Tokens.size() < 3)
853       return false;
854 
855     bool FourthTokenIsLess = false;
856     if (Tokens.size() > 3)
857       FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
858 
859     auto First = Tokens.end() - 3;
860     if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
861         First[0]->isNot(tok::less) || FourthTokenIsLess)
862       return false;
863 
864     // Only merge if there currently is no whitespace between the two "<".
865     if (First[1]->WhitespaceRange.getBegin() !=
866         First[1]->WhitespaceRange.getEnd())
867       return false;
868 
869     First[0]->Tok.setKind(tok::lessless);
870     First[0]->TokenText = "<<";
871     First[0]->ColumnWidth += 1;
872     Tokens.erase(Tokens.end() - 2);
873     return true;
874   }
875 
876   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) {
877     if (Tokens.size() < Kinds.size())
878       return false;
879 
880     SmallVectorImpl<FormatToken *>::const_iterator First =
881         Tokens.end() - Kinds.size();
882     if (!First[0]->is(Kinds[0]))
883       return false;
884     unsigned AddLength = 0;
885     for (unsigned i = 1; i < Kinds.size(); ++i) {
886       if (!First[i]->is(Kinds[i]) ||
887           First[i]->WhitespaceRange.getBegin() !=
888               First[i]->WhitespaceRange.getEnd())
889         return false;
890       AddLength += First[i]->TokenText.size();
891     }
892     Tokens.resize(Tokens.size() - Kinds.size() + 1);
893     First[0]->TokenText = StringRef(First[0]->TokenText.data(),
894                                     First[0]->TokenText.size() + AddLength);
895     First[0]->ColumnWidth += AddLength;
896     First[0]->Type = NewType;
897     return true;
898   }
899 
900   // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
901   bool precedesOperand(FormatToken *Tok) {
902     // NB: This is not entirely correct, as an r_paren can introduce an operand
903     // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough
904     // corner case to not matter in practice, though.
905     return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
906                         tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
907                         tok::colon, tok::question, tok::tilde) ||
908            Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
909                         tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
910                         tok::kw_typeof, Keywords.kw_instanceof,
911                         Keywords.kw_in) ||
912            Tok->isBinaryOperator();
913   }
914 
915   bool canPrecedeRegexLiteral(FormatToken *Prev) {
916     if (!Prev)
917       return true;
918 
919     // Regex literals can only follow after prefix unary operators, not after
920     // postfix unary operators. If the '++' is followed by a non-operand
921     // introducing token, the slash here is the operand and not the start of a
922     // regex.
923     if (Prev->isOneOf(tok::plusplus, tok::minusminus))
924       return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]));
925 
926     // The previous token must introduce an operand location where regex
927     // literals can occur.
928     if (!precedesOperand(Prev))
929       return false;
930 
931     return true;
932   }
933 
934   // Tries to parse a JavaScript Regex literal starting at the current token,
935   // if that begins with a slash and is in a location where JavaScript allows
936   // regex literals. Changes the current token to a regex literal and updates
937   // its text if successful.
938   void tryParseJSRegexLiteral() {
939     FormatToken *RegexToken = Tokens.back();
940     if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
941       return;
942 
943     FormatToken *Prev = nullptr;
944     for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
945       // NB: Because previous pointers are not initialized yet, this cannot use
946       // Token.getPreviousNonComment.
947       if ((*I)->isNot(tok::comment)) {
948         Prev = *I;
949         break;
950       }
951     }
952 
953     if (!canPrecedeRegexLiteral(Prev))
954       return;
955 
956     // 'Manually' lex ahead in the current file buffer.
957     const char *Offset = Lex->getBufferLocation();
958     const char *RegexBegin = Offset - RegexToken->TokenText.size();
959     StringRef Buffer = Lex->getBuffer();
960     bool InCharacterClass = false;
961     bool HaveClosingSlash = false;
962     for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
963       // Regular expressions are terminated with a '/', which can only be
964       // escaped using '\' or a character class between '[' and ']'.
965       // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.5.
966       switch (*Offset) {
967       case '\\':
968         // Skip the escaped character.
969         ++Offset;
970         break;
971       case '[':
972         InCharacterClass = true;
973         break;
974       case ']':
975         InCharacterClass = false;
976         break;
977       case '/':
978         if (!InCharacterClass)
979           HaveClosingSlash = true;
980         break;
981       }
982     }
983 
984     RegexToken->Type = TT_RegexLiteral;
985     // Treat regex literals like other string_literals.
986     RegexToken->Tok.setKind(tok::string_literal);
987     RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
988     RegexToken->ColumnWidth = RegexToken->TokenText.size();
989 
990     resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
991   }
992 
993   bool tryMergeTemplateString() {
994     if (Tokens.size() < 2)
995       return false;
996 
997     FormatToken *EndBacktick = Tokens.back();
998     // Backticks get lexed as tok::unknown tokens. If a template string contains
999     // a comment start, it gets lexed as a tok::comment, or tok::unknown if
1000     // unterminated.
1001     if (!EndBacktick->isOneOf(tok::comment, tok::string_literal,
1002                               tok::char_constant, tok::unknown))
1003       return false;
1004     size_t CommentBacktickPos = EndBacktick->TokenText.find('`');
1005     // Unknown token that's not actually a backtick, or a comment that doesn't
1006     // contain a backtick.
1007     if (CommentBacktickPos == StringRef::npos)
1008       return false;
1009 
1010     unsigned TokenCount = 0;
1011     bool IsMultiline = false;
1012     unsigned EndColumnInFirstLine =
1013         EndBacktick->OriginalColumn + EndBacktick->ColumnWidth;
1014     for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) {
1015       ++TokenCount;
1016       if (I[0]->IsMultiline)
1017         IsMultiline = true;
1018 
1019       // If there was a preceding template string, this must be the start of a
1020       // template string, not the end.
1021       if (I[0]->is(TT_TemplateString))
1022         return false;
1023 
1024       if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") {
1025         // Keep track of the rhs offset of the last token to wrap across lines -
1026         // its the rhs offset of the first line of the template string, used to
1027         // determine its width.
1028         if (I[0]->IsMultiline)
1029           EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth;
1030         // If the token has newlines, the token before it (if it exists) is the
1031         // rhs end of the previous line.
1032         if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) {
1033           EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth;
1034           IsMultiline = true;
1035         }
1036         continue;
1037       }
1038 
1039       Tokens.resize(Tokens.size() - TokenCount);
1040       Tokens.back()->Type = TT_TemplateString;
1041       const char *EndOffset =
1042           EndBacktick->TokenText.data() + 1 + CommentBacktickPos;
1043       if (CommentBacktickPos != 0) {
1044         // If the backtick was not the first character (e.g. in a comment),
1045         // re-lex after the backtick position.
1046         SourceLocation Loc = EndBacktick->Tok.getLocation();
1047         resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1);
1048       }
1049       Tokens.back()->TokenText =
1050           StringRef(Tokens.back()->TokenText.data(),
1051                     EndOffset - Tokens.back()->TokenText.data());
1052 
1053       unsigned EndOriginalColumn = EndBacktick->OriginalColumn;
1054       if (EndOriginalColumn == 0) {
1055         SourceLocation Loc = EndBacktick->Tok.getLocation();
1056         EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc);
1057       }
1058       // If the ` is further down within the token (e.g. in a comment).
1059       EndOriginalColumn += CommentBacktickPos;
1060 
1061       if (IsMultiline) {
1062         // ColumnWidth is from backtick to last token in line.
1063         // LastLineColumnWidth is 0 to backtick.
1064         // x = `some content
1065         //     until here`;
1066         Tokens.back()->ColumnWidth =
1067             EndColumnInFirstLine - Tokens.back()->OriginalColumn;
1068         // +1 for the ` itself.
1069         Tokens.back()->LastLineColumnWidth = EndOriginalColumn + 1;
1070         Tokens.back()->IsMultiline = true;
1071       } else {
1072         // Token simply spans from start to end, +1 for the ` itself.
1073         Tokens.back()->ColumnWidth =
1074             EndOriginalColumn - Tokens.back()->OriginalColumn + 1;
1075       }
1076       return true;
1077     }
1078     return false;
1079   }
1080 
1081   bool tryMerge_TMacro() {
1082     if (Tokens.size() < 4)
1083       return false;
1084     FormatToken *Last = Tokens.back();
1085     if (!Last->is(tok::r_paren))
1086       return false;
1087 
1088     FormatToken *String = Tokens[Tokens.size() - 2];
1089     if (!String->is(tok::string_literal) || String->IsMultiline)
1090       return false;
1091 
1092     if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
1093       return false;
1094 
1095     FormatToken *Macro = Tokens[Tokens.size() - 4];
1096     if (Macro->TokenText != "_T")
1097       return false;
1098 
1099     const char *Start = Macro->TokenText.data();
1100     const char *End = Last->TokenText.data() + Last->TokenText.size();
1101     String->TokenText = StringRef(Start, End - Start);
1102     String->IsFirst = Macro->IsFirst;
1103     String->LastNewlineOffset = Macro->LastNewlineOffset;
1104     String->WhitespaceRange = Macro->WhitespaceRange;
1105     String->OriginalColumn = Macro->OriginalColumn;
1106     String->ColumnWidth = encoding::columnWidthWithTabs(
1107         String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
1108     String->NewlinesBefore = Macro->NewlinesBefore;
1109     String->HasUnescapedNewline = Macro->HasUnescapedNewline;
1110 
1111     Tokens.pop_back();
1112     Tokens.pop_back();
1113     Tokens.pop_back();
1114     Tokens.back() = String;
1115     return true;
1116   }
1117 
1118   bool tryMergeConflictMarkers() {
1119     if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
1120       return false;
1121 
1122     // Conflict lines look like:
1123     // <marker> <text from the vcs>
1124     // For example:
1125     // >>>>>>> /file/in/file/system at revision 1234
1126     //
1127     // We merge all tokens in a line that starts with a conflict marker
1128     // into a single token with a special token type that the unwrapped line
1129     // parser will use to correctly rebuild the underlying code.
1130 
1131     FileID ID;
1132     // Get the position of the first token in the line.
1133     unsigned FirstInLineOffset;
1134     std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
1135         Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
1136     StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
1137     // Calculate the offset of the start of the current line.
1138     auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
1139     if (LineOffset == StringRef::npos) {
1140       LineOffset = 0;
1141     } else {
1142       ++LineOffset;
1143     }
1144 
1145     auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
1146     StringRef LineStart;
1147     if (FirstSpace == StringRef::npos) {
1148       LineStart = Buffer.substr(LineOffset);
1149     } else {
1150       LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
1151     }
1152 
1153     TokenType Type = TT_Unknown;
1154     if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
1155       Type = TT_ConflictStart;
1156     } else if (LineStart == "|||||||" || LineStart == "=======" ||
1157                LineStart == "====") {
1158       Type = TT_ConflictAlternative;
1159     } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
1160       Type = TT_ConflictEnd;
1161     }
1162 
1163     if (Type != TT_Unknown) {
1164       FormatToken *Next = Tokens.back();
1165 
1166       Tokens.resize(FirstInLineIndex + 1);
1167       // We do not need to build a complete token here, as we will skip it
1168       // during parsing anyway (as we must not touch whitespace around conflict
1169       // markers).
1170       Tokens.back()->Type = Type;
1171       Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
1172 
1173       Tokens.push_back(Next);
1174       return true;
1175     }
1176 
1177     return false;
1178   }
1179 
1180   FormatToken *getStashedToken() {
1181     // Create a synthesized second '>' or '<' token.
1182     Token Tok = FormatTok->Tok;
1183     StringRef TokenText = FormatTok->TokenText;
1184 
1185     unsigned OriginalColumn = FormatTok->OriginalColumn;
1186     FormatTok = new (Allocator.Allocate()) FormatToken;
1187     FormatTok->Tok = Tok;
1188     SourceLocation TokLocation =
1189         FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
1190     FormatTok->Tok.setLocation(TokLocation);
1191     FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
1192     FormatTok->TokenText = TokenText;
1193     FormatTok->ColumnWidth = 1;
1194     FormatTok->OriginalColumn = OriginalColumn + 1;
1195 
1196     return FormatTok;
1197   }
1198 
1199   FormatToken *getNextToken() {
1200     if (GreaterStashed) {
1201       GreaterStashed = false;
1202       return getStashedToken();
1203     }
1204     if (LessStashed) {
1205       LessStashed = false;
1206       return getStashedToken();
1207     }
1208 
1209     FormatTok = new (Allocator.Allocate()) FormatToken;
1210     readRawToken(*FormatTok);
1211     SourceLocation WhitespaceStart =
1212         FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
1213     FormatTok->IsFirst = IsFirstToken;
1214     IsFirstToken = false;
1215 
1216     // Consume and record whitespace until we find a significant token.
1217     unsigned WhitespaceLength = TrailingWhitespace;
1218     while (FormatTok->Tok.is(tok::unknown)) {
1219       StringRef Text = FormatTok->TokenText;
1220       auto EscapesNewline = [&](int pos) {
1221         // A '\r' here is just part of '\r\n'. Skip it.
1222         if (pos >= 0 && Text[pos] == '\r')
1223           --pos;
1224         // See whether there is an odd number of '\' before this.
1225         unsigned count = 0;
1226         for (; pos >= 0; --pos, ++count)
1227           if (Text[pos] != '\\')
1228             break;
1229         return count & 1;
1230       };
1231       // FIXME: This miscounts tok:unknown tokens that are not just
1232       // whitespace, e.g. a '`' character.
1233       for (int i = 0, e = Text.size(); i != e; ++i) {
1234         switch (Text[i]) {
1235         case '\n':
1236           ++FormatTok->NewlinesBefore;
1237           FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
1238           FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1239           Column = 0;
1240           break;
1241         case '\r':
1242           FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1243           Column = 0;
1244           break;
1245         case '\f':
1246         case '\v':
1247           Column = 0;
1248           break;
1249         case ' ':
1250           ++Column;
1251           break;
1252         case '\t':
1253           Column += Style.TabWidth - Column % Style.TabWidth;
1254           break;
1255         case '\\':
1256           if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
1257             FormatTok->Type = TT_ImplicitStringLiteral;
1258           break;
1259         default:
1260           FormatTok->Type = TT_ImplicitStringLiteral;
1261           break;
1262         }
1263         if (FormatTok->Type == TT_ImplicitStringLiteral)
1264           break;
1265       }
1266 
1267       if (FormatTok->is(TT_ImplicitStringLiteral))
1268         break;
1269       WhitespaceLength += FormatTok->Tok.getLength();
1270 
1271       readRawToken(*FormatTok);
1272     }
1273 
1274     // In case the token starts with escaped newlines, we want to
1275     // take them into account as whitespace - this pattern is quite frequent
1276     // in macro definitions.
1277     // FIXME: Add a more explicit test.
1278     while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
1279            FormatTok->TokenText[1] == '\n') {
1280       ++FormatTok->NewlinesBefore;
1281       WhitespaceLength += 2;
1282       FormatTok->LastNewlineOffset = 2;
1283       Column = 0;
1284       FormatTok->TokenText = FormatTok->TokenText.substr(2);
1285     }
1286 
1287     FormatTok->WhitespaceRange = SourceRange(
1288         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1289 
1290     FormatTok->OriginalColumn = Column;
1291 
1292     TrailingWhitespace = 0;
1293     if (FormatTok->Tok.is(tok::comment)) {
1294       // FIXME: Add the trimmed whitespace to Column.
1295       StringRef UntrimmedText = FormatTok->TokenText;
1296       FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
1297       TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
1298     } else if (FormatTok->Tok.is(tok::raw_identifier)) {
1299       IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
1300       FormatTok->Tok.setIdentifierInfo(&Info);
1301       FormatTok->Tok.setKind(Info.getTokenID());
1302       if (Style.Language == FormatStyle::LK_Java &&
1303           FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
1304                              tok::kw_operator)) {
1305         FormatTok->Tok.setKind(tok::identifier);
1306         FormatTok->Tok.setIdentifierInfo(nullptr);
1307       } else if (Style.Language == FormatStyle::LK_JavaScript &&
1308                  FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
1309                                     tok::kw_operator)) {
1310         FormatTok->Tok.setKind(tok::identifier);
1311         FormatTok->Tok.setIdentifierInfo(nullptr);
1312       }
1313     } else if (FormatTok->Tok.is(tok::greatergreater)) {
1314       FormatTok->Tok.setKind(tok::greater);
1315       FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1316       GreaterStashed = true;
1317     } else if (FormatTok->Tok.is(tok::lessless)) {
1318       FormatTok->Tok.setKind(tok::less);
1319       FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1320       LessStashed = true;
1321     }
1322 
1323     // Now FormatTok is the next non-whitespace token.
1324 
1325     StringRef Text = FormatTok->TokenText;
1326     size_t FirstNewlinePos = Text.find('\n');
1327     if (FirstNewlinePos == StringRef::npos) {
1328       // FIXME: ColumnWidth actually depends on the start column, we need to
1329       // take this into account when the token is moved.
1330       FormatTok->ColumnWidth =
1331           encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
1332       Column += FormatTok->ColumnWidth;
1333     } else {
1334       FormatTok->IsMultiline = true;
1335       // FIXME: ColumnWidth actually depends on the start column, we need to
1336       // take this into account when the token is moved.
1337       FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
1338           Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
1339 
1340       // The last line of the token always starts in column 0.
1341       // Thus, the length can be precomputed even in the presence of tabs.
1342       FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
1343           Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
1344           Encoding);
1345       Column = FormatTok->LastLineColumnWidth;
1346     }
1347 
1348     if (Style.Language == FormatStyle::LK_Cpp) {
1349       if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
1350             Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
1351                 tok::pp_define) &&
1352           std::find(ForEachMacros.begin(), ForEachMacros.end(),
1353                     FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) {
1354         FormatTok->Type = TT_ForEachMacro;
1355       } else if (FormatTok->is(tok::identifier)) {
1356         if (MacroBlockBeginRegex.match(Text)) {
1357           FormatTok->Type = TT_MacroBlockBegin;
1358         } else if (MacroBlockEndRegex.match(Text)) {
1359           FormatTok->Type = TT_MacroBlockEnd;
1360         }
1361       }
1362     }
1363 
1364     return FormatTok;
1365   }
1366 
  // The token most recently created by getNextToken() or getStashedToken().
  FormatToken *FormatTok;
  // True until the first token has been lexed; copied into
  // FormatToken::IsFirst.
  bool IsFirstToken;
  // Set when a '>>' or '<<' has been split into two tokens; the next call to
  // getNextToken() then returns the synthesized second half.
  bool GreaterStashed, LessStashed;
  // Column at which the next token starts; updated while whitespace and
  // tokens are consumed.
  unsigned Column;
  // Number of whitespace characters trimmed off the end of the preceding
  // comment token; counted as whitespace in front of the next token.
  unsigned TrailingWhitespace;
  // The raw lexer; replaced by resetLexer() when lexing has to continue at a
  // different offset.
  std::unique_ptr<Lexer> Lex;
  SourceManager &SourceMgr;
  // The file that is being lexed.
  FileID ID;
  FormatStyle &Style;
  // Used to resolve raw identifiers and the names in Style.ForEachMacros.
  IdentifierTable IdentTable;
  AdditionalKeywords Keywords;
  encoding::Encoding Encoding;
  // Provides the storage for every FormatToken created by this lexer.
  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
  // Index (in 'Tokens') of the last token that starts a new line.
  unsigned FirstInLineIndex;
  // The tokens lexed so far, after merging.
  SmallVector<FormatToken *, 16> Tokens;
  // Identifiers for the names in Style.ForEachMacros, sorted in the
  // constructor.
  SmallVector<IdentifierInfo *, 8> ForEachMacros;

  // True after a "clang-format off" comment until the matching
  // "clang-format on" comment; tokens read in between are marked Finalized.
  bool FormattingDisabled;

  // Built from Style.MacroBlockBegin and Style.MacroBlockEnd.
  llvm::Regex MacroBlockBeginRegex;
  llvm::Regex MacroBlockEndRegex;
1389 
1390   void readRawToken(FormatToken &Tok) {
1391     Lex->LexFromRawLexer(Tok.Tok);
1392     Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1393                               Tok.Tok.getLength());
1394     // For formatting, treat unterminated string literals like normal string
1395     // literals.
1396     if (Tok.is(tok::unknown)) {
1397       if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
1398         Tok.Tok.setKind(tok::string_literal);
1399         Tok.IsUnterminatedLiteral = true;
1400       } else if (Style.Language == FormatStyle::LK_JavaScript &&
1401                  Tok.TokenText == "''") {
1402         Tok.Tok.setKind(tok::string_literal);
1403       }
1404     }
1405 
1406     if (Style.Language == FormatStyle::LK_JavaScript &&
1407         Tok.is(tok::char_constant)) {
1408       Tok.Tok.setKind(tok::string_literal);
1409     }
1410 
1411     if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
1412                                  Tok.TokenText == "/* clang-format on */")) {
1413       FormattingDisabled = false;
1414     }
1415 
1416     Tok.Finalized = FormattingDisabled;
1417 
1418     if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
1419                                  Tok.TokenText == "/* clang-format off */")) {
1420       FormattingDisabled = true;
1421     }
1422   }
1423 
1424   void resetLexer(unsigned Offset) {
1425     StringRef Buffer = SourceMgr.getBufferData(ID);
1426     Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
1427                         getFormattingLangOpts(Style), Buffer.begin(),
1428                         Buffer.begin() + Offset, Buffer.end()));
1429     Lex->SetKeepWhitespaceMode(true);
1430     TrailingWhitespace = 0;
1431   }
1432 };
1433 
1434 static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1435   switch (Language) {
1436   case FormatStyle::LK_Cpp:
1437     return "C++";
1438   case FormatStyle::LK_Java:
1439     return "Java";
1440   case FormatStyle::LK_JavaScript:
1441     return "JavaScript";
1442   case FormatStyle::LK_Proto:
1443     return "Proto";
1444   default:
1445     return "Unknown";
1446   }
1447 }
1448 
// Formats the file identified by 'ID'. The class receives the unwrapped lines
// produced by the UnwrappedLineParser (through the UnwrappedLineConsumer
// interface), annotates them, determines which lines intersect the requested
// ranges and produces the replacements for those lines.
class Formatter : public UnwrappedLineConsumer {
public:
  // Copies 'Style' and 'Ranges' and remembers the source manager and file to
  // format. The buffer of 'ID' is inspected up front: the result of
  // inputUsesCRLF() is handed to the WhitespaceManager and the encoding is
  // detected. 'UnwrappedLines' starts out with a single, empty run.
  Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
            ArrayRef<CharSourceRange> Ranges)
      : Style(Style), ID(ID), SourceMgr(SourceMgr),
        Whitespaces(SourceMgr, Style,
                    inputUsesCRLF(SourceMgr.getBufferData(ID))),
        Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
        Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) {
    DEBUG(llvm::dbgs() << "File encoding: "
                       << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
                                                               : "unknown")
                       << "\n");
    DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
                       << "\n");
  }

  // Lexes and parses the file, then formats every run of unwrapped lines that
  // the parser delivered through consumeUnwrappedLine()/finishRun(). Returns
  // the union of the replacements of all runs. 'IncompleteFormat' is forwarded
  // unchanged to the per-run overload below.
  tooling::Replacements format(bool *IncompleteFormat) {
    tooling::Replacements Result;
    FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);

    UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
                               *this);
    Parser.parse();
    // finishRun() appends a fresh vector after each run, so the last entry is
    // expected to be empty; the loop below deliberately skips it.
    assert(UnwrappedLines.rbegin()->empty());
    for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
         ++Run) {
      DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
      // The annotated lines are heap-allocated here and deleted again at the
      // end of this loop iteration.
      SmallVector<AnnotatedLine *, 16> AnnotatedLines;
      for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
        AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
      }
      tooling::Replacements RunResult =
          format(AnnotatedLines, Tokens, Result, IncompleteFormat);
      DEBUG({
        llvm::dbgs() << "Replacements for run " << Run << ":\n";
        for (tooling::Replacements::iterator I = RunResult.begin(),
                                             E = RunResult.end();
             I != E; ++I) {
          llvm::dbgs() << I->toString() << "\n";
        }
      });
      for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
        delete AnnotatedLines[i];
      }
      Result.insert(RunResult.begin(), RunResult.end());
      // Reset the whitespace manager before the next run is processed.
      Whitespaces.reset();
    }
    return Result;
  }

  // Formats one run of annotated lines and returns the replacements generated
  // by the WhitespaceManager for it. Replacements produced by JavaScript
  // string requoting are not part of the return value; they are inserted
  // directly into 'Result'.
  tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
                               FormatTokenLexer &Tokens,
                               tooling::Replacements &Result,
                               bool *IncompleteFormat) {
    TokenAnnotator Annotator(Style, Tokens.getKeywords());
    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
      Annotator.annotate(*AnnotatedLines[i]);
    }
    // deriveLocalStyle() may overwrite parts of 'Style' and sets
    // BinPackInconclusiveFunctions, both of which are used further down.
    deriveLocalStyle(AnnotatedLines);
    computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
    // Requoting reads the 'Affected' flags computed just above.
    if (Style.Language == FormatStyle::LK_JavaScript &&
        Style.JavaScriptQuotes != FormatStyle::JSQS_Leave)
      requoteJSStringLiteral(AnnotatedLines, Result);

    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
      Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
    }

    Annotator.setCommentLineLevels(AnnotatedLines);
    ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr,
                                  Whitespaces, Encoding,
                                  BinPackInconclusiveFunctions);
    UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),
                           IncompleteFormat)
        .format(AnnotatedLines);
    return Whitespaces.generateReplacements();
  }

private:
  // Determines which lines are affected by the SourceRanges given as input.
  // Returns \c true if at least one line between I and E or one of their
  // children is affected.
  bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
                            SmallVectorImpl<AnnotatedLine *>::iterator E) {
    bool SomeLineAffected = false;
    // The most recent non-preprocessor line handled by this loop, if any.
    const AnnotatedLine *PreviousLine = nullptr;
    while (I != E) {
      AnnotatedLine *Line = *I;
      Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);

      // If a line is part of a preprocessor directive, it needs to be formatted
      // if any token within the directive is affected.
      if (Line->InPPDirective) {
        FormatToken *Last = Line->Last;
        // Extend the directive over all following lines whose first token is
        // not preceded by an unescaped newline.
        SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
        while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
          Last = (*PPEnd)->Last;
          ++PPEnd;
        }

        if (affectsTokenRange(*Line->First, *Last,
                              /*IncludeLeadingNewlines=*/false)) {
          SomeLineAffected = true;
          markAllAsAffected(I, PPEnd);
        }
        // Continue after the directive; 'PreviousLine' is left untouched.
        I = PPEnd;
        continue;
      }

      if (nonPPLineAffected(Line, PreviousLine))
        SomeLineAffected = true;

      PreviousLine = Line;
      ++I;
    }
    return SomeLineAffected;
  }

  // For every string literal token on an affected line in 'Lines' (child
  // lines are visited recursively and checked individually) that starts with
  // the quote character not requested by Style.JavaScriptQuotes, inserts
  // replacements into 'Result' that swap the opening and closing quote and
  // re-escape the contents accordingly. The token's ColumnWidth is updated to
  // reflect the escapes that were added or removed.
  void requoteJSStringLiteral(SmallVectorImpl<AnnotatedLine *> &Lines,
                                 tooling::Replacements &Result) {
    for (AnnotatedLine *Line : Lines) {
      requoteJSStringLiteral(Line->Children, Result);
      if (!Line->Affected)
        continue;
      for (FormatToken *FormatTok = Line->First; FormatTok;
           FormatTok = FormatTok->Next) {
        StringRef Input = FormatTok->TokenText;
        if (!FormatTok->isStringLiteral() ||
            // NB: testing for not starting with a double quote (respectively
            // a single quote) to avoid breaking `template strings`, which
            // start with neither.
            (Style.JavaScriptQuotes == FormatStyle::JSQS_Single &&
             !Input.startswith("\"")) ||
            (Style.JavaScriptQuotes == FormatStyle::JSQS_Double &&
             !Input.startswith("\'")))
          continue;

        // Change start and end quote.
        bool IsSingle = Style.JavaScriptQuotes == FormatStyle::JSQS_Single;
        SourceLocation Start = FormatTok->Tok.getLocation();
        // Helper that records a replacement of 'Length' characters at 'Start'
        // in 'Result'.
        auto Replace = [&](SourceLocation Start, unsigned Length,
                           StringRef ReplacementText) {
          Result.insert(
              tooling::Replacement(SourceMgr, Start, Length, ReplacementText));
        };
        Replace(Start, 1, IsSingle ? "'" : "\"");
        Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1,
                IsSingle ? "'" : "\"");

        // Escape internal quotes.
        size_t ColumnWidth = FormatTok->TokenText.size();
        // True while the previous character was an unescaped backslash.
        bool Escaped = false;
        // Only the characters between the two quotes are inspected.
        for (size_t i = 1; i < Input.size() - 1; i++) {
          switch (Input[i]) {
            case '\\':
              if (!Escaped && i + 1 < Input.size() &&
                  ((IsSingle && Input[i + 1] == '"') ||
                   (!IsSingle && Input[i + 1] == '\''))) {
                // Remove this \, it's escaping a " or ' that no longer needs
                // escaping
                ColumnWidth--;
                Replace(Start.getLocWithOffset(i), 1, "");
                // 'continue' advances the enclosing for loop; 'Escaped' stays
                // false so the following quote is treated as unescaped.
                continue;
              }
              Escaped = !Escaped;
              break;
            case '\"':
            case '\'':
              // Only the quote character that matches the new delimiter needs
              // a backslash inserted in front of it.
              if (!Escaped && IsSingle == (Input[i] == '\'')) {
                // Escape the quote.
                Replace(Start.getLocWithOffset(i), 0, "\\");
                ColumnWidth++;
              }
              Escaped = false;
              break;
            default:
              Escaped = false;
              break;
          }
        }

        // For formatting, store the ColumnWidth adjusted above so that it
        // takes the added and removed escapes into account.
        // FIXME(martinprobst): this might conflict with code breaking a long string
        // literal (which clang-format doesn't do, yet). For that to work, this code
        // would have to modify TokenText directly.
        FormatTok->ColumnWidth = ColumnWidth;
      }
    }
  }


  // Determines whether 'Line' is affected by the SourceRanges given as input.
  // Returns \c true if the line or one of its children is affected. Sets
  // Line->ChildrenAffected and, if applicable, Line->Affected.
  bool nonPPLineAffected(AnnotatedLine *Line,
                         const AnnotatedLine *PreviousLine) {
    bool SomeLineAffected = false;
    Line->ChildrenAffected =
        computeAffectedLines(Line->Children.begin(), Line->Children.end());
    if (Line->ChildrenAffected)
      SomeLineAffected = true;

    // Stores whether one of the line's tokens is directly affected.
    bool SomeTokenAffected = false;
    // Stores whether we need to look at the leading newlines of the next token
    // in order to determine whether it was affected.
    bool IncludeLeadingNewlines = false;

    // Stores whether the first child line of any of this line's tokens is
    // affected.
    bool SomeFirstChildAffected = false;

    for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
      // Determine whether 'Tok' was affected.
      if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
        SomeTokenAffected = true;

      // Determine whether the first child of 'Tok' was affected.
      if (!Tok->Children.empty() && Tok->Children.front()->Affected)
        SomeFirstChildAffected = true;

      // Leading newlines of the next token are only considered when this
      // token has no child lines.
      IncludeLeadingNewlines = Tok->Children.empty();
    }

    // Was this line moved, i.e. has it previously been on the same line as an
    // affected line?
    bool LineMoved = PreviousLine && PreviousLine->Affected &&
                     Line->First->NewlinesBefore == 0;

    // A line consisting of a single comment token that follows (with fewer
    // than two newlines in between) an affected line ending in a comment.
    bool IsContinuedComment =
        Line->First->is(tok::comment) && Line->First->Next == nullptr &&
        Line->First->NewlinesBefore < 2 && PreviousLine &&
        PreviousLine->Affected && PreviousLine->Last->is(tok::comment);

    if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
        IsContinuedComment) {
      Line->Affected = true;
      SomeLineAffected = true;
    }
    return SomeLineAffected;
  }

  // Marks all lines between I and E as well as all their children as affected.
  void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
                         SmallVectorImpl<AnnotatedLine *>::iterator E) {
    while (I != E) {
      (*I)->Affected = true;
      markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
      ++I;
    }
  }

  // Returns true if the range from 'First' to 'Last' intersects with one of the
  // input ranges. The range starts at the whitespace preceding 'First' (after
  // its last newline unless 'IncludeLeadingNewlines' is set) and ends after
  // the text of 'Last'.
  bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
                         bool IncludeLeadingNewlines) {
    SourceLocation Start = First.WhitespaceRange.getBegin();
    if (!IncludeLeadingNewlines)
      Start = Start.getLocWithOffset(First.LastNewlineOffset);
    SourceLocation End = Last.getStartOfNonWhitespace();
    End = End.getLocWithOffset(Last.TokenText.size());
    CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
    return affectsCharSourceRange(Range);
  }

  // Returns true if one of the input ranges intersect the leading empty lines
  // before 'Tok', i.e. the part of its preceding whitespace up to
  // 'LastNewlineOffset'.
  bool affectsLeadingEmptyLines(const FormatToken &Tok) {
    CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
        Tok.WhitespaceRange.getBegin(),
        Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
    return affectsCharSourceRange(EmptyLineRange);
  }

  // Returns true if 'Range' intersects with one of the input ranges. Ranges
  // that merely touch at an end point count as intersecting, because neither
  // "ends before the other begins" test is strict.
  bool affectsCharSourceRange(const CharSourceRange &Range) {
    for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
                                                          E = Ranges.end();
         I != E; ++I) {
      if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
          !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
        return true;
    }
    return false;
  }

  // Returns true if 'Text' contains more than half as many '\r' characters as
  // '\n' characters.
  static bool inputUsesCRLF(StringRef Text) {
    return Text.count('\r') * 2 > Text.count('\n');
  }

  // Returns true if some line in 'Lines' (or one of its children) contains a
  // token without preceding whitespace that is either a '::' directly after a
  // template opener or a template closer directly after another template
  // closer. The first token of each line is not inspected.
  bool
  hasCpp03IncompatibleFormat(const SmallVectorImpl<AnnotatedLine *> &Lines) {
    for (const AnnotatedLine* Line : Lines) {
      if (hasCpp03IncompatibleFormat(Line->Children))
        return true;
      for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) {
        // An empty whitespace range means the token directly follows its
        // predecessor.
        if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
          if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener))
            return true;
          if (Tok->is(TT_TemplateCloser) &&
              Tok->Previous->is(TT_TemplateCloser))
            return true;
        }
      }
    }
    return false;
  }

  // Counts, over 'Lines' and their children, the pointer/reference tokens that
  // have whitespace before but not after them minus those that have whitespace
  // after but not before them. The last token of a line is never counted.
  int countVariableAlignments(const SmallVectorImpl<AnnotatedLine *> &Lines) {
    int AlignmentDiff = 0;
    for (const AnnotatedLine* Line : Lines) {
      AlignmentDiff += countVariableAlignments(Line->Children);
      for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) {
        if (!Tok->is(TT_PointerOrReference))
          continue;
        bool SpaceBefore =
            Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
        bool SpaceAfter = Tok->Next->WhitespaceRange.getBegin() !=
                          Tok->Next->WhitespaceRange.getEnd();
        if (SpaceBefore && !SpaceAfter)
          ++AlignmentDiff;
        if (!SpaceBefore && SpaceAfter)
          --AlignmentDiff;
      }
    }
    return AlignmentDiff;
  }

  // Derives style settings from the existing formatting of 'AnnotatedLines':
  //  - BinPackInconclusiveFunctions is always recomputed,
  //  - Style.PointerAlignment is overwritten if Style.DerivePointerAlignment
  //    is set,
  //  - Style.Standard is overwritten if it is LS_Auto.
  void
  deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
    bool HasBinPackedFunction = false;
    bool HasOnePerLineFunction = false;
    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
      if (!AnnotatedLines[i]->First->Next)
        continue;
      // Only the tokens after the first and before the last token of each
      // top-level line are inspected.
      FormatToken *Tok = AnnotatedLines[i]->First->Next;
      while (Tok->Next) {
        if (Tok->PackingKind == PPK_BinPacked)
          HasBinPackedFunction = true;
        if (Tok->PackingKind == PPK_OnePerLine)
          HasOnePerLineFunction = true;

        Tok = Tok->Next;
      }
    }
    // A tie (difference of zero) selects left alignment.
    if (Style.DerivePointerAlignment)
      Style.PointerAlignment = countVariableAlignments(AnnotatedLines) <= 0
                                   ? FormatStyle::PAS_Left
                                   : FormatStyle::PAS_Right;
    if (Style.Standard == FormatStyle::LS_Auto)
      Style.Standard = hasCpp03IncompatibleFormat(AnnotatedLines)
                           ? FormatStyle::LS_Cpp11
                           : FormatStyle::LS_Cpp03;
    // True unless only one-per-line packing was seen.
    BinPackInconclusiveFunctions =
        HasBinPackedFunction || !HasOnePerLineFunction;
  }

  // UnwrappedLineConsumer callback: appends 'TheLine' to the current (last)
  // run.
  void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
    assert(!UnwrappedLines.empty());
    UnwrappedLines.back().push_back(TheLine);
  }

  // UnwrappedLineConsumer callback: starts a new, empty run.
  void finishRun() override {
    UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
  }

  // Copy of the style to apply; deriveLocalStyle() may overwrite
  // PointerAlignment and Standard.
  FormatStyle Style;
  // The file being formatted.
  FileID ID;
  SourceManager &SourceMgr;
  WhitespaceManager Whitespaces;
  // Copy of the source ranges that were requested to be formatted.
  SmallVector<CharSourceRange, 8> Ranges;
  // One vector of unwrapped lines per parser run; see finishRun().
  SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;

  // Encoding detected from the file's buffer in the constructor.
  encoding::Encoding Encoding;
  // Set by deriveLocalStyle() and passed on to the ContinuationIndenter.
  bool BinPackInconclusiveFunctions;
};
1829 
// Describes one #include/#import directive collected by sortIncludes().
struct IncludeDirective {
  // The included name as captured by the include regex, i.e. still wrapped in
  // its quotes or angle brackets.
  StringRef Filename;
  // The complete source text of the directive.
  StringRef Text;
  // Offset of 'Text' from the beginning of the code being processed.
  unsigned Offset;
  // Sort category; directives are ordered by category first and by 'Filename'
  // second.
  int Category;
};
1836 
1837 } // end anonymous namespace
1838 
1839 // Determines whether 'Ranges' intersects with ('Start', 'End').
1840 static bool affectsRange(ArrayRef<tooling::Range> Ranges, unsigned Start,
1841                          unsigned End) {
1842   for (auto Range : Ranges) {
1843     if (Range.getOffset() < End &&
1844         Range.getOffset() + Range.getLength() > Start)
1845       return true;
1846   }
1847   return false;
1848 }
1849 
1850 // Sorts a block of includes given by 'Includes' alphabetically adding the
1851 // necessary replacement to 'Replaces'. 'Includes' must be in strict source
1852 // order.
1853 static void sortIncludes(const FormatStyle &Style,
1854                          const SmallVectorImpl<IncludeDirective> &Includes,
1855                          ArrayRef<tooling::Range> Ranges, StringRef FileName,
1856                          tooling::Replacements &Replaces, unsigned *Cursor) {
1857   if (!affectsRange(Ranges, Includes.front().Offset,
1858                     Includes.back().Offset + Includes.back().Text.size()))
1859     return;
1860   SmallVector<unsigned, 16> Indices;
1861   for (unsigned i = 0, e = Includes.size(); i != e; ++i)
1862     Indices.push_back(i);
1863   std::stable_sort(
1864       Indices.begin(), Indices.end(), [&](unsigned LHSI, unsigned RHSI) {
1865         return std::tie(Includes[LHSI].Category, Includes[LHSI].Filename) <
1866                std::tie(Includes[RHSI].Category, Includes[RHSI].Filename);
1867       });
1868 
1869   // If the #includes are out of order, we generate a single replacement fixing
1870   // the entire block. Otherwise, no replacement is generated.
1871   bool OutOfOrder = false;
1872   for (unsigned i = 1, e = Indices.size(); i != e; ++i) {
1873     if (Indices[i] != i) {
1874       OutOfOrder = true;
1875       break;
1876     }
1877   }
1878   if (!OutOfOrder)
1879     return;
1880 
1881   std::string result;
1882   bool CursorMoved = false;
1883   for (unsigned Index : Indices) {
1884     if (!result.empty())
1885       result += "\n";
1886     result += Includes[Index].Text;
1887 
1888     if (Cursor && !CursorMoved) {
1889       unsigned Start = Includes[Index].Offset;
1890       unsigned End = Start + Includes[Index].Text.size();
1891       if (*Cursor >= Start && *Cursor < End) {
1892         *Cursor = Includes.front().Offset + result.size() + *Cursor - End;
1893         CursorMoved = true;
1894       }
1895     }
1896   }
1897 
1898   // Sorting #includes shouldn't change their total number of characters.
1899   // This would otherwise mess up 'Ranges'.
1900   assert(result.size() ==
1901          Includes.back().Offset + Includes.back().Text.size() -
1902              Includes.front().Offset);
1903 
1904   Replaces.insert(tooling::Replacement(FileName, Includes.front().Offset,
1905                                        result.size(), result));
1906 }
1907 
1908 tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
1909                                    ArrayRef<tooling::Range> Ranges,
1910                                    StringRef FileName, unsigned *Cursor) {
1911   tooling::Replacements Replaces;
1912   if (!Style.SortIncludes)
1913     return Replaces;
1914 
1915   unsigned Prev = 0;
1916   unsigned SearchFrom = 0;
1917   llvm::Regex IncludeRegex(
1918       R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))");
1919   SmallVector<StringRef, 4> Matches;
1920   SmallVector<IncludeDirective, 16> IncludesInBlock;
1921 
1922   // In compiled files, consider the first #include to be the main #include of
1923   // the file if it is not a system #include. This ensures that the header
1924   // doesn't have hidden dependencies
1925   // (http://llvm.org/docs/CodingStandards.html#include-style).
1926   //
1927   // FIXME: Do some sanity checking, e.g. edit distance of the base name, to fix
1928   // cases where the first #include is unlikely to be the main header.
1929   bool IsSource = FileName.endswith(".c") || FileName.endswith(".cc") ||
1930                   FileName.endswith(".cpp") || FileName.endswith(".c++") ||
1931                   FileName.endswith(".cxx") || FileName.endswith(".m") ||
1932                   FileName.endswith(".mm");
1933   StringRef FileStem = llvm::sys::path::stem(FileName);
1934   bool FirstIncludeBlock = true;
1935   bool MainIncludeFound = false;
1936 
1937   // Create pre-compiled regular expressions for the #include categories.
1938   SmallVector<llvm::Regex, 4> CategoryRegexs;
1939   for (const auto &Category : Style.IncludeCategories)
1940     CategoryRegexs.emplace_back(Category.Regex);
1941 
1942   bool FormattingOff = false;
1943 
1944   for (;;) {
1945     auto Pos = Code.find('\n', SearchFrom);
1946     StringRef Line =
1947         Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev);
1948 
1949     StringRef Trimmed = Line.trim();
1950     if (Trimmed == "// clang-format off")
1951       FormattingOff = true;
1952     else if (Trimmed == "// clang-format on")
1953       FormattingOff = false;
1954 
1955     if (!FormattingOff && !Line.endswith("\\")) {
1956       if (IncludeRegex.match(Line, &Matches)) {
1957         StringRef IncludeName = Matches[2];
1958         int Category = INT_MAX;
1959         for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) {
1960           if (CategoryRegexs[i].match(IncludeName)) {
1961             Category = Style.IncludeCategories[i].Priority;
1962             break;
1963           }
1964         }
1965         if (IsSource && !MainIncludeFound && Category > 0 &&
1966             FirstIncludeBlock && IncludeName.startswith("\"")) {
1967           StringRef HeaderStem =
1968               llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1));
1969           if (FileStem.startswith(HeaderStem)) {
1970             llvm::Regex MainIncludeRegex(
1971                 (HeaderStem + Style.IncludeIsMainRegex).str());
1972             if (MainIncludeRegex.match(FileStem)) {
1973               Category = 0;
1974               MainIncludeFound = true;
1975             }
1976           }
1977         }
1978         IncludesInBlock.push_back({IncludeName, Line, Prev, Category});
1979       } else if (!IncludesInBlock.empty()) {
1980         sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,
1981                      Cursor);
1982         IncludesInBlock.clear();
1983         FirstIncludeBlock = false;
1984       }
1985       Prev = Pos + 1;
1986     }
1987     if (Pos == StringRef::npos || Pos + 1 == Code.size())
1988       break;
1989     SearchFrom = Pos + 1;
1990   }
1991   if (!IncludesInBlock.empty())
1992     sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, Cursor);
1993   return Replaces;
1994 }
1995 
1996 tooling::Replacements formatReplacements(StringRef Code,
1997                                          const tooling::Replacements &Replaces,
1998                                          const FormatStyle &Style) {
1999   if (Replaces.empty())
2000     return tooling::Replacements();
2001 
2002   std::string NewCode = applyAllReplacements(Code, Replaces);
2003   std::vector<tooling::Range> ChangedRanges =
2004       tooling::calculateChangedRanges(Replaces);
2005   StringRef FileName = Replaces.begin()->getFilePath();
2006   tooling::Replacements FormatReplaces =
2007       reformat(Style, NewCode, ChangedRanges, FileName);
2008 
2009   tooling::Replacements MergedReplacements =
2010       mergeReplacements(Replaces, FormatReplaces);
2011 
2012   return MergedReplacements;
2013 }
2014 
2015 tooling::Replacements reformat(const FormatStyle &Style,
2016                                SourceManager &SourceMgr, FileID ID,
2017                                ArrayRef<CharSourceRange> Ranges,
2018                                bool *IncompleteFormat) {
2019   FormatStyle Expanded = expandPresets(Style);
2020   if (Expanded.DisableFormat)
2021     return tooling::Replacements();
2022   Formatter formatter(Expanded, SourceMgr, ID, Ranges);
2023   return formatter.format(IncompleteFormat);
2024 }
2025 
2026 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
2027                                ArrayRef<tooling::Range> Ranges,
2028                                StringRef FileName, bool *IncompleteFormat) {
2029   if (Style.DisableFormat)
2030     return tooling::Replacements();
2031 
2032   IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
2033       new vfs::InMemoryFileSystem);
2034   FileManager Files(FileSystemOptions(), InMemoryFileSystem);
2035   DiagnosticsEngine Diagnostics(
2036       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
2037       new DiagnosticOptions);
2038   SourceManager SourceMgr(Diagnostics, Files);
2039   InMemoryFileSystem->addFile(
2040       FileName, 0, llvm::MemoryBuffer::getMemBuffer(
2041                        Code, FileName, /*RequiresNullTerminator=*/false));
2042   FileID ID = SourceMgr.createFileID(Files.getFile(FileName), SourceLocation(),
2043                                      clang::SrcMgr::C_User);
2044   SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
2045   std::vector<CharSourceRange> CharRanges;
2046   for (const tooling::Range &Range : Ranges) {
2047     SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
2048     SourceLocation End = Start.getLocWithOffset(Range.getLength());
2049     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
2050   }
2051   return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat);
2052 }
2053 
2054 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
2055   LangOptions LangOpts;
2056   LangOpts.CPlusPlus = 1;
2057   LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
2058   LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
2059   LangOpts.LineComment = 1;
2060   bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp;
2061   LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;
2062   LangOpts.Bool = 1;
2063   LangOpts.ObjC1 = 1;
2064   LangOpts.ObjC2 = 1;
2065   LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
2066   LangOpts.DeclSpecKeyword = 1; // To get __declspec.
2067   return LangOpts;
2068 }
2069 
// Help text describing the values accepted by the -style option: the names of
// the predefined styles, "file" to load a .clang-format file, or an inline
// "{key: value, ...}" configuration.
const char *StyleOptionHelpDescription =
    "Coding style, currently supports:\n"
    "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
    "Use -style=file to load style configuration from\n"
    ".clang-format file located in one of the parent\n"
    "directories of the source file (or current\n"
    "directory for stdin).\n"
    "Use -style=\"{key: value, ...}\" to set specific\n"
    "parameters, e.g.:\n"
    "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
2080 
2081 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
2082   if (FileName.endswith(".java"))
2083     return FormatStyle::LK_Java;
2084   if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts"))
2085     return FormatStyle::LK_JavaScript; // JavaScript or TypeScript.
2086   if (FileName.endswith_lower(".proto") ||
2087       FileName.endswith_lower(".protodevel"))
2088     return FormatStyle::LK_Proto;
2089   if (FileName.endswith_lower(".td"))
2090     return FormatStyle::LK_TableGen;
2091   return FormatStyle::LK_Cpp;
2092 }
2093 
2094 FormatStyle getStyle(StringRef StyleName, StringRef FileName,
2095                      StringRef FallbackStyle, vfs::FileSystem *FS) {
2096   if (!FS) {
2097     FS = vfs::getRealFileSystem().get();
2098   }
2099   FormatStyle Style = getLLVMStyle();
2100   Style.Language = getLanguageByFileName(FileName);
2101   if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
2102     llvm::errs() << "Invalid fallback style \"" << FallbackStyle
2103                  << "\" using LLVM style\n";
2104     return Style;
2105   }
2106 
2107   if (StyleName.startswith("{")) {
2108     // Parse YAML/JSON style from the command line.
2109     if (std::error_code ec = parseConfiguration(StyleName, &Style)) {
2110       llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
2111                    << FallbackStyle << " style\n";
2112     }
2113     return Style;
2114   }
2115 
2116   if (!StyleName.equals_lower("file")) {
2117     if (!getPredefinedStyle(StyleName, Style.Language, &Style))
2118       llvm::errs() << "Invalid value for -style, using " << FallbackStyle
2119                    << " style\n";
2120     return Style;
2121   }
2122 
2123   // Look for .clang-format/_clang-format file in the file's parent directories.
2124   SmallString<128> UnsuitableConfigFiles;
2125   SmallString<128> Path(FileName);
2126   llvm::sys::fs::make_absolute(Path);
2127   for (StringRef Directory = Path; !Directory.empty();
2128        Directory = llvm::sys::path::parent_path(Directory)) {
2129 
2130     auto Status = FS->status(Directory);
2131     if (!Status ||
2132         Status->getType() != llvm::sys::fs::file_type::directory_file) {
2133       continue;
2134     }
2135 
2136     SmallString<128> ConfigFile(Directory);
2137 
2138     llvm::sys::path::append(ConfigFile, ".clang-format");
2139     DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
2140 
2141     Status = FS->status(ConfigFile.str());
2142     bool IsFile =
2143         Status && (Status->getType() == llvm::sys::fs::file_type::regular_file);
2144     if (!IsFile) {
2145       // Try _clang-format too, since dotfiles are not commonly used on Windows.
2146       ConfigFile = Directory;
2147       llvm::sys::path::append(ConfigFile, "_clang-format");
2148       DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
2149       Status = FS->status(ConfigFile.str());
2150       IsFile = Status &&
2151                (Status->getType() == llvm::sys::fs::file_type::regular_file);
2152     }
2153 
2154     if (IsFile) {
2155       llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
2156           FS->getBufferForFile(ConfigFile.str());
2157       if (std::error_code EC = Text.getError()) {
2158         llvm::errs() << EC.message() << "\n";
2159         break;
2160       }
2161       if (std::error_code ec =
2162               parseConfiguration(Text.get()->getBuffer(), &Style)) {
2163         if (ec == ParseError::Unsuitable) {
2164           if (!UnsuitableConfigFiles.empty())
2165             UnsuitableConfigFiles.append(", ");
2166           UnsuitableConfigFiles.append(ConfigFile);
2167           continue;
2168         }
2169         llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
2170                      << "\n";
2171         break;
2172       }
2173       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
2174       return Style;
2175     }
2176   }
2177   if (!UnsuitableConfigFiles.empty()) {
2178     llvm::errs() << "Configuration file(s) do(es) not support "
2179                  << getLanguageName(Style.Language) << ": "
2180                  << UnsuitableConfigFiles << "\n";
2181   }
2182   return Style;
2183 }
2184 
2185 } // namespace format
2186 } // namespace clang
2187