1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "ContinuationIndenter.h"
17 #include "TokenAnnotator.h"
18 #include "UnwrappedLineFormatter.h"
19 #include "UnwrappedLineParser.h"
20 #include "WhitespaceManager.h"
21 #include "clang/Basic/Diagnostic.h"
22 #include "clang/Basic/DiagnosticOptions.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Format/Format.h"
25 #include "clang/Lex/Lexer.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/YAMLTraits.h"
31 #include <queue>
32 #include <string>
33 
34 #define DEBUG_TYPE "format-formatter"
35 
36 using clang::format::FormatStyle;
37 
38 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
39 
40 namespace llvm {
41 namespace yaml {
42 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
43   static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
44     IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
45     IO.enumCase(Value, "Java", FormatStyle::LK_Java);
46     IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
47     IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
48   }
49 };
50 
51 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
52   static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
53     IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
54     IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
55     IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
56     IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
57     IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
58   }
59 };
60 
61 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
62   static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
63     IO.enumCase(Value, "Never", FormatStyle::UT_Never);
64     IO.enumCase(Value, "false", FormatStyle::UT_Never);
65     IO.enumCase(Value, "Always", FormatStyle::UT_Always);
66     IO.enumCase(Value, "true", FormatStyle::UT_Always);
67     IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
68   }
69 };
70 
71 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
72   static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
73     IO.enumCase(Value, "None", FormatStyle::SFS_None);
74     IO.enumCase(Value, "false", FormatStyle::SFS_None);
75     IO.enumCase(Value, "All", FormatStyle::SFS_All);
76     IO.enumCase(Value, "true", FormatStyle::SFS_All);
77     IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline);
78     IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty);
79   }
80 };
81 
82 template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> {
83   static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) {
84     IO.enumCase(Value, "All", FormatStyle::BOS_All);
85     IO.enumCase(Value, "true", FormatStyle::BOS_All);
86     IO.enumCase(Value, "None", FormatStyle::BOS_None);
87     IO.enumCase(Value, "false", FormatStyle::BOS_None);
88     IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment);
89   }
90 };
91 
92 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
93   static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
94     IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
95     IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
96     IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
97     IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
98     IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
99   }
100 };
101 
102 template <> struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> {
103   static void enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) {
104     IO.enumCase(Value, "None", FormatStyle::DRTBS_None);
105     IO.enumCase(Value, "All", FormatStyle::DRTBS_All);
106     IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel);
107 
108     // For backward compatibility.
109     IO.enumCase(Value, "false", FormatStyle::DRTBS_None);
110     IO.enumCase(Value, "true", FormatStyle::DRTBS_All);
111   }
112 };
113 
114 template <>
115 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
116   static void enumeration(IO &IO,
117                           FormatStyle::NamespaceIndentationKind &Value) {
118     IO.enumCase(Value, "None", FormatStyle::NI_None);
119     IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
120     IO.enumCase(Value, "All", FormatStyle::NI_All);
121   }
122 };
123 
124 template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> {
125   static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) {
126     IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle);
127     IO.enumCase(Value, "Left", FormatStyle::PAS_Left);
128     IO.enumCase(Value, "Right", FormatStyle::PAS_Right);
129 
130     // For backward compatibility.
131     IO.enumCase(Value, "true", FormatStyle::PAS_Left);
132     IO.enumCase(Value, "false", FormatStyle::PAS_Right);
133   }
134 };
135 
136 template <>
137 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
138   static void enumeration(IO &IO,
139                           FormatStyle::SpaceBeforeParensOptions &Value) {
140     IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
141     IO.enumCase(Value, "ControlStatements",
142                 FormatStyle::SBPO_ControlStatements);
143     IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
144 
145     // For backward compatibility.
146     IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
147     IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
148   }
149 };
150 
151 template <> struct MappingTraits<FormatStyle> {
152   static void mapping(IO &IO, FormatStyle &Style) {
153     // When reading, read the language first, we need it for getPredefinedStyle.
154     IO.mapOptional("Language", Style.Language);
155 
156     if (IO.outputting()) {
157       StringRef StylesArray[] = {"LLVM",    "Google", "Chromium",
158                                  "Mozilla", "WebKit", "GNU"};
159       ArrayRef<StringRef> Styles(StylesArray);
160       for (size_t i = 0, e = Styles.size(); i < e; ++i) {
161         StringRef StyleName(Styles[i]);
162         FormatStyle PredefinedStyle;
163         if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
164             Style == PredefinedStyle) {
165           IO.mapOptional("# BasedOnStyle", StyleName);
166           break;
167         }
168       }
169     } else {
170       StringRef BasedOnStyle;
171       IO.mapOptional("BasedOnStyle", BasedOnStyle);
172       if (!BasedOnStyle.empty()) {
173         FormatStyle::LanguageKind OldLanguage = Style.Language;
174         FormatStyle::LanguageKind Language =
175             ((FormatStyle *)IO.getContext())->Language;
176         if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
177           IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
178           return;
179         }
180         Style.Language = OldLanguage;
181       }
182     }
183 
184     // For backward compatibility.
185     if (!IO.outputting()) {
186       IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment);
187       IO.mapOptional("IndentFunctionDeclarationAfterType",
188                      Style.IndentWrappedFunctionNames);
189       IO.mapOptional("PointerBindsToType", Style.PointerAlignment);
190       IO.mapOptional("SpaceAfterControlStatementKeyword",
191                      Style.SpaceBeforeParens);
192     }
193 
194     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
195     IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
196     IO.mapOptional("AlignConsecutiveAssignments",
197                    Style.AlignConsecutiveAssignments);
198     IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
199     IO.mapOptional("AlignOperands", Style.AlignOperands);
200     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
201     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
202                    Style.AllowAllParametersOfDeclarationOnNextLine);
203     IO.mapOptional("AllowShortBlocksOnASingleLine",
204                    Style.AllowShortBlocksOnASingleLine);
205     IO.mapOptional("AllowShortCaseLabelsOnASingleLine",
206                    Style.AllowShortCaseLabelsOnASingleLine);
207     IO.mapOptional("AllowShortFunctionsOnASingleLine",
208                    Style.AllowShortFunctionsOnASingleLine);
209     IO.mapOptional("AllowShortIfStatementsOnASingleLine",
210                    Style.AllowShortIfStatementsOnASingleLine);
211     IO.mapOptional("AllowShortLoopsOnASingleLine",
212                    Style.AllowShortLoopsOnASingleLine);
213     IO.mapOptional("AlwaysBreakAfterDefinitionReturnType",
214                    Style.AlwaysBreakAfterDefinitionReturnType);
215     IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
216                    Style.AlwaysBreakBeforeMultilineStrings);
217     IO.mapOptional("AlwaysBreakTemplateDeclarations",
218                    Style.AlwaysBreakTemplateDeclarations);
219     IO.mapOptional("BinPackArguments", Style.BinPackArguments);
220     IO.mapOptional("BinPackParameters", Style.BinPackParameters);
221     IO.mapOptional("BreakBeforeBinaryOperators",
222                    Style.BreakBeforeBinaryOperators);
223     IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
224     IO.mapOptional("BreakBeforeTernaryOperators",
225                    Style.BreakBeforeTernaryOperators);
226     IO.mapOptional("BreakConstructorInitializersBeforeComma",
227                    Style.BreakConstructorInitializersBeforeComma);
228     IO.mapOptional("ColumnLimit", Style.ColumnLimit);
229     IO.mapOptional("CommentPragmas", Style.CommentPragmas);
230     IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
231                    Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
232     IO.mapOptional("ConstructorInitializerIndentWidth",
233                    Style.ConstructorInitializerIndentWidth);
234     IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
235     IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
236     IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment);
237     IO.mapOptional("DisableFormat", Style.DisableFormat);
238     IO.mapOptional("ExperimentalAutoDetectBinPacking",
239                    Style.ExperimentalAutoDetectBinPacking);
240     IO.mapOptional("ForEachMacros", Style.ForEachMacros);
241     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
242     IO.mapOptional("IndentWidth", Style.IndentWidth);
243     IO.mapOptional("IndentWrappedFunctionNames",
244                    Style.IndentWrappedFunctionNames);
245     IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
246                    Style.KeepEmptyLinesAtTheStartOfBlocks);
247     IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
248     IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
249     IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth);
250     IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
251     IO.mapOptional("ObjCSpaceBeforeProtocolList",
252                    Style.ObjCSpaceBeforeProtocolList);
253     IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
254                    Style.PenaltyBreakBeforeFirstCallParameter);
255     IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
256     IO.mapOptional("PenaltyBreakFirstLessLess",
257                    Style.PenaltyBreakFirstLessLess);
258     IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
259     IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
260     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
261                    Style.PenaltyReturnTypeOnItsOwnLine);
262     IO.mapOptional("PointerAlignment", Style.PointerAlignment);
263     IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
264     IO.mapOptional("SpaceBeforeAssignmentOperators",
265                    Style.SpaceBeforeAssignmentOperators);
266     IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
267     IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
268     IO.mapOptional("SpacesBeforeTrailingComments",
269                    Style.SpacesBeforeTrailingComments);
270     IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
271     IO.mapOptional("SpacesInContainerLiterals",
272                    Style.SpacesInContainerLiterals);
273     IO.mapOptional("SpacesInCStyleCastParentheses",
274                    Style.SpacesInCStyleCastParentheses);
275     IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
276     IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets);
277     IO.mapOptional("Standard", Style.Standard);
278     IO.mapOptional("TabWidth", Style.TabWidth);
279     IO.mapOptional("UseTab", Style.UseTab);
280   }
281 };
282 
283 // Allows to read vector<FormatStyle> while keeping default values.
284 // IO.getContext() should contain a pointer to the FormatStyle structure, that
285 // will be used to get default values for missing keys.
286 // If the first element has no Language specified, it will be treated as the
287 // default one for the following elements.
288 template <> struct DocumentListTraits<std::vector<FormatStyle>> {
289   static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
290     return Seq.size();
291   }
292   static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
293                               size_t Index) {
294     if (Index >= Seq.size()) {
295       assert(Index == Seq.size());
296       FormatStyle Template;
297       if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
298         Template = Seq[0];
299       } else {
300         Template = *((const FormatStyle *)IO.getContext());
301         Template.Language = FormatStyle::LK_None;
302       }
303       Seq.resize(Index + 1, Template);
304     }
305     return Seq[Index];
306   }
307 };
308 }
309 }
310 
311 namespace clang {
312 namespace format {
313 
314 const std::error_category &getParseCategory() {
315   static ParseErrorCategory C;
316   return C;
317 }
318 std::error_code make_error_code(ParseError e) {
319   return std::error_code(static_cast<int>(e), getParseCategory());
320 }
321 
322 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT {
323   return "clang-format.parse_error";
324 }
325 
326 std::string ParseErrorCategory::message(int EV) const {
327   switch (static_cast<ParseError>(EV)) {
328   case ParseError::Success:
329     return "Success";
330   case ParseError::Error:
331     return "Invalid argument";
332   case ParseError::Unsuitable:
333     return "Unsuitable";
334   }
335   llvm_unreachable("unexpected parse error");
336 }
337 
338 FormatStyle getLLVMStyle() {
339   FormatStyle LLVMStyle;
340   LLVMStyle.Language = FormatStyle::LK_Cpp;
341   LLVMStyle.AccessModifierOffset = -2;
342   LLVMStyle.AlignEscapedNewlinesLeft = false;
343   LLVMStyle.AlignAfterOpenBracket = true;
344   LLVMStyle.AlignOperands = true;
345   LLVMStyle.AlignTrailingComments = true;
346   LLVMStyle.AlignConsecutiveAssignments = false;
347   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
348   LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
349   LLVMStyle.AllowShortBlocksOnASingleLine = false;
350   LLVMStyle.AllowShortCaseLabelsOnASingleLine = false;
351   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
352   LLVMStyle.AllowShortLoopsOnASingleLine = false;
353   LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None;
354   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
355   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
356   LLVMStyle.BinPackParameters = true;
357   LLVMStyle.BinPackArguments = true;
358   LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;
359   LLVMStyle.BreakBeforeTernaryOperators = true;
360   LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
361   LLVMStyle.BreakConstructorInitializersBeforeComma = false;
362   LLVMStyle.ColumnLimit = 80;
363   LLVMStyle.CommentPragmas = "^ IWYU pragma:";
364   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
365   LLVMStyle.ConstructorInitializerIndentWidth = 4;
366   LLVMStyle.ContinuationIndentWidth = 4;
367   LLVMStyle.Cpp11BracedListStyle = true;
368   LLVMStyle.DerivePointerAlignment = false;
369   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
370   LLVMStyle.ForEachMacros.push_back("foreach");
371   LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
372   LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH");
373   LLVMStyle.IndentCaseLabels = false;
374   LLVMStyle.IndentWrappedFunctionNames = false;
375   LLVMStyle.IndentWidth = 2;
376   LLVMStyle.TabWidth = 8;
377   LLVMStyle.MaxEmptyLinesToKeep = 1;
378   LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
379   LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
380   LLVMStyle.ObjCBlockIndentWidth = 2;
381   LLVMStyle.ObjCSpaceAfterProperty = false;
382   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
383   LLVMStyle.PointerAlignment = FormatStyle::PAS_Right;
384   LLVMStyle.SpacesBeforeTrailingComments = 1;
385   LLVMStyle.Standard = FormatStyle::LS_Cpp11;
386   LLVMStyle.UseTab = FormatStyle::UT_Never;
387   LLVMStyle.SpacesInParentheses = false;
388   LLVMStyle.SpacesInSquareBrackets = false;
389   LLVMStyle.SpaceInEmptyParentheses = false;
390   LLVMStyle.SpacesInContainerLiterals = true;
391   LLVMStyle.SpacesInCStyleCastParentheses = false;
392   LLVMStyle.SpaceAfterCStyleCast = false;
393   LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
394   LLVMStyle.SpaceBeforeAssignmentOperators = true;
395   LLVMStyle.SpacesInAngles = false;
396 
397   LLVMStyle.PenaltyBreakComment = 300;
398   LLVMStyle.PenaltyBreakFirstLessLess = 120;
399   LLVMStyle.PenaltyBreakString = 1000;
400   LLVMStyle.PenaltyExcessCharacter = 1000000;
401   LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
402   LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
403 
404   LLVMStyle.DisableFormat = false;
405 
406   return LLVMStyle;
407 }
408 
409 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
410   FormatStyle GoogleStyle = getLLVMStyle();
411   GoogleStyle.Language = Language;
412 
413   GoogleStyle.AccessModifierOffset = -1;
414   GoogleStyle.AlignEscapedNewlinesLeft = true;
415   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
416   GoogleStyle.AllowShortLoopsOnASingleLine = true;
417   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
418   GoogleStyle.AlwaysBreakTemplateDeclarations = true;
419   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
420   GoogleStyle.DerivePointerAlignment = true;
421   GoogleStyle.IndentCaseLabels = true;
422   GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
423   GoogleStyle.ObjCSpaceAfterProperty = false;
424   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
425   GoogleStyle.PointerAlignment = FormatStyle::PAS_Left;
426   GoogleStyle.SpacesBeforeTrailingComments = 2;
427   GoogleStyle.Standard = FormatStyle::LS_Auto;
428 
429   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
430   GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
431 
432   if (Language == FormatStyle::LK_Java) {
433     GoogleStyle.AlignAfterOpenBracket = false;
434     GoogleStyle.AlignOperands = false;
435     GoogleStyle.AlignTrailingComments = false;
436     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
437     GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
438     GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
439     GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment;
440     GoogleStyle.ColumnLimit = 100;
441     GoogleStyle.SpaceAfterCStyleCast = true;
442     GoogleStyle.SpacesBeforeTrailingComments = 1;
443   } else if (Language == FormatStyle::LK_JavaScript) {
444     GoogleStyle.BreakBeforeTernaryOperators = false;
445     GoogleStyle.MaxEmptyLinesToKeep = 3;
446     GoogleStyle.SpacesInContainerLiterals = false;
447     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
448     GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
449   } else if (Language == FormatStyle::LK_Proto) {
450     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
451     GoogleStyle.SpacesInContainerLiterals = false;
452   }
453 
454   return GoogleStyle;
455 }
456 
457 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
458   FormatStyle ChromiumStyle = getGoogleStyle(Language);
459   if (Language == FormatStyle::LK_Java) {
460     ChromiumStyle.AllowShortIfStatementsOnASingleLine = true;
461     ChromiumStyle.IndentWidth = 4;
462     ChromiumStyle.ContinuationIndentWidth = 8;
463   } else {
464     ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
465     ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
466     ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
467     ChromiumStyle.AllowShortLoopsOnASingleLine = false;
468     ChromiumStyle.BinPackParameters = false;
469     ChromiumStyle.DerivePointerAlignment = false;
470   }
471   return ChromiumStyle;
472 }
473 
474 FormatStyle getMozillaStyle() {
475   FormatStyle MozillaStyle = getLLVMStyle();
476   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
477   MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
478   MozillaStyle.AlwaysBreakAfterDefinitionReturnType =
479       FormatStyle::DRTBS_TopLevel;
480   MozillaStyle.AlwaysBreakTemplateDeclarations = true;
481   MozillaStyle.BreakConstructorInitializersBeforeComma = true;
482   MozillaStyle.ConstructorInitializerIndentWidth = 2;
483   MozillaStyle.ContinuationIndentWidth = 2;
484   MozillaStyle.Cpp11BracedListStyle = false;
485   MozillaStyle.IndentCaseLabels = true;
486   MozillaStyle.ObjCSpaceAfterProperty = true;
487   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
488   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
489   MozillaStyle.PointerAlignment = FormatStyle::PAS_Left;
490   return MozillaStyle;
491 }
492 
493 FormatStyle getWebKitStyle() {
494   FormatStyle Style = getLLVMStyle();
495   Style.AccessModifierOffset = -4;
496   Style.AlignAfterOpenBracket = false;
497   Style.AlignOperands = false;
498   Style.AlignTrailingComments = false;
499   Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
500   Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
501   Style.BreakConstructorInitializersBeforeComma = true;
502   Style.Cpp11BracedListStyle = false;
503   Style.ColumnLimit = 0;
504   Style.IndentWidth = 4;
505   Style.NamespaceIndentation = FormatStyle::NI_Inner;
506   Style.ObjCBlockIndentWidth = 4;
507   Style.ObjCSpaceAfterProperty = true;
508   Style.PointerAlignment = FormatStyle::PAS_Left;
509   Style.Standard = FormatStyle::LS_Cpp03;
510   return Style;
511 }
512 
513 FormatStyle getGNUStyle() {
514   FormatStyle Style = getLLVMStyle();
515   Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All;
516   Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
517   Style.BreakBeforeBraces = FormatStyle::BS_GNU;
518   Style.BreakBeforeTernaryOperators = true;
519   Style.Cpp11BracedListStyle = false;
520   Style.ColumnLimit = 79;
521   Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
522   Style.Standard = FormatStyle::LS_Cpp03;
523   return Style;
524 }
525 
526 FormatStyle getNoStyle() {
527   FormatStyle NoStyle = getLLVMStyle();
528   NoStyle.DisableFormat = true;
529   return NoStyle;
530 }
531 
532 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
533                         FormatStyle *Style) {
534   if (Name.equals_lower("llvm")) {
535     *Style = getLLVMStyle();
536   } else if (Name.equals_lower("chromium")) {
537     *Style = getChromiumStyle(Language);
538   } else if (Name.equals_lower("mozilla")) {
539     *Style = getMozillaStyle();
540   } else if (Name.equals_lower("google")) {
541     *Style = getGoogleStyle(Language);
542   } else if (Name.equals_lower("webkit")) {
543     *Style = getWebKitStyle();
544   } else if (Name.equals_lower("gnu")) {
545     *Style = getGNUStyle();
546   } else if (Name.equals_lower("none")) {
547     *Style = getNoStyle();
548   } else {
549     return false;
550   }
551 
552   Style->Language = Language;
553   return true;
554 }
555 
556 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
557   assert(Style);
558   FormatStyle::LanguageKind Language = Style->Language;
559   assert(Language != FormatStyle::LK_None);
560   if (Text.trim().empty())
561     return make_error_code(ParseError::Error);
562 
563   std::vector<FormatStyle> Styles;
564   llvm::yaml::Input Input(Text);
565   // DocumentListTraits<vector<FormatStyle>> uses the context to get default
566   // values for the fields, keys for which are missing from the configuration.
567   // Mapping also uses the context to get the language to find the correct
568   // base style.
569   Input.setContext(Style);
570   Input >> Styles;
571   if (Input.error())
572     return Input.error();
573 
574   for (unsigned i = 0; i < Styles.size(); ++i) {
575     // Ensures that only the first configuration can skip the Language option.
576     if (Styles[i].Language == FormatStyle::LK_None && i != 0)
577       return make_error_code(ParseError::Error);
578     // Ensure that each language is configured at most once.
579     for (unsigned j = 0; j < i; ++j) {
580       if (Styles[i].Language == Styles[j].Language) {
581         DEBUG(llvm::dbgs()
582               << "Duplicate languages in the config file on positions " << j
583               << " and " << i << "\n");
584         return make_error_code(ParseError::Error);
585       }
586     }
587   }
588   // Look for a suitable configuration starting from the end, so we can
589   // find the configuration for the specific language first, and the default
590   // configuration (which can only be at slot 0) after it.
591   for (int i = Styles.size() - 1; i >= 0; --i) {
592     if (Styles[i].Language == Language ||
593         Styles[i].Language == FormatStyle::LK_None) {
594       *Style = Styles[i];
595       Style->Language = Language;
596       return make_error_code(ParseError::Success);
597     }
598   }
599   return make_error_code(ParseError::Unsuitable);
600 }
601 
602 std::string configurationAsText(const FormatStyle &Style) {
603   std::string Text;
604   llvm::raw_string_ostream Stream(Text);
605   llvm::yaml::Output Output(Stream);
606   // We use the same mapping method for input and output, so we need a non-const
607   // reference here.
608   FormatStyle NonConstStyle = Style;
609   Output << NonConstStyle;
610   return Stream.str();
611 }
612 
613 namespace {
614 
615 class FormatTokenLexer {
616 public:
617   FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style,
618                    encoding::Encoding Encoding)
619       : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
620         LessStashed(false), Column(0), TrailingWhitespace(0),
621         SourceMgr(SourceMgr), ID(ID), Style(Style),
622         IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
623         Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false) {
624     Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
625                         getFormattingLangOpts(Style)));
626     Lex->SetKeepWhitespaceMode(true);
627 
628     for (const std::string &ForEachMacro : Style.ForEachMacros)
629       ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
630     std::sort(ForEachMacros.begin(), ForEachMacros.end());
631   }
632 
633   ArrayRef<FormatToken *> lex() {
634     assert(Tokens.empty());
635     assert(FirstInLineIndex == 0);
636     do {
637       Tokens.push_back(getNextToken());
638       tryMergePreviousTokens();
639       if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
640         FirstInLineIndex = Tokens.size() - 1;
641     } while (Tokens.back()->Tok.isNot(tok::eof));
642     return Tokens;
643   }
644 
645   const AdditionalKeywords &getKeywords() { return Keywords; }
646 
647 private:
648   void tryMergePreviousTokens() {
649     if (tryMerge_TMacro())
650       return;
651     if (tryMergeConflictMarkers())
652       return;
653     if (tryMergeLessLess())
654       return;
655 
656     if (Style.Language == FormatStyle::LK_JavaScript) {
657       if (tryMergeJSRegexLiteral())
658         return;
659       if (tryMergeEscapeSequence())
660         return;
661       if (tryMergeTemplateString())
662         return;
663 
664       static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
665       static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
666                                                      tok::equal};
667       static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
668                                                     tok::greaterequal};
669       static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
670       // FIXME: Investigate what token type gives the correct operator priority.
671       if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
672         return;
673       if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
674         return;
675       if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
676         return;
677       if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
678         return;
679     }
680   }
681 
682   bool tryMergeLessLess() {
683     // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
684     if (Tokens.size() < 3)
685       return false;
686 
687     bool FourthTokenIsLess = false;
688     if (Tokens.size() > 3)
689       FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
690 
691     auto First = Tokens.end() - 3;
692     if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
693         First[0]->isNot(tok::less) || FourthTokenIsLess)
694       return false;
695 
696     // Only merge if there currently is no whitespace between the two "<".
697     if (First[1]->WhitespaceRange.getBegin() !=
698         First[1]->WhitespaceRange.getEnd())
699       return false;
700 
701     First[0]->Tok.setKind(tok::lessless);
702     First[0]->TokenText = "<<";
703     First[0]->ColumnWidth += 1;
704     Tokens.erase(Tokens.end() - 2);
705     return true;
706   }
707 
708   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) {
709     if (Tokens.size() < Kinds.size())
710       return false;
711 
712     SmallVectorImpl<FormatToken *>::const_iterator First =
713         Tokens.end() - Kinds.size();
714     if (!First[0]->is(Kinds[0]))
715       return false;
716     unsigned AddLength = 0;
717     for (unsigned i = 1; i < Kinds.size(); ++i) {
718       if (!First[i]->is(Kinds[i]) ||
719           First[i]->WhitespaceRange.getBegin() !=
720               First[i]->WhitespaceRange.getEnd())
721         return false;
722       AddLength += First[i]->TokenText.size();
723     }
724     Tokens.resize(Tokens.size() - Kinds.size() + 1);
725     First[0]->TokenText = StringRef(First[0]->TokenText.data(),
726                                     First[0]->TokenText.size() + AddLength);
727     First[0]->ColumnWidth += AddLength;
728     First[0]->Type = NewType;
729     return true;
730   }
731 
732   // Tries to merge an escape sequence, i.e. a "\\" and the following
733   // character. Use e.g. inside JavaScript regex literals.
734   bool tryMergeEscapeSequence() {
735     if (Tokens.size() < 2)
736       return false;
737     FormatToken *Previous = Tokens[Tokens.size() - 2];
738     if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\")
739       return false;
740     ++Previous->ColumnWidth;
741     StringRef Text = Previous->TokenText;
742     Previous->TokenText = StringRef(Text.data(), Text.size() + 1);
743     resetLexer(SourceMgr.getFileOffset(Tokens.back()->Tok.getLocation()) + 1);
744     Tokens.resize(Tokens.size() - 1);
745     Column = Previous->OriginalColumn + Previous->ColumnWidth;
746     return true;
747   }
748 
  // Try to determine whether the current token ends a JavaScript regex literal.
  // We heuristically assume that this is a regex literal if we find two
  // unescaped slashes on a line and the token before the first slash is one of
  // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by
  // a division.
  //
  // On success, every token after the opening slash is dropped, the opening
  // slash token becomes a tok::unknown of type TT_RegexLiteral with an enlarged
  // ColumnWidth, and true is returned. On failure nothing is modified and false
  // is returned.
  bool tryMergeJSRegexLiteral() {
    if (Tokens.size() < 2)
      return false;

    // If this is a string literal with a slash inside, compute the slash's
    // offset and try to find the beginning of the regex literal.
    // Also look at tok::unknown, as it can be an unterminated char literal.
    size_t SlashInStringPos = StringRef::npos;
    if (Tokens.back()->isOneOf(tok::string_literal, tok::char_constant,
                               tok::unknown)) {
      // Start search from position 1 as otherwise, this is an unknown token
      // for an unterminated /*-comment which is handled elsewhere.
      SlashInStringPos = Tokens.back()->TokenText.find('/', 1);
      if (SlashInStringPos == StringRef::npos)
        return false;
    }

    // If a regex literal ends in "\//", this gets represented by an unknown
    // token "\" and a comment.
    bool MightEndWithEscapedSlash =
        Tokens.back()->is(tok::comment) &&
        Tokens.back()->TokenText.startswith("//") &&
        Tokens[Tokens.size() - 2]->TokenText == "\\";
    // Otherwise the current token has to be a slash that is not itself escaped
    // by a preceding "\" token.
    if (!MightEndWithEscapedSlash && SlashInStringPos == StringRef::npos &&
        (Tokens.back()->isNot(tok::slash) ||
         (Tokens[Tokens.size() - 2]->is(tok::unknown) &&
          Tokens[Tokens.size() - 2]->TokenText == "\\")))
      return false;

    // Walk backwards from the token before the current one, looking for the
    // slash that opens the literal. TokenCount is the number of tokens that
    // follow the token currently inspected (I[0]).
    unsigned TokenCount = 0;
    for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
      ++TokenCount;
      // I[1] is the token preceding I[0] in source order.
      if (I[0]->isOneOf(tok::slash, tok::slashequal) && I + 1 != E &&
          (I[1]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, tok::r_brace,
                         tok::exclaim, tok::l_square, tok::colon, tok::comma,
                         tok::question, tok::kw_return) ||
           I[1]->isBinaryOperator())) {
        unsigned LastColumn = Tokens.back()->OriginalColumn;
        SourceLocation Loc = Tokens.back()->Tok.getLocation();
        if (MightEndWithEscapedSlash) {
          // This regex literal ends in '\//'. Skip past the '//' of the last
          // token and re-start lexing from there.
          resetLexer(SourceMgr.getFileOffset(Loc) + 2);
        } else if (SlashInStringPos != StringRef::npos) {
          // This regex literal ends in a string_literal with a slash inside.
          // Calculate end column and reset lexer appropriately.
          resetLexer(SourceMgr.getFileOffset(Loc) + SlashInStringPos + 1);
          LastColumn += SlashInStringPos;
        }
        // Drop everything after the opening slash, which thereby becomes
        // Tokens.back() (the same token as I[0]).
        Tokens.resize(Tokens.size() - TokenCount);
        Tokens.back()->Tok.setKind(tok::unknown);
        Tokens.back()->Type = TT_RegexLiteral;
        // Extend the width by the column distance between the opening slash
        // and the end position computed above.
        Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn;
        return true;
      }

      // There can't be a newline inside a regex literal.
      if (I[0]->NewlinesBefore > 0)
        return false;
    }
    return false;
  }
816 
  // Tries to merge the tokens of a backtick-delimited template string into a
  // single token of type TT_TemplateString.
  //
  // The last token must contain the candidate closing backtick; the preceding
  // tokens are searched backwards for a lone "`" token that opens the string.
  // On success the opening backtick token is extended to cover the whole
  // string (text, column width and multi-line information) and true is
  // returned. Returns false without modifying anything otherwise.
  bool tryMergeTemplateString() {
    if (Tokens.size() < 2)
      return false;

    FormatToken *EndBacktick = Tokens.back();
    // Backticks get lexed as tok::unknown tokens. If a template string contains
    // a comment start, it gets lexed as a tok::comment, or tok::unknown if
    // unterminated.
    if (!EndBacktick->isOneOf(tok::comment, tok::string_literal,
                              tok::char_constant, tok::unknown))
      return false;
    size_t CommentBacktickPos = EndBacktick->TokenText.find('`');
    // Unknown token that's not actually a backtick, or a comment that doesn't
    // contain a backtick.
    if (CommentBacktickPos == StringRef::npos)
      return false;

    // Number of tokens following the token currently inspected (I[0]).
    unsigned TokenCount = 0;
    bool IsMultiline = false;
    unsigned EndColumnInFirstLine =
        EndBacktick->OriginalColumn + EndBacktick->ColumnWidth;
    for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) {
      ++TokenCount;
      if (I[0]->IsMultiline)
        IsMultiline = true;

      // If there was a preceding template string, this must be the start of a
      // template string, not the end.
      if (I[0]->is(TT_TemplateString))
        return false;

      if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") {
        // Keep track of the rhs offset of the last token to wrap across lines -
        // it's the rhs offset of the first line of the template string, used to
        // determine its width.
        if (I[0]->IsMultiline)
          EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth;
        // If the token has newlines, the token before it (if it exists) is the
        // rhs end of the previous line.
        if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) {
          EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth;
          IsMultiline = true;
        }
        continue;
      }

      // I[0] is the opening backtick. Drop everything after it so that it
      // becomes Tokens.back().
      Tokens.resize(Tokens.size() - TokenCount);
      Tokens.back()->Type = TT_TemplateString;
      // Points one past the closing backtick in the underlying buffer.
      const char *EndOffset =
          EndBacktick->TokenText.data() + 1 + CommentBacktickPos;
      if (CommentBacktickPos != 0) {
        // If the backtick was not the first character (e.g. in a comment),
        // re-lex after the backtick position.
        SourceLocation Loc = EndBacktick->Tok.getLocation();
        resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1);
      }
      // Stretch the text of the opening backtick up to and including the
      // closing backtick.
      Tokens.back()->TokenText =
          StringRef(Tokens.back()->TokenText.data(),
                    EndOffset - Tokens.back()->TokenText.data());

      unsigned EndOriginalColumn = EndBacktick->OriginalColumn;
      // If the recorded column is 0, use the spelling column reported by the
      // SourceManager instead.
      if (EndOriginalColumn == 0) {
        SourceLocation Loc = EndBacktick->Tok.getLocation();
        EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc);
      }
      // If the ` is further down within the token (e.g. in a comment).
      EndOriginalColumn += CommentBacktickPos;

      if (IsMultiline) {
        // ColumnWidth is from backtick to last token in line.
        // LastLineColumnWidth is 0 to backtick.
        // x = `some content
        //     until here`;
        Tokens.back()->ColumnWidth =
            EndColumnInFirstLine - Tokens.back()->OriginalColumn;
        // +1 for the ` itself.
        Tokens.back()->LastLineColumnWidth = EndOriginalColumn + 1;
        Tokens.back()->IsMultiline = true;
      } else {
        // Token simply spans from start to end, +1 for the ` itself.
        Tokens.back()->ColumnWidth =
            EndOriginalColumn - Tokens.back()->OriginalColumn + 1;
      }
      return true;
    }
    return false;
  }
904 
905   bool tryMerge_TMacro() {
906     if (Tokens.size() < 4)
907       return false;
908     FormatToken *Last = Tokens.back();
909     if (!Last->is(tok::r_paren))
910       return false;
911 
912     FormatToken *String = Tokens[Tokens.size() - 2];
913     if (!String->is(tok::string_literal) || String->IsMultiline)
914       return false;
915 
916     if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
917       return false;
918 
919     FormatToken *Macro = Tokens[Tokens.size() - 4];
920     if (Macro->TokenText != "_T")
921       return false;
922 
923     const char *Start = Macro->TokenText.data();
924     const char *End = Last->TokenText.data() + Last->TokenText.size();
925     String->TokenText = StringRef(Start, End - Start);
926     String->IsFirst = Macro->IsFirst;
927     String->LastNewlineOffset = Macro->LastNewlineOffset;
928     String->WhitespaceRange = Macro->WhitespaceRange;
929     String->OriginalColumn = Macro->OriginalColumn;
930     String->ColumnWidth = encoding::columnWidthWithTabs(
931         String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
932     String->NewlinesBefore = Macro->NewlinesBefore;
933     String->HasUnescapedNewline = Macro->HasUnescapedNewline;
934 
935     Tokens.pop_back();
936     Tokens.pop_back();
937     Tokens.pop_back();
938     Tokens.back() = String;
939     return true;
940   }
941 
942   bool tryMergeConflictMarkers() {
943     if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
944       return false;
945 
946     // Conflict lines look like:
947     // <marker> <text from the vcs>
948     // For example:
949     // >>>>>>> /file/in/file/system at revision 1234
950     //
951     // We merge all tokens in a line that starts with a conflict marker
952     // into a single token with a special token type that the unwrapped line
953     // parser will use to correctly rebuild the underlying code.
954 
955     FileID ID;
956     // Get the position of the first token in the line.
957     unsigned FirstInLineOffset;
958     std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
959         Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
960     StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
961     // Calculate the offset of the start of the current line.
962     auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
963     if (LineOffset == StringRef::npos) {
964       LineOffset = 0;
965     } else {
966       ++LineOffset;
967     }
968 
969     auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
970     StringRef LineStart;
971     if (FirstSpace == StringRef::npos) {
972       LineStart = Buffer.substr(LineOffset);
973     } else {
974       LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
975     }
976 
977     TokenType Type = TT_Unknown;
978     if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
979       Type = TT_ConflictStart;
980     } else if (LineStart == "|||||||" || LineStart == "=======" ||
981                LineStart == "====") {
982       Type = TT_ConflictAlternative;
983     } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
984       Type = TT_ConflictEnd;
985     }
986 
987     if (Type != TT_Unknown) {
988       FormatToken *Next = Tokens.back();
989 
990       Tokens.resize(FirstInLineIndex + 1);
991       // We do not need to build a complete token here, as we will skip it
992       // during parsing anyway (as we must not touch whitespace around conflict
993       // markers).
994       Tokens.back()->Type = Type;
995       Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
996 
997       Tokens.push_back(Next);
998       return true;
999     }
1000 
1001     return false;
1002   }
1003 
1004   FormatToken *getStashedToken() {
1005     // Create a synthesized second '>' or '<' token.
1006     Token Tok = FormatTok->Tok;
1007     StringRef TokenText = FormatTok->TokenText;
1008 
1009     unsigned OriginalColumn = FormatTok->OriginalColumn;
1010     FormatTok = new (Allocator.Allocate()) FormatToken;
1011     FormatTok->Tok = Tok;
1012     SourceLocation TokLocation =
1013         FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
1014     FormatTok->Tok.setLocation(TokLocation);
1015     FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
1016     FormatTok->TokenText = TokenText;
1017     FormatTok->ColumnWidth = 1;
1018     FormatTok->OriginalColumn = OriginalColumn + 1;
1019 
1020     return FormatTok;
1021   }
1022 
1023   FormatToken *getNextToken() {
1024     if (GreaterStashed) {
1025       GreaterStashed = false;
1026       return getStashedToken();
1027     }
1028     if (LessStashed) {
1029       LessStashed = false;
1030       return getStashedToken();
1031     }
1032 
1033     FormatTok = new (Allocator.Allocate()) FormatToken;
1034     readRawToken(*FormatTok);
1035     SourceLocation WhitespaceStart =
1036         FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
1037     FormatTok->IsFirst = IsFirstToken;
1038     IsFirstToken = false;
1039 
1040     // Consume and record whitespace until we find a significant token.
1041     unsigned WhitespaceLength = TrailingWhitespace;
1042     while (FormatTok->Tok.is(tok::unknown)) {
1043       StringRef Text = FormatTok->TokenText;
1044       auto EscapesNewline = [&](int pos) {
1045         // A '\r' here is just part of '\r\n'. Skip it.
1046         if (pos >= 0 && Text[pos] == '\r')
1047           --pos;
1048         // See whether there is an odd number of '\' before this.
1049         unsigned count = 0;
1050         for (; pos >= 0; --pos, ++count)
1051           if (Text[pos] != '\\')
1052             break;
1053         return count & 1;
1054       };
1055       // FIXME: This miscounts tok:unknown tokens that are not just
1056       // whitespace, e.g. a '`' character.
1057       for (int i = 0, e = Text.size(); i != e; ++i) {
1058         switch (Text[i]) {
1059         case '\n':
1060           ++FormatTok->NewlinesBefore;
1061           FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
1062           FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1063           Column = 0;
1064           break;
1065         case '\r':
1066           FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1067           Column = 0;
1068           break;
1069         case '\f':
1070         case '\v':
1071           Column = 0;
1072           break;
1073         case ' ':
1074           ++Column;
1075           break;
1076         case '\t':
1077           Column += Style.TabWidth - Column % Style.TabWidth;
1078           break;
1079         case '\\':
1080           if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
1081             FormatTok->Type = TT_ImplicitStringLiteral;
1082           break;
1083         default:
1084           FormatTok->Type = TT_ImplicitStringLiteral;
1085           break;
1086         }
1087       }
1088 
1089       if (FormatTok->is(TT_ImplicitStringLiteral))
1090         break;
1091       WhitespaceLength += FormatTok->Tok.getLength();
1092 
1093       readRawToken(*FormatTok);
1094     }
1095 
1096     // In case the token starts with escaped newlines, we want to
1097     // take them into account as whitespace - this pattern is quite frequent
1098     // in macro definitions.
1099     // FIXME: Add a more explicit test.
1100     while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
1101            FormatTok->TokenText[1] == '\n') {
1102       ++FormatTok->NewlinesBefore;
1103       WhitespaceLength += 2;
1104       FormatTok->LastNewlineOffset = 2;
1105       Column = 0;
1106       FormatTok->TokenText = FormatTok->TokenText.substr(2);
1107     }
1108 
1109     FormatTok->WhitespaceRange = SourceRange(
1110         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1111 
1112     FormatTok->OriginalColumn = Column;
1113 
1114     TrailingWhitespace = 0;
1115     if (FormatTok->Tok.is(tok::comment)) {
1116       // FIXME: Add the trimmed whitespace to Column.
1117       StringRef UntrimmedText = FormatTok->TokenText;
1118       FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
1119       TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
1120     } else if (FormatTok->Tok.is(tok::raw_identifier)) {
1121       IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
1122       FormatTok->Tok.setIdentifierInfo(&Info);
1123       FormatTok->Tok.setKind(Info.getTokenID());
1124       if (Style.Language == FormatStyle::LK_Java &&
1125           FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete)) {
1126         FormatTok->Tok.setKind(tok::identifier);
1127         FormatTok->Tok.setIdentifierInfo(nullptr);
1128       }
1129     } else if (FormatTok->Tok.is(tok::greatergreater)) {
1130       FormatTok->Tok.setKind(tok::greater);
1131       FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1132       GreaterStashed = true;
1133     } else if (FormatTok->Tok.is(tok::lessless)) {
1134       FormatTok->Tok.setKind(tok::less);
1135       FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1136       LessStashed = true;
1137     }
1138 
1139     // Now FormatTok is the next non-whitespace token.
1140 
1141     StringRef Text = FormatTok->TokenText;
1142     size_t FirstNewlinePos = Text.find('\n');
1143     if (FirstNewlinePos == StringRef::npos) {
1144       // FIXME: ColumnWidth actually depends on the start column, we need to
1145       // take this into account when the token is moved.
1146       FormatTok->ColumnWidth =
1147           encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
1148       Column += FormatTok->ColumnWidth;
1149     } else {
1150       FormatTok->IsMultiline = true;
1151       // FIXME: ColumnWidth actually depends on the start column, we need to
1152       // take this into account when the token is moved.
1153       FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
1154           Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
1155 
1156       // The last line of the token always starts in column 0.
1157       // Thus, the length can be precomputed even in the presence of tabs.
1158       FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
1159           Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
1160           Encoding);
1161       Column = FormatTok->LastLineColumnWidth;
1162     }
1163 
1164     if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
1165           Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
1166               tok::pp_define) &&
1167         std::find(ForEachMacros.begin(), ForEachMacros.end(),
1168                   FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end())
1169       FormatTok->Type = TT_ForEachMacro;
1170 
1171     return FormatTok;
1172   }
1173 
  // The token most recently produced by getNextToken() / getStashedToken().
  FormatToken *FormatTok;
  // Copied into FormatToken::IsFirst and then cleared by getNextToken().
  bool IsFirstToken;
  // Set when getNextToken() split a ">>" / "<<"; the following call then
  // returns the synthesized second '>' / '<'.
  bool GreaterStashed, LessStashed;
  // Running column position, updated by getNextToken() while consuming
  // whitespace and tokens.
  unsigned Column;
  // Number of whitespace characters trimmed from the end of the last comment
  // token; counted as leading whitespace of the next token.
  unsigned TrailingWhitespace;
  // The raw lexer; replaced by resetLexer() to restart lexing at an offset.
  std::unique_ptr<Lexer> Lex;
  SourceManager &SourceMgr;
  // File whose buffer is lexed (see resetLexer()).
  FileID ID;
  FormatStyle &Style;
  // Used to resolve raw identifiers to their token kinds.
  IdentifierTable IdentTable;
  AdditionalKeywords Keywords;
  encoding::Encoding Encoding;
  // Backing storage for every FormatToken created by this lexer.
  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
  // Index (in 'Tokens') of the last token that starts a new line.
  unsigned FirstInLineIndex;
  // Tokens produced so far; the tryMerge* helpers rewrite its tail.
  SmallVector<FormatToken *, 16> Tokens;
  // Identifiers whose tokens get the type TT_ForEachMacro.
  SmallVector<IdentifierInfo *, 8> ForEachMacros;

  // Toggled by "clang-format off" / "clang-format on" comments; copied into
  // FormatToken::Finalized by readRawToken().
  bool FormattingDisabled;
1193 
1194   void readRawToken(FormatToken &Tok) {
1195     Lex->LexFromRawLexer(Tok.Tok);
1196     Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1197                               Tok.Tok.getLength());
1198     // For formatting, treat unterminated string literals like normal string
1199     // literals.
1200     if (Tok.is(tok::unknown)) {
1201       if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
1202         Tok.Tok.setKind(tok::string_literal);
1203         Tok.IsUnterminatedLiteral = true;
1204       } else if (Style.Language == FormatStyle::LK_JavaScript &&
1205                  Tok.TokenText == "''") {
1206         Tok.Tok.setKind(tok::char_constant);
1207       }
1208     }
1209 
1210     if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
1211                                  Tok.TokenText == "/* clang-format on */")) {
1212       FormattingDisabled = false;
1213     }
1214 
1215     Tok.Finalized = FormattingDisabled;
1216 
1217     if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
1218                                  Tok.TokenText == "/* clang-format off */")) {
1219       FormattingDisabled = true;
1220     }
1221   }
1222 
1223   void resetLexer(unsigned Offset) {
1224     StringRef Buffer = SourceMgr.getBufferData(ID);
1225     Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
1226                         getFormattingLangOpts(Style), Buffer.begin(),
1227                         Buffer.begin() + Offset, Buffer.end()));
1228     Lex->SetKeepWhitespaceMode(true);
1229   }
1230 };
1231 
1232 static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1233   switch (Language) {
1234   case FormatStyle::LK_Cpp:
1235     return "C++";
1236   case FormatStyle::LK_Java:
1237     return "Java";
1238   case FormatStyle::LK_JavaScript:
1239     return "JavaScript";
1240   case FormatStyle::LK_Proto:
1241     return "Proto";
1242   default:
1243     return "Unknown";
1244   }
1245 }
1246 
1247 class Formatter : public UnwrappedLineConsumer {
1248 public:
1249   Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
1250             ArrayRef<CharSourceRange> Ranges)
1251       : Style(Style), ID(ID), SourceMgr(SourceMgr),
1252         Whitespaces(SourceMgr, Style,
1253                     inputUsesCRLF(SourceMgr.getBufferData(ID))),
1254         Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
1255         Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) {
1256     DEBUG(llvm::dbgs() << "File encoding: "
1257                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1258                                                                : "unknown")
1259                        << "\n");
1260     DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
1261                        << "\n");
1262   }
1263 
1264   tooling::Replacements format(bool *IncompleteFormat) {
1265     tooling::Replacements Result;
1266     FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
1267 
1268     UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
1269                                *this);
1270     Parser.parse();
1271     assert(UnwrappedLines.rbegin()->empty());
1272     for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1273          ++Run) {
1274       DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1275       SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1276       for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1277         AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1278       }
1279       tooling::Replacements RunResult =
1280           format(AnnotatedLines, Tokens, IncompleteFormat);
1281       DEBUG({
1282         llvm::dbgs() << "Replacements for run " << Run << ":\n";
1283         for (tooling::Replacements::iterator I = RunResult.begin(),
1284                                              E = RunResult.end();
1285              I != E; ++I) {
1286           llvm::dbgs() << I->toString() << "\n";
1287         }
1288       });
1289       for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1290         delete AnnotatedLines[i];
1291       }
1292       Result.insert(RunResult.begin(), RunResult.end());
1293       Whitespaces.reset();
1294     }
1295     return Result;
1296   }
1297 
1298   tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1299                                FormatTokenLexer &Tokens,
1300                                bool *IncompleteFormat) {
1301     TokenAnnotator Annotator(Style, Tokens.getKeywords());
1302     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1303       Annotator.annotate(*AnnotatedLines[i]);
1304     }
1305     deriveLocalStyle(AnnotatedLines);
1306     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1307       Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1308     }
1309     computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
1310 
1311     Annotator.setCommentLineLevels(AnnotatedLines);
1312     ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr,
1313                                   Whitespaces, Encoding,
1314                                   BinPackInconclusiveFunctions);
1315     UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),
1316                            IncompleteFormat)
1317         .format(AnnotatedLines);
1318     return Whitespaces.generateReplacements();
1319   }
1320 
1321 private:
1322   // Determines which lines are affected by the SourceRanges given as input.
1323   // Returns \c true if at least one line between I and E or one of their
1324   // children is affected.
1325   bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
1326                             SmallVectorImpl<AnnotatedLine *>::iterator E) {
1327     bool SomeLineAffected = false;
1328     const AnnotatedLine *PreviousLine = nullptr;
1329     while (I != E) {
1330       AnnotatedLine *Line = *I;
1331       Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
1332 
1333       // If a line is part of a preprocessor directive, it needs to be formatted
1334       // if any token within the directive is affected.
1335       if (Line->InPPDirective) {
1336         FormatToken *Last = Line->Last;
1337         SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
1338         while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
1339           Last = (*PPEnd)->Last;
1340           ++PPEnd;
1341         }
1342 
1343         if (affectsTokenRange(*Line->First, *Last,
1344                               /*IncludeLeadingNewlines=*/false)) {
1345           SomeLineAffected = true;
1346           markAllAsAffected(I, PPEnd);
1347         }
1348         I = PPEnd;
1349         continue;
1350       }
1351 
1352       if (nonPPLineAffected(Line, PreviousLine))
1353         SomeLineAffected = true;
1354 
1355       PreviousLine = Line;
1356       ++I;
1357     }
1358     return SomeLineAffected;
1359   }
1360 
1361   // Determines whether 'Line' is affected by the SourceRanges given as input.
1362   // Returns \c true if line or one if its children is affected.
1363   bool nonPPLineAffected(AnnotatedLine *Line,
1364                          const AnnotatedLine *PreviousLine) {
1365     bool SomeLineAffected = false;
1366     Line->ChildrenAffected =
1367         computeAffectedLines(Line->Children.begin(), Line->Children.end());
1368     if (Line->ChildrenAffected)
1369       SomeLineAffected = true;
1370 
1371     // Stores whether one of the line's tokens is directly affected.
1372     bool SomeTokenAffected = false;
1373     // Stores whether we need to look at the leading newlines of the next token
1374     // in order to determine whether it was affected.
1375     bool IncludeLeadingNewlines = false;
1376 
1377     // Stores whether the first child line of any of this line's tokens is
1378     // affected.
1379     bool SomeFirstChildAffected = false;
1380 
1381     for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
1382       // Determine whether 'Tok' was affected.
1383       if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
1384         SomeTokenAffected = true;
1385 
1386       // Determine whether the first child of 'Tok' was affected.
1387       if (!Tok->Children.empty() && Tok->Children.front()->Affected)
1388         SomeFirstChildAffected = true;
1389 
1390       IncludeLeadingNewlines = Tok->Children.empty();
1391     }
1392 
1393     // Was this line moved, i.e. has it previously been on the same line as an
1394     // affected line?
1395     bool LineMoved = PreviousLine && PreviousLine->Affected &&
1396                      Line->First->NewlinesBefore == 0;
1397 
1398     bool IsContinuedComment =
1399         Line->First->is(tok::comment) && Line->First->Next == nullptr &&
1400         Line->First->NewlinesBefore < 2 && PreviousLine &&
1401         PreviousLine->Affected && PreviousLine->Last->is(tok::comment);
1402 
1403     if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
1404         IsContinuedComment) {
1405       Line->Affected = true;
1406       SomeLineAffected = true;
1407     }
1408     return SomeLineAffected;
1409   }
1410 
1411   // Marks all lines between I and E as well as all their children as affected.
1412   void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
1413                          SmallVectorImpl<AnnotatedLine *>::iterator E) {
1414     while (I != E) {
1415       (*I)->Affected = true;
1416       markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
1417       ++I;
1418     }
1419   }
1420 
1421   // Returns true if the range from 'First' to 'Last' intersects with one of the
1422   // input ranges.
1423   bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
1424                          bool IncludeLeadingNewlines) {
1425     SourceLocation Start = First.WhitespaceRange.getBegin();
1426     if (!IncludeLeadingNewlines)
1427       Start = Start.getLocWithOffset(First.LastNewlineOffset);
1428     SourceLocation End = Last.getStartOfNonWhitespace();
1429     End = End.getLocWithOffset(Last.TokenText.size());
1430     CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
1431     return affectsCharSourceRange(Range);
1432   }
1433 
1434   // Returns true if one of the input ranges intersect the leading empty lines
1435   // before 'Tok'.
1436   bool affectsLeadingEmptyLines(const FormatToken &Tok) {
1437     CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
1438         Tok.WhitespaceRange.getBegin(),
1439         Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
1440     return affectsCharSourceRange(EmptyLineRange);
1441   }
1442 
1443   // Returns true if 'Range' intersects with one of the input ranges.
1444   bool affectsCharSourceRange(const CharSourceRange &Range) {
1445     for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
1446                                                           E = Ranges.end();
1447          I != E; ++I) {
1448       if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
1449           !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
1450         return true;
1451     }
1452     return false;
1453   }
1454 
1455   static bool inputUsesCRLF(StringRef Text) {
1456     return Text.count('\r') * 2 > Text.count('\n');
1457   }
1458 
1459   void
1460   deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1461     unsigned CountBoundToVariable = 0;
1462     unsigned CountBoundToType = 0;
1463     bool HasCpp03IncompatibleFormat = false;
1464     bool HasBinPackedFunction = false;
1465     bool HasOnePerLineFunction = false;
1466     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1467       if (!AnnotatedLines[i]->First->Next)
1468         continue;
1469       FormatToken *Tok = AnnotatedLines[i]->First->Next;
1470       while (Tok->Next) {
1471         if (Tok->is(TT_PointerOrReference)) {
1472           bool SpacesBefore =
1473               Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1474           bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1475                              Tok->Next->WhitespaceRange.getEnd();
1476           if (SpacesBefore && !SpacesAfter)
1477             ++CountBoundToVariable;
1478           else if (!SpacesBefore && SpacesAfter)
1479             ++CountBoundToType;
1480         }
1481 
1482         if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1483           if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener))
1484             HasCpp03IncompatibleFormat = true;
1485           if (Tok->is(TT_TemplateCloser) &&
1486               Tok->Previous->is(TT_TemplateCloser))
1487             HasCpp03IncompatibleFormat = true;
1488         }
1489 
1490         if (Tok->PackingKind == PPK_BinPacked)
1491           HasBinPackedFunction = true;
1492         if (Tok->PackingKind == PPK_OnePerLine)
1493           HasOnePerLineFunction = true;
1494 
1495         Tok = Tok->Next;
1496       }
1497     }
1498     if (Style.DerivePointerAlignment) {
1499       if (CountBoundToType > CountBoundToVariable)
1500         Style.PointerAlignment = FormatStyle::PAS_Left;
1501       else if (CountBoundToType < CountBoundToVariable)
1502         Style.PointerAlignment = FormatStyle::PAS_Right;
1503     }
1504     if (Style.Standard == FormatStyle::LS_Auto) {
1505       Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1506                                                   : FormatStyle::LS_Cpp03;
1507     }
1508     BinPackInconclusiveFunctions =
1509         HasBinPackedFunction || !HasOnePerLineFunction;
1510   }
1511 
1512   void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
1513     assert(!UnwrappedLines.empty());
1514     UnwrappedLines.back().push_back(TheLine);
1515   }
1516 
1517   void finishRun() override {
1518     UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1519   }
1520 
1521   FormatStyle Style;
1522   FileID ID;
1523   SourceManager &SourceMgr;
1524   WhitespaceManager Whitespaces;
1525   SmallVector<CharSourceRange, 8> Ranges;
1526   SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
1527 
1528   encoding::Encoding Encoding;
1529   bool BinPackInconclusiveFunctions;
1530 };
1531 
1532 } // end anonymous namespace
1533 
1534 tooling::Replacements reformat(const FormatStyle &Style,
1535                                SourceManager &SourceMgr, FileID ID,
1536                                ArrayRef<CharSourceRange> Ranges,
1537                                bool *IncompleteFormat) {
1538   if (Style.DisableFormat)
1539     return tooling::Replacements();
1540   Formatter formatter(Style, SourceMgr, ID, Ranges);
1541   return formatter.format(IncompleteFormat);
1542 }
1543 
1544 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1545                                ArrayRef<tooling::Range> Ranges,
1546                                StringRef FileName, bool *IncompleteFormat) {
1547   if (Style.DisableFormat)
1548     return tooling::Replacements();
1549 
1550   FileManager Files((FileSystemOptions()));
1551   DiagnosticsEngine Diagnostics(
1552       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1553       new DiagnosticOptions);
1554   SourceManager SourceMgr(Diagnostics, Files);
1555   std::unique_ptr<llvm::MemoryBuffer> Buf =
1556       llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1557   const clang::FileEntry *Entry =
1558       Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1559   SourceMgr.overrideFileContents(Entry, std::move(Buf));
1560   FileID ID =
1561       SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1562   SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1563   std::vector<CharSourceRange> CharRanges;
1564   for (const tooling::Range &Range : Ranges) {
1565     SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
1566     SourceLocation End = Start.getLocWithOffset(Range.getLength());
1567     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1568   }
1569   return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat);
1570 }
1571 
1572 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
1573   LangOptions LangOpts;
1574   LangOpts.CPlusPlus = 1;
1575   LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1576   LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1577   LangOpts.LineComment = 1;
1578   bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp;
1579   LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;
1580   LangOpts.Bool = 1;
1581   LangOpts.ObjC1 = 1;
1582   LangOpts.ObjC2 = 1;
1583   LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
1584   return LangOpts;
1585 }
1586 
/// Help text describing the values accepted by the -style option: a
/// predefined style name, "file", or an inline "{key: value, ...}"
/// configuration. Written as a raw string literal, so the line breaks below
/// are part of the text.
const char *StyleOptionHelpDescription = R"(Coding style, currently supports:
  LLVM, Google, Chromium, Mozilla, WebKit.
Use -style=file to load style configuration from
.clang-format file located in one of the parent
directories of the source file (or current
directory for stdin).
Use -style="{key: value, ...}" to set specific
parameters, e.g.:
  -style="{BasedOnStyle: llvm, IndentWidth: 8}")";
1597 
1598 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
1599   if (FileName.endswith(".java")) {
1600     return FormatStyle::LK_Java;
1601   } else if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts")) {
1602     // JavaScript or TypeScript.
1603     return FormatStyle::LK_JavaScript;
1604   } else if (FileName.endswith_lower(".proto") ||
1605              FileName.endswith_lower(".protodevel")) {
1606     return FormatStyle::LK_Proto;
1607   }
1608   return FormatStyle::LK_Cpp;
1609 }
1610 
1611 FormatStyle getStyle(StringRef StyleName, StringRef FileName,
1612                      StringRef FallbackStyle) {
1613   FormatStyle Style = getLLVMStyle();
1614   Style.Language = getLanguageByFileName(FileName);
1615   if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
1616     llvm::errs() << "Invalid fallback style \"" << FallbackStyle
1617                  << "\" using LLVM style\n";
1618     return Style;
1619   }
1620 
1621   if (StyleName.startswith("{")) {
1622     // Parse YAML/JSON style from the command line.
1623     if (std::error_code ec = parseConfiguration(StyleName, &Style)) {
1624       llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
1625                    << FallbackStyle << " style\n";
1626     }
1627     return Style;
1628   }
1629 
1630   if (!StyleName.equals_lower("file")) {
1631     if (!getPredefinedStyle(StyleName, Style.Language, &Style))
1632       llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1633                    << " style\n";
1634     return Style;
1635   }
1636 
1637   // Look for .clang-format/_clang-format file in the file's parent directories.
1638   SmallString<128> UnsuitableConfigFiles;
1639   SmallString<128> Path(FileName);
1640   llvm::sys::fs::make_absolute(Path);
1641   for (StringRef Directory = Path; !Directory.empty();
1642        Directory = llvm::sys::path::parent_path(Directory)) {
1643     if (!llvm::sys::fs::is_directory(Directory))
1644       continue;
1645     SmallString<128> ConfigFile(Directory);
1646 
1647     llvm::sys::path::append(ConfigFile, ".clang-format");
1648     DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1649     bool IsFile = false;
1650     // Ignore errors from is_regular_file: we only need to know if we can read
1651     // the file or not.
1652     llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1653 
1654     if (!IsFile) {
1655       // Try _clang-format too, since dotfiles are not commonly used on Windows.
1656       ConfigFile = Directory;
1657       llvm::sys::path::append(ConfigFile, "_clang-format");
1658       DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1659       llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1660     }
1661 
1662     if (IsFile) {
1663       llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
1664           llvm::MemoryBuffer::getFile(ConfigFile.c_str());
1665       if (std::error_code EC = Text.getError()) {
1666         llvm::errs() << EC.message() << "\n";
1667         break;
1668       }
1669       if (std::error_code ec =
1670               parseConfiguration(Text.get()->getBuffer(), &Style)) {
1671         if (ec == ParseError::Unsuitable) {
1672           if (!UnsuitableConfigFiles.empty())
1673             UnsuitableConfigFiles.append(", ");
1674           UnsuitableConfigFiles.append(ConfigFile);
1675           continue;
1676         }
1677         llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
1678                      << "\n";
1679         break;
1680       }
1681       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
1682       return Style;
1683     }
1684   }
1685   if (!UnsuitableConfigFiles.empty()) {
1686     llvm::errs() << "Configuration file(s) do(es) not support "
1687                  << getLanguageName(Style.Language) << ": "
1688                  << UnsuitableConfigFiles << "\n";
1689   }
1690   return Style;
1691 }
1692 
1693 } // namespace format
1694 } // namespace clang
1695