1 //===--- Format.cpp - Format C++ code -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements functions declared in Format.h. This will be 12 /// split into separate files as we go. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "ContinuationIndenter.h" 17 #include "TokenAnnotator.h" 18 #include "UnwrappedLineFormatter.h" 19 #include "UnwrappedLineParser.h" 20 #include "WhitespaceManager.h" 21 #include "clang/Basic/Diagnostic.h" 22 #include "clang/Basic/DiagnosticOptions.h" 23 #include "clang/Basic/SourceManager.h" 24 #include "clang/Format/Format.h" 25 #include "clang/Lex/Lexer.h" 26 #include "llvm/ADT/STLExtras.h" 27 #include "llvm/Support/Allocator.h" 28 #include "llvm/Support/Debug.h" 29 #include "llvm/Support/Path.h" 30 #include "llvm/Support/Regex.h" 31 #include "llvm/Support/YAMLTraits.h" 32 #include <queue> 33 #include <string> 34 35 #define DEBUG_TYPE "format-formatter" 36 37 using clang::format::FormatStyle; 38 39 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) 40 41 namespace llvm { 42 namespace yaml { 43 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> { 44 static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) { 45 IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp); 46 IO.enumCase(Value, "Java", FormatStyle::LK_Java); 47 IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript); 48 IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); 49 } 50 }; 51 52 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> { 53 static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) { 54 IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03); 55 IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03); 56 IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11); 57 IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11); 58 IO.enumCase(Value, "Auto", FormatStyle::LS_Auto); 59 } 60 }; 61 62 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> { 63 static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) { 64 IO.enumCase(Value, "Never", FormatStyle::UT_Never); 65 IO.enumCase(Value, "false", FormatStyle::UT_Never); 66 IO.enumCase(Value, "Always", FormatStyle::UT_Always); 67 IO.enumCase(Value, "true", FormatStyle::UT_Always); 68 IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation); 69 } 70 }; 71 72 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> { 73 static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) { 74 IO.enumCase(Value, "None", FormatStyle::SFS_None); 75 IO.enumCase(Value, "false", FormatStyle::SFS_None); 76 IO.enumCase(Value, "All", FormatStyle::SFS_All); 77 IO.enumCase(Value, "true", FormatStyle::SFS_All); 78 IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline); 79 IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty); 80 } 81 }; 82 83 template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> { 84 static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) { 85 IO.enumCase(Value, "All", FormatStyle::BOS_All); 86 IO.enumCase(Value, "true", FormatStyle::BOS_All); 87 IO.enumCase(Value, "None", FormatStyle::BOS_None); 88 IO.enumCase(Value, "false", FormatStyle::BOS_None); 89 IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment); 90 } 91 }; 92 93 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> { 94 static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) { 95 IO.enumCase(Value, "Attach", FormatStyle::BS_Attach); 96 IO.enumCase(Value, "Linux", FormatStyle::BS_Linux); 97 IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup); 98 IO.enumCase(Value, "Allman", FormatStyle::BS_Allman); 99 IO.enumCase(Value, "GNU", FormatStyle::BS_GNU); 100 } 101 }; 102 103 template <> struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> { 104 static void enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) { 105 IO.enumCase(Value, "None", FormatStyle::DRTBS_None); 106 IO.enumCase(Value, "All", FormatStyle::DRTBS_All); 107 IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel); 108 109 // For backward compatibility. 110 IO.enumCase(Value, "false", FormatStyle::DRTBS_None); 111 IO.enumCase(Value, "true", FormatStyle::DRTBS_All); 112 } 113 }; 114 115 template <> 116 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> { 117 static void enumeration(IO &IO, 118 FormatStyle::NamespaceIndentationKind &Value) { 119 IO.enumCase(Value, "None", FormatStyle::NI_None); 120 IO.enumCase(Value, "Inner", FormatStyle::NI_Inner); 121 IO.enumCase(Value, "All", FormatStyle::NI_All); 122 } 123 }; 124 125 template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> { 126 static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) { 127 IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle); 128 IO.enumCase(Value, "Left", FormatStyle::PAS_Left); 129 IO.enumCase(Value, "Right", FormatStyle::PAS_Right); 130 131 // For backward compatibility. 132 IO.enumCase(Value, "true", FormatStyle::PAS_Left); 133 IO.enumCase(Value, "false", FormatStyle::PAS_Right); 134 } 135 }; 136 137 template <> 138 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> { 139 static void enumeration(IO &IO, 140 FormatStyle::SpaceBeforeParensOptions &Value) { 141 IO.enumCase(Value, "Never", FormatStyle::SBPO_Never); 142 IO.enumCase(Value, "ControlStatements", 143 FormatStyle::SBPO_ControlStatements); 144 IO.enumCase(Value, "Always", FormatStyle::SBPO_Always); 145 146 // For backward compatibility. 147 IO.enumCase(Value, "false", FormatStyle::SBPO_Never); 148 IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements); 149 } 150 }; 151 152 template <> struct MappingTraits<FormatStyle> { 153 static void mapping(IO &IO, FormatStyle &Style) { 154 // When reading, read the language first, we need it for getPredefinedStyle. 155 IO.mapOptional("Language", Style.Language); 156 157 if (IO.outputting()) { 158 StringRef StylesArray[] = {"LLVM", "Google", "Chromium", 159 "Mozilla", "WebKit", "GNU"}; 160 ArrayRef<StringRef> Styles(StylesArray); 161 for (size_t i = 0, e = Styles.size(); i < e; ++i) { 162 StringRef StyleName(Styles[i]); 163 FormatStyle PredefinedStyle; 164 if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) && 165 Style == PredefinedStyle) { 166 IO.mapOptional("# BasedOnStyle", StyleName); 167 break; 168 } 169 } 170 } else { 171 StringRef BasedOnStyle; 172 IO.mapOptional("BasedOnStyle", BasedOnStyle); 173 if (!BasedOnStyle.empty()) { 174 FormatStyle::LanguageKind OldLanguage = Style.Language; 175 FormatStyle::LanguageKind Language = 176 ((FormatStyle *)IO.getContext())->Language; 177 if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) { 178 IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle)); 179 return; 180 } 181 Style.Language = OldLanguage; 182 } 183 } 184 185 // For backward compatibility. 186 if (!IO.outputting()) { 187 IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment); 188 IO.mapOptional("IndentFunctionDeclarationAfterType", 189 Style.IndentWrappedFunctionNames); 190 IO.mapOptional("PointerBindsToType", Style.PointerAlignment); 191 IO.mapOptional("SpaceAfterControlStatementKeyword", 192 Style.SpaceBeforeParens); 193 } 194 195 IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); 196 IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket); 197 IO.mapOptional("AlignConsecutiveAssignments", 198 Style.AlignConsecutiveAssignments); 199 IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); 200 IO.mapOptional("AlignOperands", Style.AlignOperands); 201 IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); 202 IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", 203 Style.AllowAllParametersOfDeclarationOnNextLine); 204 IO.mapOptional("AllowShortBlocksOnASingleLine", 205 Style.AllowShortBlocksOnASingleLine); 206 IO.mapOptional("AllowShortCaseLabelsOnASingleLine", 207 Style.AllowShortCaseLabelsOnASingleLine); 208 IO.mapOptional("AllowShortFunctionsOnASingleLine", 209 Style.AllowShortFunctionsOnASingleLine); 210 IO.mapOptional("AllowShortIfStatementsOnASingleLine", 211 Style.AllowShortIfStatementsOnASingleLine); 212 IO.mapOptional("AllowShortLoopsOnASingleLine", 213 Style.AllowShortLoopsOnASingleLine); 214 IO.mapOptional("AlwaysBreakAfterDefinitionReturnType", 215 Style.AlwaysBreakAfterDefinitionReturnType); 216 IO.mapOptional("AlwaysBreakBeforeMultilineStrings", 217 Style.AlwaysBreakBeforeMultilineStrings); 218 IO.mapOptional("AlwaysBreakTemplateDeclarations", 219 Style.AlwaysBreakTemplateDeclarations); 220 IO.mapOptional("BinPackArguments", Style.BinPackArguments); 221 IO.mapOptional("BinPackParameters", Style.BinPackParameters); 222 IO.mapOptional("BreakBeforeBinaryOperators", 223 Style.BreakBeforeBinaryOperators); 224 IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); 225 IO.mapOptional("BreakBeforeTernaryOperators", 226 Style.BreakBeforeTernaryOperators); 227 IO.mapOptional("BreakConstructorInitializersBeforeComma", 228 Style.BreakConstructorInitializersBeforeComma); 229 IO.mapOptional("ColumnLimit", Style.ColumnLimit); 230 IO.mapOptional("CommentPragmas", Style.CommentPragmas); 231 IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", 232 Style.ConstructorInitializerAllOnOneLineOrOnePerLine); 233 IO.mapOptional("ConstructorInitializerIndentWidth", 234 Style.ConstructorInitializerIndentWidth); 235 IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth); 236 IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle); 237 IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment); 238 IO.mapOptional("DisableFormat", Style.DisableFormat); 239 IO.mapOptional("ExperimentalAutoDetectBinPacking", 240 Style.ExperimentalAutoDetectBinPacking); 241 IO.mapOptional("ForEachMacros", Style.ForEachMacros); 242 IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); 243 IO.mapOptional("IndentWidth", Style.IndentWidth); 244 IO.mapOptional("IndentWrappedFunctionNames", 245 Style.IndentWrappedFunctionNames); 246 IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks", 247 Style.KeepEmptyLinesAtTheStartOfBlocks); 248 IO.mapOptional("MacroBlockBegin", Style.MacroBlockBegin); 249 IO.mapOptional("MacroBlockEnd", Style.MacroBlockEnd); 250 IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); 251 IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); 252 IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth); 253 IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty); 254 IO.mapOptional("ObjCSpaceBeforeProtocolList", 255 Style.ObjCSpaceBeforeProtocolList); 256 IO.mapOptional("PenaltyBreakBeforeFirstCallParameter", 257 Style.PenaltyBreakBeforeFirstCallParameter); 258 IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); 259 IO.mapOptional("PenaltyBreakFirstLessLess", 260 Style.PenaltyBreakFirstLessLess); 261 IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString); 262 IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter); 263 IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", 264 Style.PenaltyReturnTypeOnItsOwnLine); 265 IO.mapOptional("PointerAlignment", Style.PointerAlignment); 266 IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast); 267 IO.mapOptional("SpaceBeforeAssignmentOperators", 268 Style.SpaceBeforeAssignmentOperators); 269 IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens); 270 IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses); 271 IO.mapOptional("SpacesBeforeTrailingComments", 272 Style.SpacesBeforeTrailingComments); 273 IO.mapOptional("SpacesInAngles", Style.SpacesInAngles); 274 IO.mapOptional("SpacesInContainerLiterals", 275 Style.SpacesInContainerLiterals); 276 IO.mapOptional("SpacesInCStyleCastParentheses", 277 Style.SpacesInCStyleCastParentheses); 278 IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses); 279 IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets); 280 IO.mapOptional("Standard", Style.Standard); 281 IO.mapOptional("TabWidth", Style.TabWidth); 282 IO.mapOptional("UseTab", Style.UseTab); 283 } 284 }; 285 286 // Allows to read vector<FormatStyle> while keeping default values. 287 // IO.getContext() should contain a pointer to the FormatStyle structure, that 288 // will be used to get default values for missing keys. 289 // If the first element has no Language specified, it will be treated as the 290 // default one for the following elements. 291 template <> struct DocumentListTraits<std::vector<FormatStyle>> { 292 static size_t size(IO &IO, std::vector<FormatStyle> &Seq) { 293 return Seq.size(); 294 } 295 static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq, 296 size_t Index) { 297 if (Index >= Seq.size()) { 298 assert(Index == Seq.size()); 299 FormatStyle Template; 300 if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) { 301 Template = Seq[0]; 302 } else { 303 Template = *((const FormatStyle *)IO.getContext()); 304 Template.Language = FormatStyle::LK_None; 305 } 306 Seq.resize(Index + 1, Template); 307 } 308 return Seq[Index]; 309 } 310 }; 311 } 312 } 313 314 namespace clang { 315 namespace format { 316 317 const std::error_category &getParseCategory() { 318 static ParseErrorCategory C; 319 return C; 320 } 321 std::error_code make_error_code(ParseError e) { 322 return std::error_code(static_cast<int>(e), getParseCategory()); 323 } 324 325 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT { 326 return "clang-format.parse_error"; 327 } 328 329 std::string ParseErrorCategory::message(int EV) const { 330 switch (static_cast<ParseError>(EV)) { 331 case ParseError::Success: 332 return "Success"; 333 case ParseError::Error: 334 return "Invalid argument"; 335 case ParseError::Unsuitable: 336 return "Unsuitable"; 337 } 338 llvm_unreachable("unexpected parse error"); 339 } 340 341 FormatStyle getLLVMStyle() { 342 FormatStyle LLVMStyle; 343 LLVMStyle.Language = FormatStyle::LK_Cpp; 344 LLVMStyle.AccessModifierOffset = -2; 345 LLVMStyle.AlignEscapedNewlinesLeft = false; 346 LLVMStyle.AlignAfterOpenBracket = true; 347 LLVMStyle.AlignOperands = true; 348 LLVMStyle.AlignTrailingComments = true; 349 LLVMStyle.AlignConsecutiveAssignments = false; 350 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 351 LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All; 352 LLVMStyle.AllowShortBlocksOnASingleLine = false; 353 LLVMStyle.AllowShortCaseLabelsOnASingleLine = false; 354 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 355 LLVMStyle.AllowShortLoopsOnASingleLine = false; 356 LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None; 357 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; 358 LLVMStyle.AlwaysBreakTemplateDeclarations = false; 359 LLVMStyle.BinPackParameters = true; 360 LLVMStyle.BinPackArguments = true; 361 LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; 362 LLVMStyle.BreakBeforeTernaryOperators = true; 363 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 364 LLVMStyle.BreakConstructorInitializersBeforeComma = false; 365 LLVMStyle.ColumnLimit = 80; 366 LLVMStyle.CommentPragmas = "^ IWYU pragma:"; 367 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 368 LLVMStyle.ConstructorInitializerIndentWidth = 4; 369 LLVMStyle.ContinuationIndentWidth = 4; 370 LLVMStyle.Cpp11BracedListStyle = true; 371 LLVMStyle.DerivePointerAlignment = false; 372 LLVMStyle.ExperimentalAutoDetectBinPacking = false; 373 LLVMStyle.ForEachMacros.push_back("foreach"); 374 LLVMStyle.ForEachMacros.push_back("Q_FOREACH"); 375 LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH"); 376 LLVMStyle.IndentCaseLabels = false; 377 LLVMStyle.IndentWrappedFunctionNames = false; 378 LLVMStyle.IndentWidth = 2; 379 LLVMStyle.TabWidth = 8; 380 LLVMStyle.MaxEmptyLinesToKeep = 1; 381 LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true; 382 LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; 383 LLVMStyle.ObjCBlockIndentWidth = 2; 384 LLVMStyle.ObjCSpaceAfterProperty = false; 385 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 386 LLVMStyle.PointerAlignment = FormatStyle::PAS_Right; 387 LLVMStyle.SpacesBeforeTrailingComments = 1; 388 LLVMStyle.Standard = FormatStyle::LS_Cpp11; 389 LLVMStyle.UseTab = FormatStyle::UT_Never; 390 LLVMStyle.SpacesInParentheses = false; 391 LLVMStyle.SpacesInSquareBrackets = false; 392 LLVMStyle.SpaceInEmptyParentheses = false; 393 LLVMStyle.SpacesInContainerLiterals = true; 394 LLVMStyle.SpacesInCStyleCastParentheses = false; 395 LLVMStyle.SpaceAfterCStyleCast = false; 396 LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements; 397 LLVMStyle.SpaceBeforeAssignmentOperators = true; 398 LLVMStyle.SpacesInAngles = false; 399 400 LLVMStyle.PenaltyBreakComment = 300; 401 LLVMStyle.PenaltyBreakFirstLessLess = 120; 402 LLVMStyle.PenaltyBreakString = 1000; 403 LLVMStyle.PenaltyExcessCharacter = 1000000; 404 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; 405 LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19; 406 407 LLVMStyle.DisableFormat = false; 408 409 return LLVMStyle; 410 } 411 412 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { 413 FormatStyle GoogleStyle = getLLVMStyle(); 414 GoogleStyle.Language = Language; 415 416 GoogleStyle.AccessModifierOffset = -1; 417 GoogleStyle.AlignEscapedNewlinesLeft = true; 418 GoogleStyle.AllowShortIfStatementsOnASingleLine = true; 419 GoogleStyle.AllowShortLoopsOnASingleLine = true; 420 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; 421 GoogleStyle.AlwaysBreakTemplateDeclarations = true; 422 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 423 GoogleStyle.DerivePointerAlignment = true; 424 GoogleStyle.IndentCaseLabels = true; 425 GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false; 426 GoogleStyle.ObjCSpaceAfterProperty = false; 427 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 428 GoogleStyle.PointerAlignment = FormatStyle::PAS_Left; 429 GoogleStyle.SpacesBeforeTrailingComments = 2; 430 GoogleStyle.Standard = FormatStyle::LS_Auto; 431 432 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; 433 GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1; 434 435 if (Language == FormatStyle::LK_Java) { 436 GoogleStyle.AlignAfterOpenBracket = false; 437 GoogleStyle.AlignOperands = false; 438 GoogleStyle.AlignTrailingComments = false; 439 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; 440 GoogleStyle.AllowShortIfStatementsOnASingleLine = false; 441 GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; 442 GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment; 443 GoogleStyle.ColumnLimit = 100; 444 GoogleStyle.SpaceAfterCStyleCast = true; 445 GoogleStyle.SpacesBeforeTrailingComments = 1; 446 } else if (Language == FormatStyle::LK_JavaScript) { 447 GoogleStyle.BreakBeforeTernaryOperators = false; 448 GoogleStyle.MaxEmptyLinesToKeep = 3; 449 GoogleStyle.SpacesInContainerLiterals = false; 450 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 451 GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; 452 } else if (Language == FormatStyle::LK_Proto) { 453 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; 454 GoogleStyle.SpacesInContainerLiterals = false; 455 } 456 457 return GoogleStyle; 458 } 459 460 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { 461 FormatStyle ChromiumStyle = getGoogleStyle(Language); 462 if (Language == FormatStyle::LK_Java) { 463 ChromiumStyle.AllowShortIfStatementsOnASingleLine = true; 464 ChromiumStyle.IndentWidth = 4; 465 ChromiumStyle.ContinuationIndentWidth = 8; 466 } else { 467 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; 468 ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 469 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; 470 ChromiumStyle.AllowShortLoopsOnASingleLine = false; 471 ChromiumStyle.BinPackParameters = false; 472 ChromiumStyle.DerivePointerAlignment = false; 473 } 474 ChromiumStyle.MacroBlockBegin = "^IPC_BEGIN_MESSAGE_MAP$"; 475 ChromiumStyle.MacroBlockBegin = "^IPC_END_MESSAGE_MAP$"; 476 return ChromiumStyle; 477 } 478 479 FormatStyle getMozillaStyle() { 480 FormatStyle MozillaStyle = getLLVMStyle(); 481 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; 482 MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 483 MozillaStyle.AlwaysBreakAfterDefinitionReturnType = 484 FormatStyle::DRTBS_TopLevel; 485 MozillaStyle.AlwaysBreakTemplateDeclarations = true; 486 MozillaStyle.BreakConstructorInitializersBeforeComma = true; 487 MozillaStyle.ConstructorInitializerIndentWidth = 2; 488 MozillaStyle.ContinuationIndentWidth = 2; 489 MozillaStyle.Cpp11BracedListStyle = false; 490 MozillaStyle.IndentCaseLabels = true; 491 MozillaStyle.ObjCSpaceAfterProperty = true; 492 MozillaStyle.ObjCSpaceBeforeProtocolList = false; 493 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; 494 MozillaStyle.PointerAlignment = FormatStyle::PAS_Left; 495 return MozillaStyle; 496 } 497 498 FormatStyle getWebKitStyle() { 499 FormatStyle Style = getLLVMStyle(); 500 Style.AccessModifierOffset = -4; 501 Style.AlignAfterOpenBracket = false; 502 Style.AlignOperands = false; 503 Style.AlignTrailingComments = false; 504 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; 505 Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup; 506 Style.BreakConstructorInitializersBeforeComma = true; 507 Style.Cpp11BracedListStyle = false; 508 Style.ColumnLimit = 0; 509 Style.IndentWidth = 4; 510 Style.NamespaceIndentation = FormatStyle::NI_Inner; 511 Style.ObjCBlockIndentWidth = 4; 512 Style.ObjCSpaceAfterProperty = true; 513 Style.PointerAlignment = FormatStyle::PAS_Left; 514 Style.Standard = FormatStyle::LS_Cpp03; 515 return Style; 516 } 517 518 FormatStyle getGNUStyle() { 519 FormatStyle Style = getLLVMStyle(); 520 Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All; 521 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; 522 Style.BreakBeforeBraces = FormatStyle::BS_GNU; 523 Style.BreakBeforeTernaryOperators = true; 524 Style.Cpp11BracedListStyle = false; 525 Style.ColumnLimit = 79; 526 Style.SpaceBeforeParens = FormatStyle::SBPO_Always; 527 Style.Standard = FormatStyle::LS_Cpp03; 528 return Style; 529 } 530 531 FormatStyle getNoStyle() { 532 FormatStyle NoStyle = getLLVMStyle(); 533 NoStyle.DisableFormat = true; 534 return NoStyle; 535 } 536 537 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language, 538 FormatStyle *Style) { 539 if (Name.equals_lower("llvm")) { 540 *Style = getLLVMStyle(); 541 } else if (Name.equals_lower("chromium")) { 542 *Style = getChromiumStyle(Language); 543 } else if (Name.equals_lower("mozilla")) { 544 *Style = getMozillaStyle(); 545 } else if (Name.equals_lower("google")) { 546 *Style = getGoogleStyle(Language); 547 } else if (Name.equals_lower("webkit")) { 548 *Style = getWebKitStyle(); 549 } else if (Name.equals_lower("gnu")) { 550 *Style = getGNUStyle(); 551 } else if (Name.equals_lower("none")) { 552 *Style = getNoStyle(); 553 } else { 554 return false; 555 } 556 557 Style->Language = Language; 558 return true; 559 } 560 561 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { 562 assert(Style); 563 FormatStyle::LanguageKind Language = Style->Language; 564 assert(Language != FormatStyle::LK_None); 565 if (Text.trim().empty()) 566 return make_error_code(ParseError::Error); 567 568 std::vector<FormatStyle> Styles; 569 llvm::yaml::Input Input(Text); 570 // DocumentListTraits<vector<FormatStyle>> uses the context to get default 571 // values for the fields, keys for which are missing from the configuration. 572 // Mapping also uses the context to get the language to find the correct 573 // base style. 574 Input.setContext(Style); 575 Input >> Styles; 576 if (Input.error()) 577 return Input.error(); 578 579 for (unsigned i = 0; i < Styles.size(); ++i) { 580 // Ensures that only the first configuration can skip the Language option. 581 if (Styles[i].Language == FormatStyle::LK_None && i != 0) 582 return make_error_code(ParseError::Error); 583 // Ensure that each language is configured at most once. 584 for (unsigned j = 0; j < i; ++j) { 585 if (Styles[i].Language == Styles[j].Language) { 586 DEBUG(llvm::dbgs() 587 << "Duplicate languages in the config file on positions " << j 588 << " and " << i << "\n"); 589 return make_error_code(ParseError::Error); 590 } 591 } 592 } 593 // Look for a suitable configuration starting from the end, so we can 594 // find the configuration for the specific language first, and the default 595 // configuration (which can only be at slot 0) after it. 596 for (int i = Styles.size() - 1; i >= 0; --i) { 597 if (Styles[i].Language == Language || 598 Styles[i].Language == FormatStyle::LK_None) { 599 *Style = Styles[i]; 600 Style->Language = Language; 601 return make_error_code(ParseError::Success); 602 } 603 } 604 return make_error_code(ParseError::Unsuitable); 605 } 606 607 std::string configurationAsText(const FormatStyle &Style) { 608 std::string Text; 609 llvm::raw_string_ostream Stream(Text); 610 llvm::yaml::Output Output(Stream); 611 // We use the same mapping method for input and output, so we need a non-const 612 // reference here. 613 FormatStyle NonConstStyle = Style; 614 Output << NonConstStyle; 615 return Stream.str(); 616 } 617 618 namespace { 619 620 class FormatTokenLexer { 621 public: 622 FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style, 623 encoding::Encoding Encoding) 624 : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false), 625 LessStashed(false), Column(0), TrailingWhitespace(0), 626 SourceMgr(SourceMgr), ID(ID), Style(Style), 627 IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable), 628 Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false), 629 MacroBlockBeginRegex(Style.MacroBlockBegin), 630 MacroBlockEndRegex(Style.MacroBlockEnd) { 631 Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr, 632 getFormattingLangOpts(Style))); 633 Lex->SetKeepWhitespaceMode(true); 634 635 for (const std::string &ForEachMacro : Style.ForEachMacros) 636 ForEachMacros.push_back(&IdentTable.get(ForEachMacro)); 637 std::sort(ForEachMacros.begin(), ForEachMacros.end()); 638 } 639 640 ArrayRef<FormatToken *> lex() { 641 assert(Tokens.empty()); 642 assert(FirstInLineIndex == 0); 643 do { 644 Tokens.push_back(getNextToken()); 645 tryMergePreviousTokens(); 646 if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) 647 FirstInLineIndex = Tokens.size() - 1; 648 } while (Tokens.back()->Tok.isNot(tok::eof)); 649 return Tokens; 650 } 651 652 const AdditionalKeywords &getKeywords() { return Keywords; } 653 654 private: 655 void tryMergePreviousTokens() { 656 if (tryMerge_TMacro()) 657 return; 658 if (tryMergeConflictMarkers()) 659 return; 660 if (tryMergeLessLess()) 661 return; 662 663 if (Style.Language == FormatStyle::LK_JavaScript) { 664 if (tryMergeJSRegexLiteral()) 665 return; 666 if (tryMergeEscapeSequence()) 667 return; 668 if (tryMergeTemplateString()) 669 return; 670 671 static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal}; 672 static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal, 673 tok::equal}; 674 static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater, 675 tok::greaterequal}; 676 static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater}; 677 // FIXME: Investigate what token type gives the correct operator priority. 678 if (tryMergeTokens(JSIdentity, TT_BinaryOperator)) 679 return; 680 if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator)) 681 return; 682 if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator)) 683 return; 684 if (tryMergeTokens(JSRightArrow, TT_JsFatArrow)) 685 return; 686 } 687 } 688 689 bool tryMergeLessLess() { 690 // Merge X,less,less,Y into X,lessless,Y unless X or Y is less. 691 if (Tokens.size() < 3) 692 return false; 693 694 bool FourthTokenIsLess = false; 695 if (Tokens.size() > 3) 696 FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less); 697 698 auto First = Tokens.end() - 3; 699 if (First[2]->is(tok::less) || First[1]->isNot(tok::less) || 700 First[0]->isNot(tok::less) || FourthTokenIsLess) 701 return false; 702 703 // Only merge if there currently is no whitespace between the two "<". 704 if (First[1]->WhitespaceRange.getBegin() != 705 First[1]->WhitespaceRange.getEnd()) 706 return false; 707 708 First[0]->Tok.setKind(tok::lessless); 709 First[0]->TokenText = "<<"; 710 First[0]->ColumnWidth += 1; 711 Tokens.erase(Tokens.end() - 2); 712 return true; 713 } 714 715 bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) { 716 if (Tokens.size() < Kinds.size()) 717 return false; 718 719 SmallVectorImpl<FormatToken *>::const_iterator First = 720 Tokens.end() - Kinds.size(); 721 if (!First[0]->is(Kinds[0])) 722 return false; 723 unsigned AddLength = 0; 724 for (unsigned i = 1; i < Kinds.size(); ++i) { 725 if (!First[i]->is(Kinds[i]) || 726 First[i]->WhitespaceRange.getBegin() != 727 First[i]->WhitespaceRange.getEnd()) 728 return false; 729 AddLength += First[i]->TokenText.size(); 730 } 731 Tokens.resize(Tokens.size() - Kinds.size() + 1); 732 First[0]->TokenText = StringRef(First[0]->TokenText.data(), 733 First[0]->TokenText.size() + AddLength); 734 First[0]->ColumnWidth += AddLength; 735 First[0]->Type = NewType; 736 return true; 737 } 738 739 // Tries to merge an escape sequence, i.e. a "\\" and the following 740 // character. Use e.g. inside JavaScript regex literals. 741 bool tryMergeEscapeSequence() { 742 if (Tokens.size() < 2) 743 return false; 744 FormatToken *Previous = Tokens[Tokens.size() - 2]; 745 if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\") 746 return false; 747 ++Previous->ColumnWidth; 748 StringRef Text = Previous->TokenText; 749 Previous->TokenText = StringRef(Text.data(), Text.size() + 1); 750 resetLexer(SourceMgr.getFileOffset(Tokens.back()->Tok.getLocation()) + 1); 751 Tokens.resize(Tokens.size() - 1); 752 Column = Previous->OriginalColumn + Previous->ColumnWidth; 753 return true; 754 } 755 756 // Try to determine whether the current token ends a JavaScript regex literal. 757 // We heuristically assume that this is a regex literal if we find two 758 // unescaped slashes on a line and the token before the first slash is one of 759 // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by 760 // a division. 761 bool tryMergeJSRegexLiteral() { 762 if (Tokens.size() < 2) 763 return false; 764 765 // If this is a string literal with a slash inside, compute the slash's 766 // offset and try to find the beginning of the regex literal. 767 // Also look at tok::unknown, as it can be an unterminated char literal. 768 size_t SlashInStringPos = StringRef::npos; 769 if (Tokens.back()->isOneOf(tok::string_literal, tok::char_constant, 770 tok::unknown)) { 771 // Start search from position 1 as otherwise, this is an unknown token 772 // for an unterminated /*-comment which is handled elsewhere. 773 SlashInStringPos = Tokens.back()->TokenText.find('/', 1); 774 if (SlashInStringPos == StringRef::npos) 775 return false; 776 } 777 778 // If a regex literal ends in "\//", this gets represented by an unknown 779 // token "\" and a comment. 780 bool MightEndWithEscapedSlash = 781 Tokens.back()->is(tok::comment) && 782 Tokens.back()->TokenText.startswith("//") && 783 Tokens[Tokens.size() - 2]->TokenText == "\\"; 784 if (!MightEndWithEscapedSlash && SlashInStringPos == StringRef::npos && 785 (Tokens.back()->isNot(tok::slash) || 786 (Tokens[Tokens.size() - 2]->is(tok::unknown) && 787 Tokens[Tokens.size() - 2]->TokenText == "\\"))) 788 return false; 789 790 unsigned TokenCount = 0; 791 for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) { 792 ++TokenCount; 793 auto Prev = I + 1; 794 while (Prev != E && Prev[0]->is(tok::comment)) 795 ++Prev; 796 if (I[0]->isOneOf(tok::slash, tok::slashequal) && 797 (Prev == E || 798 ((Prev[0]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, 799 tok::r_brace, tok::exclaim, tok::l_square, 800 tok::colon, tok::comma, tok::question, 801 tok::kw_return) || 802 Prev[0]->isBinaryOperator())))) { 803 unsigned LastColumn = Tokens.back()->OriginalColumn; 804 SourceLocation Loc = Tokens.back()->Tok.getLocation(); 805 if (MightEndWithEscapedSlash) { 806 // This regex literal ends in '\//'. Skip past the '//' of the last 807 // token and re-start lexing from there. 808 resetLexer(SourceMgr.getFileOffset(Loc) + 2); 809 } else if (SlashInStringPos != StringRef::npos) { 810 // This regex literal ends in a string_literal with a slash inside. 811 // Calculate end column and reset lexer appropriately. 812 resetLexer(SourceMgr.getFileOffset(Loc) + SlashInStringPos + 1); 813 LastColumn += SlashInStringPos; 814 } 815 Tokens.resize(Tokens.size() - TokenCount); 816 Tokens.back()->Tok.setKind(tok::unknown); 817 Tokens.back()->Type = TT_RegexLiteral; 818 // Treat regex literals like other string_literals. 819 Tokens.back()->Tok.setKind(tok::string_literal); 820 Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn; 821 return true; 822 } 823 824 // There can't be a newline inside a regex literal. 825 if (I[0]->NewlinesBefore > 0) 826 return false; 827 } 828 return false; 829 } 830 831 bool tryMergeTemplateString() { 832 if (Tokens.size() < 2) 833 return false; 834 835 FormatToken *EndBacktick = Tokens.back(); 836 // Backticks get lexed as tok::unknown tokens. If a template string contains 837 // a comment start, it gets lexed as a tok::comment, or tok::unknown if 838 // unterminated. 839 if (!EndBacktick->isOneOf(tok::comment, tok::string_literal, 840 tok::char_constant, tok::unknown)) 841 return false; 842 size_t CommentBacktickPos = EndBacktick->TokenText.find('`'); 843 // Unknown token that's not actually a backtick, or a comment that doesn't 844 // contain a backtick. 845 if (CommentBacktickPos == StringRef::npos) 846 return false; 847 848 unsigned TokenCount = 0; 849 bool IsMultiline = false; 850 unsigned EndColumnInFirstLine = 851 EndBacktick->OriginalColumn + EndBacktick->ColumnWidth; 852 for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) { 853 ++TokenCount; 854 if (I[0]->IsMultiline) 855 IsMultiline = true; 856 857 // If there was a preceding template string, this must be the start of a 858 // template string, not the end. 859 if (I[0]->is(TT_TemplateString)) 860 return false; 861 862 if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") { 863 // Keep track of the rhs offset of the last token to wrap across lines - 864 // its the rhs offset of the first line of the template string, used to 865 // determine its width. 866 if (I[0]->IsMultiline) 867 EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth; 868 // If the token has newlines, the token before it (if it exists) is the 869 // rhs end of the previous line. 870 if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) { 871 EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth; 872 IsMultiline = true; 873 } 874 continue; 875 } 876 877 Tokens.resize(Tokens.size() - TokenCount); 878 Tokens.back()->Type = TT_TemplateString; 879 const char *EndOffset = 880 EndBacktick->TokenText.data() + 1 + CommentBacktickPos; 881 if (CommentBacktickPos != 0) { 882 // If the backtick was not the first character (e.g. in a comment), 883 // re-lex after the backtick position. 884 SourceLocation Loc = EndBacktick->Tok.getLocation(); 885 resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1); 886 } 887 Tokens.back()->TokenText = 888 StringRef(Tokens.back()->TokenText.data(), 889 EndOffset - Tokens.back()->TokenText.data()); 890 891 unsigned EndOriginalColumn = EndBacktick->OriginalColumn; 892 if (EndOriginalColumn == 0) { 893 SourceLocation Loc = EndBacktick->Tok.getLocation(); 894 EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc); 895 } 896 // If the ` is further down within the token (e.g. in a comment). 897 EndOriginalColumn += CommentBacktickPos; 898 899 if (IsMultiline) { 900 // ColumnWidth is from backtick to last token in line. 901 // LastLineColumnWidth is 0 to backtick. 902 // x = `some content 903 // until here`; 904 Tokens.back()->ColumnWidth = 905 EndColumnInFirstLine - Tokens.back()->OriginalColumn; 906 // +1 for the ` itself. 907 Tokens.back()->LastLineColumnWidth = EndOriginalColumn + 1; 908 Tokens.back()->IsMultiline = true; 909 } else { 910 // Token simply spans from start to end, +1 for the ` itself. 911 Tokens.back()->ColumnWidth = 912 EndOriginalColumn - Tokens.back()->OriginalColumn + 1; 913 } 914 return true; 915 } 916 return false; 917 } 918 919 bool tryMerge_TMacro() { 920 if (Tokens.size() < 4) 921 return false; 922 FormatToken *Last = Tokens.back(); 923 if (!Last->is(tok::r_paren)) 924 return false; 925 926 FormatToken *String = Tokens[Tokens.size() - 2]; 927 if (!String->is(tok::string_literal) || String->IsMultiline) 928 return false; 929 930 if (!Tokens[Tokens.size() - 3]->is(tok::l_paren)) 931 return false; 932 933 FormatToken *Macro = Tokens[Tokens.size() - 4]; 934 if (Macro->TokenText != "_T") 935 return false; 936 937 const char *Start = Macro->TokenText.data(); 938 const char *End = Last->TokenText.data() + Last->TokenText.size(); 939 String->TokenText = StringRef(Start, End - Start); 940 String->IsFirst = Macro->IsFirst; 941 String->LastNewlineOffset = Macro->LastNewlineOffset; 942 String->WhitespaceRange = Macro->WhitespaceRange; 943 String->OriginalColumn = Macro->OriginalColumn; 944 String->ColumnWidth = encoding::columnWidthWithTabs( 945 String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding); 946 String->NewlinesBefore = Macro->NewlinesBefore; 947 String->HasUnescapedNewline = Macro->HasUnescapedNewline; 948 949 Tokens.pop_back(); 950 Tokens.pop_back(); 951 Tokens.pop_back(); 952 Tokens.back() = String; 953 return true; 954 } 955 956 bool tryMergeConflictMarkers() { 957 if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof)) 958 return false; 959 960 // Conflict lines look like: 961 // <marker> <text from the vcs> 962 // For example: 963 // >>>>>>> /file/in/file/system at revision 1234 964 // 965 // We merge all tokens in a line that starts with a conflict marker 966 // into a single token with a special token type that the unwrapped line 967 // parser will use to correctly rebuild the underlying code. 968 969 FileID ID; 970 // Get the position of the first token in the line. 971 unsigned FirstInLineOffset; 972 std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc( 973 Tokens[FirstInLineIndex]->getStartOfNonWhitespace()); 974 StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer(); 975 // Calculate the offset of the start of the current line. 976 auto LineOffset = Buffer.rfind('\n', FirstInLineOffset); 977 if (LineOffset == StringRef::npos) { 978 LineOffset = 0; 979 } else { 980 ++LineOffset; 981 } 982 983 auto FirstSpace = Buffer.find_first_of(" \n", LineOffset); 984 StringRef LineStart; 985 if (FirstSpace == StringRef::npos) { 986 LineStart = Buffer.substr(LineOffset); 987 } else { 988 LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset); 989 } 990 991 TokenType Type = TT_Unknown; 992 if (LineStart == "<<<<<<<" || LineStart == ">>>>") { 993 Type = TT_ConflictStart; 994 } else if (LineStart == "|||||||" || LineStart == "=======" || 995 LineStart == "====") { 996 Type = TT_ConflictAlternative; 997 } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") { 998 Type = TT_ConflictEnd; 999 } 1000 1001 if (Type != TT_Unknown) { 1002 FormatToken *Next = Tokens.back(); 1003 1004 Tokens.resize(FirstInLineIndex + 1); 1005 // We do not need to build a complete token here, as we will skip it 1006 // during parsing anyway (as we must not touch whitespace around conflict 1007 // markers). 1008 Tokens.back()->Type = Type; 1009 Tokens.back()->Tok.setKind(tok::kw___unknown_anytype); 1010 1011 Tokens.push_back(Next); 1012 return true; 1013 } 1014 1015 return false; 1016 } 1017 1018 FormatToken *getStashedToken() { 1019 // Create a synthesized second '>' or '<' token. 1020 Token Tok = FormatTok->Tok; 1021 StringRef TokenText = FormatTok->TokenText; 1022 1023 unsigned OriginalColumn = FormatTok->OriginalColumn; 1024 FormatTok = new (Allocator.Allocate()) FormatToken; 1025 FormatTok->Tok = Tok; 1026 SourceLocation TokLocation = 1027 FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1); 1028 FormatTok->Tok.setLocation(TokLocation); 1029 FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation); 1030 FormatTok->TokenText = TokenText; 1031 FormatTok->ColumnWidth = 1; 1032 FormatTok->OriginalColumn = OriginalColumn + 1; 1033 1034 return FormatTok; 1035 } 1036 1037 FormatToken *getNextToken() { 1038 if (GreaterStashed) { 1039 GreaterStashed = false; 1040 return getStashedToken(); 1041 } 1042 if (LessStashed) { 1043 LessStashed = false; 1044 return getStashedToken(); 1045 } 1046 1047 FormatTok = new (Allocator.Allocate()) FormatToken; 1048 readRawToken(*FormatTok); 1049 SourceLocation WhitespaceStart = 1050 FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); 1051 FormatTok->IsFirst = IsFirstToken; 1052 IsFirstToken = false; 1053 1054 // Consume and record whitespace until we find a significant token. 1055 unsigned WhitespaceLength = TrailingWhitespace; 1056 while (FormatTok->Tok.is(tok::unknown)) { 1057 StringRef Text = FormatTok->TokenText; 1058 auto EscapesNewline = [&](int pos) { 1059 // A '\r' here is just part of '\r\n'. Skip it. 1060 if (pos >= 0 && Text[pos] == '\r') 1061 --pos; 1062 // See whether there is an odd number of '\' before this. 1063 unsigned count = 0; 1064 for (; pos >= 0; --pos, ++count) 1065 if (Text[pos] != '\\') 1066 break; 1067 return count & 1; 1068 }; 1069 // FIXME: This miscounts tok:unknown tokens that are not just 1070 // whitespace, e.g. a '`' character. 1071 for (int i = 0, e = Text.size(); i != e; ++i) { 1072 switch (Text[i]) { 1073 case '\n': 1074 ++FormatTok->NewlinesBefore; 1075 FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1); 1076 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; 1077 Column = 0; 1078 break; 1079 case '\r': 1080 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; 1081 Column = 0; 1082 break; 1083 case '\f': 1084 case '\v': 1085 Column = 0; 1086 break; 1087 case ' ': 1088 ++Column; 1089 break; 1090 case '\t': 1091 Column += Style.TabWidth - Column % Style.TabWidth; 1092 break; 1093 case '\\': 1094 if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n')) 1095 FormatTok->Type = TT_ImplicitStringLiteral; 1096 break; 1097 default: 1098 FormatTok->Type = TT_ImplicitStringLiteral; 1099 break; 1100 } 1101 } 1102 1103 if (FormatTok->is(TT_ImplicitStringLiteral)) 1104 break; 1105 WhitespaceLength += FormatTok->Tok.getLength(); 1106 1107 readRawToken(*FormatTok); 1108 } 1109 1110 // In case the token starts with escaped newlines, we want to 1111 // take them into account as whitespace - this pattern is quite frequent 1112 // in macro definitions. 1113 // FIXME: Add a more explicit test. 1114 while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' && 1115 FormatTok->TokenText[1] == '\n') { 1116 ++FormatTok->NewlinesBefore; 1117 WhitespaceLength += 2; 1118 FormatTok->LastNewlineOffset = 2; 1119 Column = 0; 1120 FormatTok->TokenText = FormatTok->TokenText.substr(2); 1121 } 1122 1123 FormatTok->WhitespaceRange = SourceRange( 1124 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); 1125 1126 FormatTok->OriginalColumn = Column; 1127 1128 TrailingWhitespace = 0; 1129 if (FormatTok->Tok.is(tok::comment)) { 1130 // FIXME: Add the trimmed whitespace to Column. 1131 StringRef UntrimmedText = FormatTok->TokenText; 1132 FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f"); 1133 TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size(); 1134 } else if (FormatTok->Tok.is(tok::raw_identifier)) { 1135 IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText); 1136 FormatTok->Tok.setIdentifierInfo(&Info); 1137 FormatTok->Tok.setKind(Info.getTokenID()); 1138 if (Style.Language == FormatStyle::LK_Java && 1139 FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete)) { 1140 FormatTok->Tok.setKind(tok::identifier); 1141 FormatTok->Tok.setIdentifierInfo(nullptr); 1142 } 1143 } else if (FormatTok->Tok.is(tok::greatergreater)) { 1144 FormatTok->Tok.setKind(tok::greater); 1145 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); 1146 GreaterStashed = true; 1147 } else if (FormatTok->Tok.is(tok::lessless)) { 1148 FormatTok->Tok.setKind(tok::less); 1149 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); 1150 LessStashed = true; 1151 } 1152 1153 // Now FormatTok is the next non-whitespace token. 1154 1155 StringRef Text = FormatTok->TokenText; 1156 size_t FirstNewlinePos = Text.find('\n'); 1157 if (FirstNewlinePos == StringRef::npos) { 1158 // FIXME: ColumnWidth actually depends on the start column, we need to 1159 // take this into account when the token is moved. 1160 FormatTok->ColumnWidth = 1161 encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding); 1162 Column += FormatTok->ColumnWidth; 1163 } else { 1164 FormatTok->IsMultiline = true; 1165 // FIXME: ColumnWidth actually depends on the start column, we need to 1166 // take this into account when the token is moved. 1167 FormatTok->ColumnWidth = encoding::columnWidthWithTabs( 1168 Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding); 1169 1170 // The last line of the token always starts in column 0. 1171 // Thus, the length can be precomputed even in the presence of tabs. 1172 FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs( 1173 Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, 1174 Encoding); 1175 Column = FormatTok->LastLineColumnWidth; 1176 } 1177 1178 if (Style.Language == FormatStyle::LK_Cpp) { 1179 if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() && 1180 Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() == 1181 tok::pp_define) && 1182 std::find(ForEachMacros.begin(), ForEachMacros.end(), 1183 FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) { 1184 FormatTok->Type = TT_ForEachMacro; 1185 } else if (FormatTok->is(tok::identifier)) { 1186 if (MacroBlockBeginRegex.match(Text)) { 1187 FormatTok->Type = TT_MacroBlockBegin; 1188 } else if (MacroBlockEndRegex.match(Text)) { 1189 FormatTok->Type = TT_MacroBlockEnd; 1190 } 1191 } 1192 } 1193 1194 return FormatTok; 1195 } 1196 1197 FormatToken *FormatTok; 1198 bool IsFirstToken; 1199 bool GreaterStashed, LessStashed; 1200 unsigned Column; 1201 unsigned TrailingWhitespace; 1202 std::unique_ptr<Lexer> Lex; 1203 SourceManager &SourceMgr; 1204 FileID ID; 1205 FormatStyle &Style; 1206 IdentifierTable IdentTable; 1207 AdditionalKeywords Keywords; 1208 encoding::Encoding Encoding; 1209 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; 1210 // Index (in 'Tokens') of the last token that starts a new line. 1211 unsigned FirstInLineIndex; 1212 SmallVector<FormatToken *, 16> Tokens; 1213 SmallVector<IdentifierInfo *, 8> ForEachMacros; 1214 1215 bool FormattingDisabled; 1216 1217 llvm::Regex MacroBlockBeginRegex; 1218 llvm::Regex MacroBlockEndRegex; 1219 1220 void readRawToken(FormatToken &Tok) { 1221 Lex->LexFromRawLexer(Tok.Tok); 1222 Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), 1223 Tok.Tok.getLength()); 1224 // For formatting, treat unterminated string literals like normal string 1225 // literals. 1226 if (Tok.is(tok::unknown)) { 1227 if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') { 1228 Tok.Tok.setKind(tok::string_literal); 1229 Tok.IsUnterminatedLiteral = true; 1230 } else if (Style.Language == FormatStyle::LK_JavaScript && 1231 Tok.TokenText == "''") { 1232 Tok.Tok.setKind(tok::char_constant); 1233 } 1234 } 1235 1236 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" || 1237 Tok.TokenText == "/* clang-format on */")) { 1238 FormattingDisabled = false; 1239 } 1240 1241 Tok.Finalized = FormattingDisabled; 1242 1243 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" || 1244 Tok.TokenText == "/* clang-format off */")) { 1245 FormattingDisabled = true; 1246 } 1247 } 1248 1249 void resetLexer(unsigned Offset) { 1250 StringRef Buffer = SourceMgr.getBufferData(ID); 1251 Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID), 1252 getFormattingLangOpts(Style), Buffer.begin(), 1253 Buffer.begin() + Offset, Buffer.end())); 1254 Lex->SetKeepWhitespaceMode(true); 1255 TrailingWhitespace = 0; 1256 } 1257 }; 1258 1259 static StringRef getLanguageName(FormatStyle::LanguageKind Language) { 1260 switch (Language) { 1261 case FormatStyle::LK_Cpp: 1262 return "C++"; 1263 case FormatStyle::LK_Java: 1264 return "Java"; 1265 case FormatStyle::LK_JavaScript: 1266 return "JavaScript"; 1267 case FormatStyle::LK_Proto: 1268 return "Proto"; 1269 default: 1270 return "Unknown"; 1271 } 1272 } 1273 1274 class Formatter : public UnwrappedLineConsumer { 1275 public: 1276 Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID, 1277 ArrayRef<CharSourceRange> Ranges) 1278 : Style(Style), ID(ID), SourceMgr(SourceMgr), 1279 Whitespaces(SourceMgr, Style, 1280 inputUsesCRLF(SourceMgr.getBufferData(ID))), 1281 Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1), 1282 Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) { 1283 DEBUG(llvm::dbgs() << "File encoding: " 1284 << (Encoding == encoding::Encoding_UTF8 ? "UTF8" 1285 : "unknown") 1286 << "\n"); 1287 DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language) 1288 << "\n"); 1289 } 1290 1291 tooling::Replacements format(bool *IncompleteFormat) { 1292 tooling::Replacements Result; 1293 FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding); 1294 1295 UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), 1296 *this); 1297 Parser.parse(); 1298 assert(UnwrappedLines.rbegin()->empty()); 1299 for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; 1300 ++Run) { 1301 DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); 1302 SmallVector<AnnotatedLine *, 16> AnnotatedLines; 1303 for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) { 1304 AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i])); 1305 } 1306 tooling::Replacements RunResult = 1307 format(AnnotatedLines, Tokens, IncompleteFormat); 1308 DEBUG({ 1309 llvm::dbgs() << "Replacements for run " << Run << ":\n"; 1310 for (tooling::Replacements::iterator I = RunResult.begin(), 1311 E = RunResult.end(); 1312 I != E; ++I) { 1313 llvm::dbgs() << I->toString() << "\n"; 1314 } 1315 }); 1316 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1317 delete AnnotatedLines[i]; 1318 } 1319 Result.insert(RunResult.begin(), RunResult.end()); 1320 Whitespaces.reset(); 1321 } 1322 return Result; 1323 } 1324 1325 tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, 1326 FormatTokenLexer &Tokens, 1327 bool *IncompleteFormat) { 1328 TokenAnnotator Annotator(Style, Tokens.getKeywords()); 1329 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1330 Annotator.annotate(*AnnotatedLines[i]); 1331 } 1332 deriveLocalStyle(AnnotatedLines); 1333 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1334 Annotator.calculateFormattingInformation(*AnnotatedLines[i]); 1335 } 1336 computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end()); 1337 1338 Annotator.setCommentLineLevels(AnnotatedLines); 1339 ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr, 1340 Whitespaces, Encoding, 1341 BinPackInconclusiveFunctions); 1342 UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), 1343 IncompleteFormat) 1344 .format(AnnotatedLines); 1345 return Whitespaces.generateReplacements(); 1346 } 1347 1348 private: 1349 // Determines which lines are affected by the SourceRanges given as input. 1350 // Returns \c true if at least one line between I and E or one of their 1351 // children is affected. 1352 bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I, 1353 SmallVectorImpl<AnnotatedLine *>::iterator E) { 1354 bool SomeLineAffected = false; 1355 const AnnotatedLine *PreviousLine = nullptr; 1356 while (I != E) { 1357 AnnotatedLine *Line = *I; 1358 Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First); 1359 1360 // If a line is part of a preprocessor directive, it needs to be formatted 1361 // if any token within the directive is affected. 1362 if (Line->InPPDirective) { 1363 FormatToken *Last = Line->Last; 1364 SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1; 1365 while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) { 1366 Last = (*PPEnd)->Last; 1367 ++PPEnd; 1368 } 1369 1370 if (affectsTokenRange(*Line->First, *Last, 1371 /*IncludeLeadingNewlines=*/false)) { 1372 SomeLineAffected = true; 1373 markAllAsAffected(I, PPEnd); 1374 } 1375 I = PPEnd; 1376 continue; 1377 } 1378 1379 if (nonPPLineAffected(Line, PreviousLine)) 1380 SomeLineAffected = true; 1381 1382 PreviousLine = Line; 1383 ++I; 1384 } 1385 return SomeLineAffected; 1386 } 1387 1388 // Determines whether 'Line' is affected by the SourceRanges given as input. 1389 // Returns \c true if line or one if its children is affected. 1390 bool nonPPLineAffected(AnnotatedLine *Line, 1391 const AnnotatedLine *PreviousLine) { 1392 bool SomeLineAffected = false; 1393 Line->ChildrenAffected = 1394 computeAffectedLines(Line->Children.begin(), Line->Children.end()); 1395 if (Line->ChildrenAffected) 1396 SomeLineAffected = true; 1397 1398 // Stores whether one of the line's tokens is directly affected. 1399 bool SomeTokenAffected = false; 1400 // Stores whether we need to look at the leading newlines of the next token 1401 // in order to determine whether it was affected. 1402 bool IncludeLeadingNewlines = false; 1403 1404 // Stores whether the first child line of any of this line's tokens is 1405 // affected. 1406 bool SomeFirstChildAffected = false; 1407 1408 for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { 1409 // Determine whether 'Tok' was affected. 1410 if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines)) 1411 SomeTokenAffected = true; 1412 1413 // Determine whether the first child of 'Tok' was affected. 1414 if (!Tok->Children.empty() && Tok->Children.front()->Affected) 1415 SomeFirstChildAffected = true; 1416 1417 IncludeLeadingNewlines = Tok->Children.empty(); 1418 } 1419 1420 // Was this line moved, i.e. has it previously been on the same line as an 1421 // affected line? 1422 bool LineMoved = PreviousLine && PreviousLine->Affected && 1423 Line->First->NewlinesBefore == 0; 1424 1425 bool IsContinuedComment = 1426 Line->First->is(tok::comment) && Line->First->Next == nullptr && 1427 Line->First->NewlinesBefore < 2 && PreviousLine && 1428 PreviousLine->Affected && PreviousLine->Last->is(tok::comment); 1429 1430 if (SomeTokenAffected || SomeFirstChildAffected || LineMoved || 1431 IsContinuedComment) { 1432 Line->Affected = true; 1433 SomeLineAffected = true; 1434 } 1435 return SomeLineAffected; 1436 } 1437 1438 // Marks all lines between I and E as well as all their children as affected. 1439 void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I, 1440 SmallVectorImpl<AnnotatedLine *>::iterator E) { 1441 while (I != E) { 1442 (*I)->Affected = true; 1443 markAllAsAffected((*I)->Children.begin(), (*I)->Children.end()); 1444 ++I; 1445 } 1446 } 1447 1448 // Returns true if the range from 'First' to 'Last' intersects with one of the 1449 // input ranges. 1450 bool affectsTokenRange(const FormatToken &First, const FormatToken &Last, 1451 bool IncludeLeadingNewlines) { 1452 SourceLocation Start = First.WhitespaceRange.getBegin(); 1453 if (!IncludeLeadingNewlines) 1454 Start = Start.getLocWithOffset(First.LastNewlineOffset); 1455 SourceLocation End = Last.getStartOfNonWhitespace(); 1456 End = End.getLocWithOffset(Last.TokenText.size()); 1457 CharSourceRange Range = CharSourceRange::getCharRange(Start, End); 1458 return affectsCharSourceRange(Range); 1459 } 1460 1461 // Returns true if one of the input ranges intersect the leading empty lines 1462 // before 'Tok'. 1463 bool affectsLeadingEmptyLines(const FormatToken &Tok) { 1464 CharSourceRange EmptyLineRange = CharSourceRange::getCharRange( 1465 Tok.WhitespaceRange.getBegin(), 1466 Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset)); 1467 return affectsCharSourceRange(EmptyLineRange); 1468 } 1469 1470 // Returns true if 'Range' intersects with one of the input ranges. 1471 bool affectsCharSourceRange(const CharSourceRange &Range) { 1472 for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(), 1473 E = Ranges.end(); 1474 I != E; ++I) { 1475 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) && 1476 !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin())) 1477 return true; 1478 } 1479 return false; 1480 } 1481 1482 static bool inputUsesCRLF(StringRef Text) { 1483 return Text.count('\r') * 2 > Text.count('\n'); 1484 } 1485 1486 void 1487 deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { 1488 unsigned CountBoundToVariable = 0; 1489 unsigned CountBoundToType = 0; 1490 bool HasCpp03IncompatibleFormat = false; 1491 bool HasBinPackedFunction = false; 1492 bool HasOnePerLineFunction = false; 1493 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1494 if (!AnnotatedLines[i]->First->Next) 1495 continue; 1496 FormatToken *Tok = AnnotatedLines[i]->First->Next; 1497 while (Tok->Next) { 1498 if (Tok->is(TT_PointerOrReference)) { 1499 bool SpacesBefore = 1500 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); 1501 bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() != 1502 Tok->Next->WhitespaceRange.getEnd(); 1503 if (SpacesBefore && !SpacesAfter) 1504 ++CountBoundToVariable; 1505 else if (!SpacesBefore && SpacesAfter) 1506 ++CountBoundToType; 1507 } 1508 1509 if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) { 1510 if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener)) 1511 HasCpp03IncompatibleFormat = true; 1512 if (Tok->is(TT_TemplateCloser) && 1513 Tok->Previous->is(TT_TemplateCloser)) 1514 HasCpp03IncompatibleFormat = true; 1515 } 1516 1517 if (Tok->PackingKind == PPK_BinPacked) 1518 HasBinPackedFunction = true; 1519 if (Tok->PackingKind == PPK_OnePerLine) 1520 HasOnePerLineFunction = true; 1521 1522 Tok = Tok->Next; 1523 } 1524 } 1525 if (Style.DerivePointerAlignment) { 1526 if (CountBoundToType > CountBoundToVariable) 1527 Style.PointerAlignment = FormatStyle::PAS_Left; 1528 else if (CountBoundToType < CountBoundToVariable) 1529 Style.PointerAlignment = FormatStyle::PAS_Right; 1530 } 1531 if (Style.Standard == FormatStyle::LS_Auto) { 1532 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11 1533 : FormatStyle::LS_Cpp03; 1534 } 1535 BinPackInconclusiveFunctions = 1536 HasBinPackedFunction || !HasOnePerLineFunction; 1537 } 1538 1539 void consumeUnwrappedLine(const UnwrappedLine &TheLine) override { 1540 assert(!UnwrappedLines.empty()); 1541 UnwrappedLines.back().push_back(TheLine); 1542 } 1543 1544 void finishRun() override { 1545 UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>()); 1546 } 1547 1548 FormatStyle Style; 1549 FileID ID; 1550 SourceManager &SourceMgr; 1551 WhitespaceManager Whitespaces; 1552 SmallVector<CharSourceRange, 8> Ranges; 1553 SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines; 1554 1555 encoding::Encoding Encoding; 1556 bool BinPackInconclusiveFunctions; 1557 }; 1558 1559 } // end anonymous namespace 1560 1561 tooling::Replacements reformat(const FormatStyle &Style, 1562 SourceManager &SourceMgr, FileID ID, 1563 ArrayRef<CharSourceRange> Ranges, 1564 bool *IncompleteFormat) { 1565 if (Style.DisableFormat) 1566 return tooling::Replacements(); 1567 Formatter formatter(Style, SourceMgr, ID, Ranges); 1568 return formatter.format(IncompleteFormat); 1569 } 1570 1571 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, 1572 ArrayRef<tooling::Range> Ranges, 1573 StringRef FileName, bool *IncompleteFormat) { 1574 if (Style.DisableFormat) 1575 return tooling::Replacements(); 1576 1577 FileManager Files((FileSystemOptions())); 1578 DiagnosticsEngine Diagnostics( 1579 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 1580 new DiagnosticOptions); 1581 SourceManager SourceMgr(Diagnostics, Files); 1582 std::unique_ptr<llvm::MemoryBuffer> Buf = 1583 llvm::MemoryBuffer::getMemBuffer(Code, FileName); 1584 const clang::FileEntry *Entry = 1585 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); 1586 SourceMgr.overrideFileContents(Entry, std::move(Buf)); 1587 FileID ID = 1588 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); 1589 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); 1590 std::vector<CharSourceRange> CharRanges; 1591 for (const tooling::Range &Range : Ranges) { 1592 SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset()); 1593 SourceLocation End = Start.getLocWithOffset(Range.getLength()); 1594 CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); 1595 } 1596 return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat); 1597 } 1598 1599 LangOptions getFormattingLangOpts(const FormatStyle &Style) { 1600 LangOptions LangOpts; 1601 LangOpts.CPlusPlus = 1; 1602 LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 1603 LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 1604 LangOpts.LineComment = 1; 1605 bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp; 1606 LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0; 1607 LangOpts.Bool = 1; 1608 LangOpts.ObjC1 = 1; 1609 LangOpts.ObjC2 = 1; 1610 LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. 1611 return LangOpts; 1612 } 1613 1614 const char *StyleOptionHelpDescription = 1615 "Coding style, currently supports:\n" 1616 " LLVM, Google, Chromium, Mozilla, WebKit.\n" 1617 "Use -style=file to load style configuration from\n" 1618 ".clang-format file located in one of the parent\n" 1619 "directories of the source file (or current\n" 1620 "directory for stdin).\n" 1621 "Use -style=\"{key: value, ...}\" to set specific\n" 1622 "parameters, e.g.:\n" 1623 " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; 1624 1625 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { 1626 if (FileName.endswith(".java")) { 1627 return FormatStyle::LK_Java; 1628 } else if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts")) { 1629 // JavaScript or TypeScript. 1630 return FormatStyle::LK_JavaScript; 1631 } else if (FileName.endswith_lower(".proto") || 1632 FileName.endswith_lower(".protodevel")) { 1633 return FormatStyle::LK_Proto; 1634 } 1635 return FormatStyle::LK_Cpp; 1636 } 1637 1638 FormatStyle getStyle(StringRef StyleName, StringRef FileName, 1639 StringRef FallbackStyle) { 1640 FormatStyle Style = getLLVMStyle(); 1641 Style.Language = getLanguageByFileName(FileName); 1642 if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) { 1643 llvm::errs() << "Invalid fallback style \"" << FallbackStyle 1644 << "\" using LLVM style\n"; 1645 return Style; 1646 } 1647 1648 if (StyleName.startswith("{")) { 1649 // Parse YAML/JSON style from the command line. 1650 if (std::error_code ec = parseConfiguration(StyleName, &Style)) { 1651 llvm::errs() << "Error parsing -style: " << ec.message() << ", using " 1652 << FallbackStyle << " style\n"; 1653 } 1654 return Style; 1655 } 1656 1657 if (!StyleName.equals_lower("file")) { 1658 if (!getPredefinedStyle(StyleName, Style.Language, &Style)) 1659 llvm::errs() << "Invalid value for -style, using " << FallbackStyle 1660 << " style\n"; 1661 return Style; 1662 } 1663 1664 // Look for .clang-format/_clang-format file in the file's parent directories. 1665 SmallString<128> UnsuitableConfigFiles; 1666 SmallString<128> Path(FileName); 1667 llvm::sys::fs::make_absolute(Path); 1668 for (StringRef Directory = Path; !Directory.empty(); 1669 Directory = llvm::sys::path::parent_path(Directory)) { 1670 if (!llvm::sys::fs::is_directory(Directory)) 1671 continue; 1672 SmallString<128> ConfigFile(Directory); 1673 1674 llvm::sys::path::append(ConfigFile, ".clang-format"); 1675 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); 1676 bool IsFile = false; 1677 // Ignore errors from is_regular_file: we only need to know if we can read 1678 // the file or not. 1679 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); 1680 1681 if (!IsFile) { 1682 // Try _clang-format too, since dotfiles are not commonly used on Windows. 1683 ConfigFile = Directory; 1684 llvm::sys::path::append(ConfigFile, "_clang-format"); 1685 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); 1686 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); 1687 } 1688 1689 if (IsFile) { 1690 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 1691 llvm::MemoryBuffer::getFile(ConfigFile.c_str()); 1692 if (std::error_code EC = Text.getError()) { 1693 llvm::errs() << EC.message() << "\n"; 1694 break; 1695 } 1696 if (std::error_code ec = 1697 parseConfiguration(Text.get()->getBuffer(), &Style)) { 1698 if (ec == ParseError::Unsuitable) { 1699 if (!UnsuitableConfigFiles.empty()) 1700 UnsuitableConfigFiles.append(", "); 1701 UnsuitableConfigFiles.append(ConfigFile); 1702 continue; 1703 } 1704 llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message() 1705 << "\n"; 1706 break; 1707 } 1708 DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n"); 1709 return Style; 1710 } 1711 } 1712 if (!UnsuitableConfigFiles.empty()) { 1713 llvm::errs() << "Configuration file(s) do(es) not support " 1714 << getLanguageName(Style.Language) << ": " 1715 << UnsuitableConfigFiles << "\n"; 1716 } 1717 return Style; 1718 } 1719 1720 } // namespace format 1721 } // namespace clang 1722