1 //===--- Format.cpp - Format C++ code -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements functions declared in Format.h. This will be 12 /// split into separate files as we go. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "ContinuationIndenter.h" 17 #include "TokenAnnotator.h" 18 #include "UnwrappedLineParser.h" 19 #include "WhitespaceManager.h" 20 #include "clang/Basic/Diagnostic.h" 21 #include "clang/Basic/DiagnosticOptions.h" 22 #include "clang/Basic/SourceManager.h" 23 #include "clang/Format/Format.h" 24 #include "clang/Lex/Lexer.h" 25 #include "llvm/ADT/STLExtras.h" 26 #include "llvm/Support/Allocator.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/Path.h" 29 #include "llvm/Support/YAMLTraits.h" 30 #include <queue> 31 #include <string> 32 33 #define DEBUG_TYPE "format-formatter" 34 35 using clang::format::FormatStyle; 36 37 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) 38 39 namespace llvm { 40 namespace yaml { 41 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> { 42 static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) { 43 IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp); 44 IO.enumCase(Value, "Java", FormatStyle::LK_Java); 45 IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript); 46 IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); 47 } 48 }; 49 50 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> { 51 static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) { 52 IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03); 53 IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03); 54 IO.enumCase(Value, "Cpp11", 
FormatStyle::LS_Cpp11); 55 IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11); 56 IO.enumCase(Value, "Auto", FormatStyle::LS_Auto); 57 } 58 }; 59 60 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> { 61 static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) { 62 IO.enumCase(Value, "Never", FormatStyle::UT_Never); 63 IO.enumCase(Value, "false", FormatStyle::UT_Never); 64 IO.enumCase(Value, "Always", FormatStyle::UT_Always); 65 IO.enumCase(Value, "true", FormatStyle::UT_Always); 66 IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation); 67 } 68 }; 69 70 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> { 71 static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) { 72 IO.enumCase(Value, "None", FormatStyle::SFS_None); 73 IO.enumCase(Value, "false", FormatStyle::SFS_None); 74 IO.enumCase(Value, "All", FormatStyle::SFS_All); 75 IO.enumCase(Value, "true", FormatStyle::SFS_All); 76 IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline); 77 } 78 }; 79 80 template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> { 81 static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) { 82 IO.enumCase(Value, "All", FormatStyle::BOS_All); 83 IO.enumCase(Value, "true", FormatStyle::BOS_All); 84 IO.enumCase(Value, "None", FormatStyle::BOS_None); 85 IO.enumCase(Value, "false", FormatStyle::BOS_None); 86 IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment); 87 } 88 }; 89 90 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> { 91 static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) { 92 IO.enumCase(Value, "Attach", FormatStyle::BS_Attach); 93 IO.enumCase(Value, "Linux", FormatStyle::BS_Linux); 94 IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup); 95 IO.enumCase(Value, "Allman", FormatStyle::BS_Allman); 96 IO.enumCase(Value, "GNU", FormatStyle::BS_GNU); 97 } 98 }; 99 100 template <> 101 struct 
ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> { 102 static void enumeration(IO &IO, 103 FormatStyle::NamespaceIndentationKind &Value) { 104 IO.enumCase(Value, "None", FormatStyle::NI_None); 105 IO.enumCase(Value, "Inner", FormatStyle::NI_Inner); 106 IO.enumCase(Value, "All", FormatStyle::NI_All); 107 } 108 }; 109 110 template <> 111 struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> { 112 static void enumeration(IO &IO, 113 FormatStyle::PointerAlignmentStyle &Value) { 114 IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle); 115 IO.enumCase(Value, "Left", FormatStyle::PAS_Left); 116 IO.enumCase(Value, "Right", FormatStyle::PAS_Right); 117 118 // For backward compatibility. 119 IO.enumCase(Value, "true", FormatStyle::PAS_Left); 120 IO.enumCase(Value, "false", FormatStyle::PAS_Right); 121 } 122 }; 123 124 template <> 125 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> { 126 static void enumeration(IO &IO, 127 FormatStyle::SpaceBeforeParensOptions &Value) { 128 IO.enumCase(Value, "Never", FormatStyle::SBPO_Never); 129 IO.enumCase(Value, "ControlStatements", 130 FormatStyle::SBPO_ControlStatements); 131 IO.enumCase(Value, "Always", FormatStyle::SBPO_Always); 132 133 // For backward compatibility. 134 IO.enumCase(Value, "false", FormatStyle::SBPO_Never); 135 IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements); 136 } 137 }; 138 139 template <> struct MappingTraits<FormatStyle> { 140 static void mapping(IO &IO, FormatStyle &Style) { 141 // When reading, read the language first, we need it for getPredefinedStyle. 
142 IO.mapOptional("Language", Style.Language); 143 144 if (IO.outputting()) { 145 StringRef StylesArray[] = { "LLVM", "Google", "Chromium", 146 "Mozilla", "WebKit", "GNU" }; 147 ArrayRef<StringRef> Styles(StylesArray); 148 for (size_t i = 0, e = Styles.size(); i < e; ++i) { 149 StringRef StyleName(Styles[i]); 150 FormatStyle PredefinedStyle; 151 if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) && 152 Style == PredefinedStyle) { 153 IO.mapOptional("# BasedOnStyle", StyleName); 154 break; 155 } 156 } 157 } else { 158 StringRef BasedOnStyle; 159 IO.mapOptional("BasedOnStyle", BasedOnStyle); 160 if (!BasedOnStyle.empty()) { 161 FormatStyle::LanguageKind OldLanguage = Style.Language; 162 FormatStyle::LanguageKind Language = 163 ((FormatStyle *)IO.getContext())->Language; 164 if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) { 165 IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle)); 166 return; 167 } 168 Style.Language = OldLanguage; 169 } 170 } 171 172 IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); 173 IO.mapOptional("ConstructorInitializerIndentWidth", 174 Style.ConstructorInitializerIndentWidth); 175 IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); 176 IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); 177 IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", 178 Style.AllowAllParametersOfDeclarationOnNextLine); 179 IO.mapOptional("AllowShortBlocksOnASingleLine", 180 Style.AllowShortBlocksOnASingleLine); 181 IO.mapOptional("AllowShortCaseLabelsOnASingleLine", 182 Style.AllowShortCaseLabelsOnASingleLine); 183 IO.mapOptional("AllowShortIfStatementsOnASingleLine", 184 Style.AllowShortIfStatementsOnASingleLine); 185 IO.mapOptional("AllowShortLoopsOnASingleLine", 186 Style.AllowShortLoopsOnASingleLine); 187 IO.mapOptional("AllowShortFunctionsOnASingleLine", 188 Style.AllowShortFunctionsOnASingleLine); 189 
IO.mapOptional("AlwaysBreakAfterDefinitionReturnType", 190 Style.AlwaysBreakAfterDefinitionReturnType); 191 IO.mapOptional("AlwaysBreakTemplateDeclarations", 192 Style.AlwaysBreakTemplateDeclarations); 193 IO.mapOptional("AlwaysBreakBeforeMultilineStrings", 194 Style.AlwaysBreakBeforeMultilineStrings); 195 IO.mapOptional("BreakBeforeBinaryOperators", 196 Style.BreakBeforeBinaryOperators); 197 IO.mapOptional("BreakBeforeTernaryOperators", 198 Style.BreakBeforeTernaryOperators); 199 IO.mapOptional("BreakConstructorInitializersBeforeComma", 200 Style.BreakConstructorInitializersBeforeComma); 201 IO.mapOptional("BinPackParameters", Style.BinPackParameters); 202 IO.mapOptional("ColumnLimit", Style.ColumnLimit); 203 IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", 204 Style.ConstructorInitializerAllOnOneLineOrOnePerLine); 205 IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment); 206 IO.mapOptional("ExperimentalAutoDetectBinPacking", 207 Style.ExperimentalAutoDetectBinPacking); 208 IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); 209 IO.mapOptional("IndentWrappedFunctionNames", 210 Style.IndentWrappedFunctionNames); 211 IO.mapOptional("IndentFunctionDeclarationAfterType", 212 Style.IndentWrappedFunctionNames); 213 IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); 214 IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks", 215 Style.KeepEmptyLinesAtTheStartOfBlocks); 216 IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); 217 IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty); 218 IO.mapOptional("ObjCSpaceBeforeProtocolList", 219 Style.ObjCSpaceBeforeProtocolList); 220 IO.mapOptional("PenaltyBreakBeforeFirstCallParameter", 221 Style.PenaltyBreakBeforeFirstCallParameter); 222 IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); 223 IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString); 224 IO.mapOptional("PenaltyBreakFirstLessLess", 225 
Style.PenaltyBreakFirstLessLess); 226 IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter); 227 IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", 228 Style.PenaltyReturnTypeOnItsOwnLine); 229 IO.mapOptional("PointerAlignment", Style.PointerAlignment); 230 IO.mapOptional("SpacesBeforeTrailingComments", 231 Style.SpacesBeforeTrailingComments); 232 IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle); 233 IO.mapOptional("Standard", Style.Standard); 234 IO.mapOptional("IndentWidth", Style.IndentWidth); 235 IO.mapOptional("TabWidth", Style.TabWidth); 236 IO.mapOptional("UseTab", Style.UseTab); 237 IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); 238 IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses); 239 IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets); 240 IO.mapOptional("SpacesInAngles", Style.SpacesInAngles); 241 IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses); 242 IO.mapOptional("SpacesInCStyleCastParentheses", 243 Style.SpacesInCStyleCastParentheses); 244 IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast); 245 IO.mapOptional("SpacesInContainerLiterals", 246 Style.SpacesInContainerLiterals); 247 IO.mapOptional("SpaceBeforeAssignmentOperators", 248 Style.SpaceBeforeAssignmentOperators); 249 IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth); 250 IO.mapOptional("CommentPragmas", Style.CommentPragmas); 251 IO.mapOptional("ForEachMacros", Style.ForEachMacros); 252 253 // For backward compatibility. 
254 if (!IO.outputting()) { 255 IO.mapOptional("SpaceAfterControlStatementKeyword", 256 Style.SpaceBeforeParens); 257 IO.mapOptional("PointerBindsToType", Style.PointerAlignment); 258 IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment); 259 } 260 IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens); 261 IO.mapOptional("DisableFormat", Style.DisableFormat); 262 } 263 }; 264 265 // Allows to read vector<FormatStyle> while keeping default values. 266 // IO.getContext() should contain a pointer to the FormatStyle structure, that 267 // will be used to get default values for missing keys. 268 // If the first element has no Language specified, it will be treated as the 269 // default one for the following elements. 270 template <> struct DocumentListTraits<std::vector<FormatStyle> > { 271 static size_t size(IO &IO, std::vector<FormatStyle> &Seq) { 272 return Seq.size(); 273 } 274 static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq, 275 size_t Index) { 276 if (Index >= Seq.size()) { 277 assert(Index == Seq.size()); 278 FormatStyle Template; 279 if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) { 280 Template = Seq[0]; 281 } else { 282 Template = *((const FormatStyle *)IO.getContext()); 283 Template.Language = FormatStyle::LK_None; 284 } 285 Seq.resize(Index + 1, Template); 286 } 287 return Seq[Index]; 288 } 289 }; 290 } 291 } 292 293 namespace clang { 294 namespace format { 295 296 const std::error_category &getParseCategory() { 297 static ParseErrorCategory C; 298 return C; 299 } 300 std::error_code make_error_code(ParseError e) { 301 return std::error_code(static_cast<int>(e), getParseCategory()); 302 } 303 304 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT { 305 return "clang-format.parse_error"; 306 } 307 308 std::string ParseErrorCategory::message(int EV) const { 309 switch (static_cast<ParseError>(EV)) { 310 case ParseError::Success: 311 return "Success"; 312 case ParseError::Error: 313 return "Invalid 
argument"; 314 case ParseError::Unsuitable: 315 return "Unsuitable"; 316 } 317 llvm_unreachable("unexpected parse error"); 318 } 319 320 FormatStyle getLLVMStyle() { 321 FormatStyle LLVMStyle; 322 LLVMStyle.Language = FormatStyle::LK_Cpp; 323 LLVMStyle.AccessModifierOffset = -2; 324 LLVMStyle.AlignEscapedNewlinesLeft = false; 325 LLVMStyle.AlignTrailingComments = true; 326 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 327 LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All; 328 LLVMStyle.AllowShortBlocksOnASingleLine = false; 329 LLVMStyle.AllowShortCaseLabelsOnASingleLine = false; 330 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 331 LLVMStyle.AllowShortLoopsOnASingleLine = false; 332 LLVMStyle.AlwaysBreakAfterDefinitionReturnType = false; 333 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; 334 LLVMStyle.AlwaysBreakTemplateDeclarations = false; 335 LLVMStyle.BinPackParameters = true; 336 LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; 337 LLVMStyle.BreakBeforeTernaryOperators = true; 338 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 339 LLVMStyle.BreakConstructorInitializersBeforeComma = false; 340 LLVMStyle.ColumnLimit = 80; 341 LLVMStyle.CommentPragmas = "^ IWYU pragma:"; 342 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 343 LLVMStyle.ConstructorInitializerIndentWidth = 4; 344 LLVMStyle.ContinuationIndentWidth = 4; 345 LLVMStyle.Cpp11BracedListStyle = true; 346 LLVMStyle.DerivePointerAlignment = false; 347 LLVMStyle.ExperimentalAutoDetectBinPacking = false; 348 LLVMStyle.ForEachMacros.push_back("foreach"); 349 LLVMStyle.ForEachMacros.push_back("Q_FOREACH"); 350 LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH"); 351 LLVMStyle.IndentCaseLabels = false; 352 LLVMStyle.IndentWrappedFunctionNames = false; 353 LLVMStyle.IndentWidth = 2; 354 LLVMStyle.TabWidth = 8; 355 LLVMStyle.MaxEmptyLinesToKeep = 1; 356 LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true; 357 
LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; 358 LLVMStyle.ObjCSpaceAfterProperty = false; 359 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 360 LLVMStyle.PointerAlignment = FormatStyle::PAS_Right; 361 LLVMStyle.SpacesBeforeTrailingComments = 1; 362 LLVMStyle.Standard = FormatStyle::LS_Cpp11; 363 LLVMStyle.UseTab = FormatStyle::UT_Never; 364 LLVMStyle.SpacesInParentheses = false; 365 LLVMStyle.SpacesInSquareBrackets = false; 366 LLVMStyle.SpaceInEmptyParentheses = false; 367 LLVMStyle.SpacesInContainerLiterals = true; 368 LLVMStyle.SpacesInCStyleCastParentheses = false; 369 LLVMStyle.SpaceAfterCStyleCast = false; 370 LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements; 371 LLVMStyle.SpaceBeforeAssignmentOperators = true; 372 LLVMStyle.SpacesInAngles = false; 373 374 LLVMStyle.PenaltyBreakComment = 300; 375 LLVMStyle.PenaltyBreakFirstLessLess = 120; 376 LLVMStyle.PenaltyBreakString = 1000; 377 LLVMStyle.PenaltyExcessCharacter = 1000000; 378 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; 379 LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19; 380 381 LLVMStyle.DisableFormat = false; 382 383 return LLVMStyle; 384 } 385 386 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { 387 FormatStyle GoogleStyle = getLLVMStyle(); 388 GoogleStyle.Language = Language; 389 390 GoogleStyle.AccessModifierOffset = -1; 391 GoogleStyle.AlignEscapedNewlinesLeft = true; 392 GoogleStyle.AllowShortIfStatementsOnASingleLine = true; 393 GoogleStyle.AllowShortLoopsOnASingleLine = true; 394 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; 395 GoogleStyle.AlwaysBreakTemplateDeclarations = true; 396 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 397 GoogleStyle.DerivePointerAlignment = true; 398 GoogleStyle.IndentCaseLabels = true; 399 GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false; 400 GoogleStyle.ObjCSpaceAfterProperty = false; 401 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 402 GoogleStyle.PointerAlignment = 
FormatStyle::PAS_Left; 403 GoogleStyle.SpacesBeforeTrailingComments = 2; 404 GoogleStyle.Standard = FormatStyle::LS_Auto; 405 406 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; 407 GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1; 408 409 if (Language == FormatStyle::LK_Java) { 410 GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment; 411 GoogleStyle.ColumnLimit = 100; 412 GoogleStyle.SpaceAfterCStyleCast = true; 413 } else if (Language == FormatStyle::LK_JavaScript) { 414 GoogleStyle.BreakBeforeTernaryOperators = false; 415 GoogleStyle.MaxEmptyLinesToKeep = 3; 416 GoogleStyle.SpacesInContainerLiterals = false; 417 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 418 } else if (Language == FormatStyle::LK_Proto) { 419 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; 420 GoogleStyle.SpacesInContainerLiterals = false; 421 } 422 423 return GoogleStyle; 424 } 425 426 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { 427 FormatStyle ChromiumStyle = getGoogleStyle(Language); 428 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; 429 ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 430 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; 431 ChromiumStyle.AllowShortLoopsOnASingleLine = false; 432 ChromiumStyle.BinPackParameters = false; 433 ChromiumStyle.DerivePointerAlignment = false; 434 return ChromiumStyle; 435 } 436 437 FormatStyle getMozillaStyle() { 438 FormatStyle MozillaStyle = getLLVMStyle(); 439 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; 440 MozillaStyle.Cpp11BracedListStyle = false; 441 MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 442 MozillaStyle.DerivePointerAlignment = true; 443 MozillaStyle.IndentCaseLabels = true; 444 MozillaStyle.ObjCSpaceAfterProperty = true; 445 MozillaStyle.ObjCSpaceBeforeProtocolList = false; 446 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; 447 
MozillaStyle.PointerAlignment = FormatStyle::PAS_Left; 448 MozillaStyle.Standard = FormatStyle::LS_Cpp03; 449 return MozillaStyle; 450 } 451 452 FormatStyle getWebKitStyle() { 453 FormatStyle Style = getLLVMStyle(); 454 Style.AccessModifierOffset = -4; 455 Style.AlignTrailingComments = false; 456 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; 457 Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup; 458 Style.BreakConstructorInitializersBeforeComma = true; 459 Style.Cpp11BracedListStyle = false; 460 Style.ColumnLimit = 0; 461 Style.IndentWidth = 4; 462 Style.NamespaceIndentation = FormatStyle::NI_Inner; 463 Style.ObjCSpaceAfterProperty = true; 464 Style.PointerAlignment = FormatStyle::PAS_Left; 465 Style.Standard = FormatStyle::LS_Cpp03; 466 return Style; 467 } 468 469 FormatStyle getGNUStyle() { 470 FormatStyle Style = getLLVMStyle(); 471 Style.AlwaysBreakAfterDefinitionReturnType = true; 472 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; 473 Style.BreakBeforeBraces = FormatStyle::BS_GNU; 474 Style.BreakBeforeTernaryOperators = true; 475 Style.Cpp11BracedListStyle = false; 476 Style.ColumnLimit = 79; 477 Style.SpaceBeforeParens = FormatStyle::SBPO_Always; 478 Style.Standard = FormatStyle::LS_Cpp03; 479 return Style; 480 } 481 482 FormatStyle getNoStyle() { 483 FormatStyle NoStyle = getLLVMStyle(); 484 NoStyle.DisableFormat = true; 485 return NoStyle; 486 } 487 488 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language, 489 FormatStyle *Style) { 490 if (Name.equals_lower("llvm")) { 491 *Style = getLLVMStyle(); 492 } else if (Name.equals_lower("chromium")) { 493 *Style = getChromiumStyle(Language); 494 } else if (Name.equals_lower("mozilla")) { 495 *Style = getMozillaStyle(); 496 } else if (Name.equals_lower("google")) { 497 *Style = getGoogleStyle(Language); 498 } else if (Name.equals_lower("webkit")) { 499 *Style = getWebKitStyle(); 500 } else if (Name.equals_lower("gnu")) { 501 *Style = getGNUStyle(); 502 } else if 
(Name.equals_lower("none")) { 503 *Style = getNoStyle(); 504 } else { 505 return false; 506 } 507 508 Style->Language = Language; 509 return true; 510 } 511 512 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { 513 assert(Style); 514 FormatStyle::LanguageKind Language = Style->Language; 515 assert(Language != FormatStyle::LK_None); 516 if (Text.trim().empty()) 517 return make_error_code(ParseError::Error); 518 519 std::vector<FormatStyle> Styles; 520 llvm::yaml::Input Input(Text); 521 // DocumentListTraits<vector<FormatStyle>> uses the context to get default 522 // values for the fields, keys for which are missing from the configuration. 523 // Mapping also uses the context to get the language to find the correct 524 // base style. 525 Input.setContext(Style); 526 Input >> Styles; 527 if (Input.error()) 528 return Input.error(); 529 530 for (unsigned i = 0; i < Styles.size(); ++i) { 531 // Ensures that only the first configuration can skip the Language option. 532 if (Styles[i].Language == FormatStyle::LK_None && i != 0) 533 return make_error_code(ParseError::Error); 534 // Ensure that each language is configured at most once. 535 for (unsigned j = 0; j < i; ++j) { 536 if (Styles[i].Language == Styles[j].Language) { 537 DEBUG(llvm::dbgs() 538 << "Duplicate languages in the config file on positions " << j 539 << " and " << i << "\n"); 540 return make_error_code(ParseError::Error); 541 } 542 } 543 } 544 // Look for a suitable configuration starting from the end, so we can 545 // find the configuration for the specific language first, and the default 546 // configuration (which can only be at slot 0) after it. 
547 for (int i = Styles.size() - 1; i >= 0; --i) { 548 if (Styles[i].Language == Language || 549 Styles[i].Language == FormatStyle::LK_None) { 550 *Style = Styles[i]; 551 Style->Language = Language; 552 return make_error_code(ParseError::Success); 553 } 554 } 555 return make_error_code(ParseError::Unsuitable); 556 } 557 558 std::string configurationAsText(const FormatStyle &Style) { 559 std::string Text; 560 llvm::raw_string_ostream Stream(Text); 561 llvm::yaml::Output Output(Stream); 562 // We use the same mapping method for input and output, so we need a non-const 563 // reference here. 564 FormatStyle NonConstStyle = Style; 565 Output << NonConstStyle; 566 return Stream.str(); 567 } 568 569 namespace { 570 571 class NoColumnLimitFormatter { 572 public: 573 NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {} 574 575 /// \brief Formats the line starting at \p State, simply keeping all of the 576 /// input's line breaking decisions. 577 void format(unsigned FirstIndent, const AnnotatedLine *Line) { 578 LineState State = 579 Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false); 580 while (State.NextToken) { 581 bool Newline = 582 Indenter->mustBreak(State) || 583 (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0); 584 Indenter->addTokenToState(State, Newline, /*DryRun=*/false); 585 } 586 } 587 588 private: 589 ContinuationIndenter *Indenter; 590 }; 591 592 class LineJoiner { 593 public: 594 LineJoiner(const FormatStyle &Style) : Style(Style) {} 595 596 /// \brief Calculates how many lines can be merged into 1 starting at \p I. 597 unsigned 598 tryFitMultipleLinesInOne(unsigned Indent, 599 SmallVectorImpl<AnnotatedLine *>::const_iterator I, 600 SmallVectorImpl<AnnotatedLine *>::const_iterator E) { 601 // We can never merge stuff if there are trailing line comments. 
602 const AnnotatedLine *TheLine = *I; 603 if (TheLine->Last->Type == TT_LineComment) 604 return 0; 605 606 if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit) 607 return 0; 608 609 unsigned Limit = 610 Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent; 611 // If we already exceed the column limit, we set 'Limit' to 0. The different 612 // tryMerge..() functions can then decide whether to still do merging. 613 Limit = TheLine->Last->TotalLength > Limit 614 ? 0 615 : Limit - TheLine->Last->TotalLength; 616 617 if (I + 1 == E || I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore) 618 return 0; 619 620 // FIXME: TheLine->Level != 0 might or might not be the right check to do. 621 // If necessary, change to something smarter. 622 bool MergeShortFunctions = 623 Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All || 624 (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline && 625 TheLine->Level != 0); 626 627 if (TheLine->Last->Type == TT_FunctionLBrace && 628 TheLine->First != TheLine->Last) { 629 return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0; 630 } 631 if (TheLine->Last->is(tok::l_brace)) { 632 return Style.BreakBeforeBraces == FormatStyle::BS_Attach 633 ? tryMergeSimpleBlock(I, E, Limit) 634 : 0; 635 } 636 if (I[1]->First->Type == TT_FunctionLBrace && 637 Style.BreakBeforeBraces != FormatStyle::BS_Attach) { 638 // Check for Limit <= 2 to account for the " {". 639 if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine))) 640 return 0; 641 Limit -= 2; 642 643 unsigned MergedLines = 0; 644 if (MergeShortFunctions) { 645 MergedLines = tryMergeSimpleBlock(I + 1, E, Limit); 646 // If we managed to merge the block, count the function header, which is 647 // on a separate line. 648 if (MergedLines > 0) 649 ++MergedLines; 650 } 651 return MergedLines; 652 } 653 if (TheLine->First->is(tok::kw_if)) { 654 return Style.AllowShortIfStatementsOnASingleLine 655 ? 
tryMergeSimpleControlStatement(I, E, Limit) 656 : 0; 657 } 658 if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) { 659 return Style.AllowShortLoopsOnASingleLine 660 ? tryMergeSimpleControlStatement(I, E, Limit) 661 : 0; 662 } 663 if (TheLine->First->isOneOf(tok::kw_case, tok::kw_default)) { 664 return Style.AllowShortCaseLabelsOnASingleLine 665 ? tryMergeShortCaseLabels(I, E, Limit) 666 : 0; 667 } 668 if (TheLine->InPPDirective && 669 (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) { 670 return tryMergeSimplePPDirective(I, E, Limit); 671 } 672 return 0; 673 } 674 675 private: 676 unsigned 677 tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 678 SmallVectorImpl<AnnotatedLine *>::const_iterator E, 679 unsigned Limit) { 680 if (Limit == 0) 681 return 0; 682 if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline) 683 return 0; 684 if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline) 685 return 0; 686 if (1 + I[1]->Last->TotalLength > Limit) 687 return 0; 688 return 1; 689 } 690 691 unsigned tryMergeSimpleControlStatement( 692 SmallVectorImpl<AnnotatedLine *>::const_iterator I, 693 SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { 694 if (Limit == 0) 695 return 0; 696 if ((Style.BreakBeforeBraces == FormatStyle::BS_Allman || 697 Style.BreakBeforeBraces == FormatStyle::BS_GNU) && 698 (I[1]->First->is(tok::l_brace) && !Style.AllowShortBlocksOnASingleLine)) 699 return 0; 700 if (I[1]->InPPDirective != (*I)->InPPDirective || 701 (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline)) 702 return 0; 703 Limit = limitConsideringMacros(I + 1, E, Limit); 704 AnnotatedLine &Line = **I; 705 if (Line.Last->isNot(tok::r_paren)) 706 return 0; 707 if (1 + I[1]->Last->TotalLength > Limit) 708 return 0; 709 if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, 710 tok::kw_while) || 711 I[1]->First->Type == TT_LineComment) 712 return 0; 713 // Only inline simple if's (no 
nested if or else). 714 if (I + 2 != E && Line.First->is(tok::kw_if) && 715 I[2]->First->is(tok::kw_else)) 716 return 0; 717 return 1; 718 } 719 720 unsigned tryMergeShortCaseLabels( 721 SmallVectorImpl<AnnotatedLine *>::const_iterator I, 722 SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { 723 if (Limit == 0 || I + 1 == E || 724 I[1]->First->isOneOf(tok::kw_case, tok::kw_default)) 725 return 0; 726 unsigned NumStmts = 0; 727 unsigned Length = 0; 728 for (; NumStmts < 3; ++NumStmts) { 729 if (I + 1 + NumStmts == E) 730 break; 731 const AnnotatedLine *Line = I[1 + NumStmts]; 732 if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace)) 733 break; 734 if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch, 735 tok::kw_while)) 736 return 0; 737 Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space. 738 } 739 if (NumStmts == 0 || NumStmts == 3 || Length > Limit) 740 return 0; 741 return NumStmts; 742 } 743 744 unsigned 745 tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 746 SmallVectorImpl<AnnotatedLine *>::const_iterator E, 747 unsigned Limit) { 748 AnnotatedLine &Line = **I; 749 750 // Don't merge ObjC @ keywords and methods. 751 if (Line.First->isOneOf(tok::at, tok::minus, tok::plus)) 752 return 0; 753 754 // Check that the current line allows merging. This depends on whether we 755 // are in a control flow statements as well as several style flags. 
756 if (Line.First->isOneOf(tok::kw_else, tok::kw_case)) 757 return 0; 758 if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try, 759 tok::kw_catch, tok::kw_for, tok::r_brace)) { 760 if (!Style.AllowShortBlocksOnASingleLine) 761 return 0; 762 if (!Style.AllowShortIfStatementsOnASingleLine && 763 Line.First->is(tok::kw_if)) 764 return 0; 765 if (!Style.AllowShortLoopsOnASingleLine && 766 Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for)) 767 return 0; 768 // FIXME: Consider an option to allow short exception handling clauses on 769 // a single line. 770 if (Line.First->isOneOf(tok::kw_try, tok::kw_catch)) 771 return 0; 772 } 773 774 FormatToken *Tok = I[1]->First; 775 if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore && 776 (Tok->getNextNonComment() == nullptr || 777 Tok->getNextNonComment()->is(tok::semi))) { 778 // We merge empty blocks even if the line exceeds the column limit. 779 Tok->SpacesRequiredBefore = 0; 780 Tok->CanBreakBefore = true; 781 return 1; 782 } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) { 783 // We don't merge short records. 784 if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct)) 785 return 0; 786 787 // Check that we still have three lines and they fit into the limit. 788 if (I + 2 == E || I[2]->Type == LT_Invalid) 789 return 0; 790 Limit = limitConsideringMacros(I + 2, E, Limit); 791 792 if (!nextTwoLinesFitInto(I, Limit)) 793 return 0; 794 795 // Second, check that the next line does not contain any braces - if it 796 // does, readability declines when putting it into a single line. 797 if (I[1]->Last->Type == TT_LineComment) 798 return 0; 799 do { 800 if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit) 801 return 0; 802 Tok = Tok->Next; 803 } while (Tok); 804 805 // Last, check that the third line starts with a closing brace. 
806 Tok = I[2]->First; 807 if (Tok->isNot(tok::r_brace)) 808 return 0; 809 810 return 2; 811 } 812 return 0; 813 } 814 815 /// Returns the modified column limit for \p I if it is inside a macro and 816 /// needs a trailing '\'. 817 unsigned 818 limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 819 SmallVectorImpl<AnnotatedLine *>::const_iterator E, 820 unsigned Limit) { 821 if (I[0]->InPPDirective && I + 1 != E && 822 !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) { 823 return Limit < 2 ? 0 : Limit - 2; 824 } 825 return Limit; 826 } 827 828 bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 829 unsigned Limit) { 830 if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore) 831 return false; 832 return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit; 833 } 834 835 bool containsMustBreak(const AnnotatedLine *Line) { 836 for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { 837 if (Tok->MustBreakBefore) 838 return true; 839 } 840 return false; 841 } 842 843 const FormatStyle &Style; 844 }; 845 846 class UnwrappedLineFormatter { 847 public: 848 UnwrappedLineFormatter(ContinuationIndenter *Indenter, 849 WhitespaceManager *Whitespaces, 850 const FormatStyle &Style) 851 : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), 852 Joiner(Style) {} 853 854 unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun, 855 int AdditionalIndent = 0, bool FixBadIndentation = false) { 856 // Try to look up already computed penalty in DryRun-mode. 
857 std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey( 858 &Lines, AdditionalIndent); 859 auto CacheIt = PenaltyCache.find(CacheKey); 860 if (DryRun && CacheIt != PenaltyCache.end()) 861 return CacheIt->second; 862 863 assert(!Lines.empty()); 864 unsigned Penalty = 0; 865 std::vector<int> IndentForLevel; 866 for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i) 867 IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent); 868 const AnnotatedLine *PreviousLine = nullptr; 869 for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(), 870 E = Lines.end(); 871 I != E; ++I) { 872 const AnnotatedLine &TheLine = **I; 873 const FormatToken *FirstTok = TheLine.First; 874 int Offset = getIndentOffset(*FirstTok); 875 876 // Determine indent and try to merge multiple unwrapped lines. 877 unsigned Indent; 878 if (TheLine.InPPDirective) { 879 Indent = TheLine.Level * Style.IndentWidth; 880 } else { 881 while (IndentForLevel.size() <= TheLine.Level) 882 IndentForLevel.push_back(-1); 883 IndentForLevel.resize(TheLine.Level + 1); 884 Indent = getIndent(IndentForLevel, TheLine.Level); 885 } 886 unsigned LevelIndent = Indent; 887 if (static_cast<int>(Indent) + Offset >= 0) 888 Indent += Offset; 889 890 // Merge multiple lines if possible. 891 unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E); 892 if (MergedLines > 0 && Style.ColumnLimit == 0) { 893 // Disallow line merging if there is a break at the start of one of the 894 // input lines. 
895 for (unsigned i = 0; i < MergedLines; ++i) { 896 if (I[i + 1]->First->NewlinesBefore > 0) 897 MergedLines = 0; 898 } 899 } 900 if (!DryRun) { 901 for (unsigned i = 0; i < MergedLines; ++i) { 902 join(*I[i], *I[i + 1]); 903 } 904 } 905 I += MergedLines; 906 907 bool FixIndentation = 908 FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn); 909 if (TheLine.First->is(tok::eof)) { 910 if (PreviousLine && PreviousLine->Affected && !DryRun) { 911 // Remove the file's trailing whitespace. 912 unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u); 913 Whitespaces->replaceWhitespace(*TheLine.First, Newlines, 914 /*IndentLevel=*/0, /*Spaces=*/0, 915 /*TargetColumn=*/0); 916 } 917 } else if (TheLine.Type != LT_Invalid && 918 (TheLine.Affected || FixIndentation)) { 919 if (FirstTok->WhitespaceRange.isValid()) { 920 if (!DryRun) 921 formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, 922 Indent, TheLine.InPPDirective); 923 } else { 924 Indent = LevelIndent = FirstTok->OriginalColumn; 925 } 926 927 // If everything fits on a single line, just put it there. 928 unsigned ColumnLimit = Style.ColumnLimit; 929 if (I + 1 != E) { 930 AnnotatedLine *NextLine = I[1]; 931 if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline) 932 ColumnLimit = getColumnLimit(TheLine.InPPDirective); 933 } 934 935 if (TheLine.Last->TotalLength + Indent <= ColumnLimit) { 936 LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun); 937 while (State.NextToken) { 938 formatChildren(State, /*Newline=*/false, /*DryRun=*/false, Penalty); 939 Indenter->addTokenToState(State, /*Newline=*/false, DryRun); 940 } 941 } else if (Style.ColumnLimit == 0) { 942 // FIXME: Implement nested blocks for ColumnLimit = 0. 
943 NoColumnLimitFormatter Formatter(Indenter); 944 if (!DryRun) 945 Formatter.format(Indent, &TheLine); 946 } else { 947 Penalty += format(TheLine, Indent, DryRun); 948 } 949 950 if (!TheLine.InPPDirective) 951 IndentForLevel[TheLine.Level] = LevelIndent; 952 } else if (TheLine.ChildrenAffected) { 953 format(TheLine.Children, DryRun); 954 } else { 955 // Format the first token if necessary, and notify the WhitespaceManager 956 // about the unchanged whitespace. 957 for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) { 958 if (Tok == TheLine.First && 959 (Tok->NewlinesBefore > 0 || Tok->IsFirst)) { 960 unsigned LevelIndent = Tok->OriginalColumn; 961 if (!DryRun) { 962 // Remove trailing whitespace of the previous line. 963 if ((PreviousLine && PreviousLine->Affected) || 964 TheLine.LeadingEmptyLinesAffected) { 965 formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent, 966 TheLine.InPPDirective); 967 } else { 968 Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); 969 } 970 } 971 972 if (static_cast<int>(LevelIndent) - Offset >= 0) 973 LevelIndent -= Offset; 974 if (Tok->isNot(tok::comment) && !TheLine.InPPDirective) 975 IndentForLevel[TheLine.Level] = LevelIndent; 976 } else if (!DryRun) { 977 Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); 978 } 979 } 980 } 981 if (!DryRun) { 982 for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) { 983 Tok->Finalized = true; 984 } 985 } 986 PreviousLine = *I; 987 } 988 PenaltyCache[CacheKey] = Penalty; 989 return Penalty; 990 } 991 992 private: 993 /// \brief Formats an \c AnnotatedLine and returns the penalty. 994 /// 995 /// If \p DryRun is \c false, directly applies the changes. 996 unsigned format(const AnnotatedLine &Line, unsigned FirstIndent, 997 bool DryRun) { 998 LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); 999 1000 // If the ObjC method declaration does not fit on a line, we should format 1001 // it with one arg per line. 
1002 if (State.Line->Type == LT_ObjCMethodDecl) 1003 State.Stack.back().BreakBeforeParameter = true; 1004 1005 // Find best solution in solution space. 1006 return analyzeSolutionSpace(State, DryRun); 1007 } 1008 1009 /// \brief An edge in the solution space from \c Previous->State to \c State, 1010 /// inserting a newline dependent on the \c NewLine. 1011 struct StateNode { 1012 StateNode(const LineState &State, bool NewLine, StateNode *Previous) 1013 : State(State), NewLine(NewLine), Previous(Previous) {} 1014 LineState State; 1015 bool NewLine; 1016 StateNode *Previous; 1017 }; 1018 1019 /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. 1020 /// 1021 /// In case of equal penalties, we want to prefer states that were inserted 1022 /// first. During state generation we make sure that we insert states first 1023 /// that break the line as late as possible. 1024 typedef std::pair<unsigned, unsigned> OrderedPenalty; 1025 1026 /// \brief An item in the prioritized BFS search queue. The \c StateNode's 1027 /// \c State has the given \c OrderedPenalty. 1028 typedef std::pair<OrderedPenalty, StateNode *> QueueItem; 1029 1030 /// \brief The BFS queue type. 1031 typedef std::priority_queue<QueueItem, std::vector<QueueItem>, 1032 std::greater<QueueItem> > QueueType; 1033 1034 /// \brief Get the offset of the line relatively to the level. 1035 /// 1036 /// For example, 'public:' labels in classes are offset by 1 or 2 1037 /// characters to the left from their level. 1038 int getIndentOffset(const FormatToken &RootToken) { 1039 if (Style.Language == FormatStyle::LK_Java) 1040 return 0; 1041 if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier()) 1042 return Style.AccessModifierOffset; 1043 return 0; 1044 } 1045 1046 /// \brief Add a new line and the required indent before the first Token 1047 /// of the \c UnwrappedLine if there was no structural parsing error. 
1048 void formatFirstToken(FormatToken &RootToken, 1049 const AnnotatedLine *PreviousLine, unsigned IndentLevel, 1050 unsigned Indent, bool InPPDirective) { 1051 unsigned Newlines = 1052 std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); 1053 // Remove empty lines before "}" where applicable. 1054 if (RootToken.is(tok::r_brace) && 1055 (!RootToken.Next || 1056 (RootToken.Next->is(tok::semi) && !RootToken.Next->Next))) 1057 Newlines = std::min(Newlines, 1u); 1058 if (Newlines == 0 && !RootToken.IsFirst) 1059 Newlines = 1; 1060 if (RootToken.IsFirst && !RootToken.HasUnescapedNewline) 1061 Newlines = 0; 1062 1063 // Remove empty lines after "{". 1064 if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine && 1065 PreviousLine->Last->is(tok::l_brace) && 1066 PreviousLine->First->isNot(tok::kw_namespace)) 1067 Newlines = 1; 1068 1069 // Insert extra new line before access specifiers. 1070 if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) && 1071 RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1) 1072 ++Newlines; 1073 1074 // Remove empty lines after access specifiers. 1075 if (PreviousLine && PreviousLine->First->isAccessSpecifier()) 1076 Newlines = std::min(1u, Newlines); 1077 1078 Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent, 1079 Indent, InPPDirective && 1080 !RootToken.HasUnescapedNewline); 1081 } 1082 1083 /// \brief Get the indent of \p Level from \p IndentForLevel. 1084 /// 1085 /// \p IndentForLevel must contain the indent for the level \c l 1086 /// at \p IndentForLevel[l], or a value < 0 if the indent for 1087 /// that level is unknown. 
1088 unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level) { 1089 if (IndentForLevel[Level] != -1) 1090 return IndentForLevel[Level]; 1091 if (Level == 0) 1092 return 0; 1093 return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; 1094 } 1095 1096 void join(AnnotatedLine &A, const AnnotatedLine &B) { 1097 assert(!A.Last->Next); 1098 assert(!B.First->Previous); 1099 if (B.Affected) 1100 A.Affected = true; 1101 A.Last->Next = B.First; 1102 B.First->Previous = A.Last; 1103 B.First->CanBreakBefore = true; 1104 unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; 1105 for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { 1106 Tok->TotalLength += LengthA; 1107 A.Last = Tok; 1108 } 1109 } 1110 1111 unsigned getColumnLimit(bool InPPDirective) const { 1112 // In preprocessor directives reserve two chars for trailing " \" 1113 return Style.ColumnLimit - (InPPDirective ? 2 : 0); 1114 } 1115 1116 struct CompareLineStatePointers { 1117 bool operator()(LineState *obj1, LineState *obj2) const { 1118 return *obj1 < *obj2; 1119 } 1120 }; 1121 1122 /// \brief Analyze the entire solution space starting from \p InitialState. 1123 /// 1124 /// This implements a variant of Dijkstra's algorithm on the graph that spans 1125 /// the solution space (\c LineStates are the nodes). The algorithm tries to 1126 /// find the shortest path (the one with lowest penalty) from \p InitialState 1127 /// to a state where all tokens are placed. Returns the penalty. 1128 /// 1129 /// If \p DryRun is \c false, directly applies the changes. 1130 unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) { 1131 std::set<LineState *, CompareLineStatePointers> Seen; 1132 1133 // Increasing count of \c StateNode items we have created. This is used to 1134 // create a deterministic order independent of the container. 1135 unsigned Count = 0; 1136 QueueType Queue; 1137 1138 // Insert start element into queue. 
1139 StateNode *Node = 1140 new (Allocator.Allocate()) StateNode(InitialState, false, nullptr); 1141 Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); 1142 ++Count; 1143 1144 unsigned Penalty = 0; 1145 1146 // While not empty, take first element and follow edges. 1147 while (!Queue.empty()) { 1148 Penalty = Queue.top().first.first; 1149 StateNode *Node = Queue.top().second; 1150 if (!Node->State.NextToken) { 1151 DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); 1152 break; 1153 } 1154 Queue.pop(); 1155 1156 // Cut off the analysis of certain solutions if the analysis gets too 1157 // complex. See description of IgnoreStackForComparison. 1158 if (Count > 10000) 1159 Node->State.IgnoreStackForComparison = true; 1160 1161 if (!Seen.insert(&Node->State).second) 1162 // State already examined with lower penalty. 1163 continue; 1164 1165 FormatDecision LastFormat = Node->State.NextToken->Decision; 1166 if (LastFormat == FD_Unformatted || LastFormat == FD_Continue) 1167 addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue); 1168 if (LastFormat == FD_Unformatted || LastFormat == FD_Break) 1169 addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue); 1170 } 1171 1172 if (Queue.empty()) { 1173 // We were unable to find a solution, do nothing. 1174 // FIXME: Add diagnostic? 1175 DEBUG(llvm::dbgs() << "Could not find a solution.\n"); 1176 return 0; 1177 } 1178 1179 // Reconstruct the solution. 1180 if (!DryRun) 1181 reconstructPath(InitialState, Queue.top().second); 1182 1183 DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); 1184 DEBUG(llvm::dbgs() << "---\n"); 1185 1186 return Penalty; 1187 } 1188 1189 void reconstructPath(LineState &State, StateNode *Current) { 1190 std::deque<StateNode *> Path; 1191 // We do not need a break before the initial token. 
1192 while (Current->Previous) { 1193 Path.push_front(Current); 1194 Current = Current->Previous; 1195 } 1196 for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); 1197 I != E; ++I) { 1198 unsigned Penalty = 0; 1199 formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty); 1200 Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false); 1201 1202 DEBUG({ 1203 if ((*I)->NewLine) { 1204 llvm::dbgs() << "Penalty for placing " 1205 << (*I)->Previous->State.NextToken->Tok.getName() << ": " 1206 << Penalty << "\n"; 1207 } 1208 }); 1209 } 1210 } 1211 1212 /// \brief Add the following state to the analysis queue \c Queue. 1213 /// 1214 /// Assume the current state is \p PreviousNode and has been reached with a 1215 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. 1216 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, 1217 bool NewLine, unsigned *Count, QueueType *Queue) { 1218 if (NewLine && !Indenter->canBreak(PreviousNode->State)) 1219 return; 1220 if (!NewLine && Indenter->mustBreak(PreviousNode->State)) 1221 return; 1222 1223 StateNode *Node = new (Allocator.Allocate()) 1224 StateNode(PreviousNode->State, NewLine, PreviousNode); 1225 if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty)) 1226 return; 1227 1228 Penalty += Indenter->addTokenToState(Node->State, NewLine, true); 1229 1230 Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node)); 1231 ++(*Count); 1232 } 1233 1234 /// \brief If the \p State's next token is an r_brace closing a nested block, 1235 /// format the nested block before it. 1236 /// 1237 /// Returns \c true if all children could be placed successfully and adapts 1238 /// \p Penalty as well as \p State. If \p DryRun is false, also directly 1239 /// creates changes using \c Whitespaces. 1240 /// 1241 /// The crucial idea here is that children always get formatted upon 1242 /// encountering the closing brace right after the nested block. 
Now, if we 1243 /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is 1244 /// \c false), the entire block has to be kept on the same line (which is only 1245 /// possible if it fits on the line, only contains a single statement, etc. 1246 /// 1247 /// If \p NewLine is true, we format the nested block on separate lines, i.e. 1248 /// break after the "{", format all lines with correct indentation and the put 1249 /// the closing "}" on yet another new line. 1250 /// 1251 /// This enables us to keep the simple structure of the 1252 /// \c UnwrappedLineFormatter, where we only have two options for each token: 1253 /// break or don't break. 1254 bool formatChildren(LineState &State, bool NewLine, bool DryRun, 1255 unsigned &Penalty) { 1256 FormatToken &Previous = *State.NextToken->Previous; 1257 const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); 1258 if (!LBrace || LBrace->isNot(tok::l_brace) || 1259 LBrace->BlockKind != BK_Block || Previous.Children.size() == 0) 1260 // The previous token does not open a block. Nothing to do. We don't 1261 // assert so that we can simply call this function for all tokens. 1262 return true; 1263 1264 if (NewLine) { 1265 int AdditionalIndent = 1266 State.FirstIndent - State.Line->Level * Style.IndentWidth; 1267 if (State.Stack.size() < 2 || 1268 !State.Stack[State.Stack.size() - 2].JSFunctionInlined) { 1269 AdditionalIndent = State.Stack.back().Indent - 1270 Previous.Children[0]->Level * Style.IndentWidth; 1271 } 1272 1273 Penalty += format(Previous.Children, DryRun, AdditionalIndent, 1274 /*FixBadIndentation=*/true); 1275 return true; 1276 } 1277 1278 // Cannot merge multiple statements into a single line. 1279 if (Previous.Children.size() > 1) 1280 return false; 1281 1282 // Cannot merge into one line if this line ends on a comment. 1283 if (Previous.is(tok::comment)) 1284 return false; 1285 1286 // We can't put the closing "}" on a line with a trailing comment. 
1287 if (Previous.Children[0]->Last->isTrailingComment()) 1288 return false; 1289 1290 // If the child line exceeds the column limit, we wouldn't want to merge it. 1291 // We add +2 for the trailing " }". 1292 if (Style.ColumnLimit > 0 && 1293 Previous.Children[0]->Last->TotalLength + State.Column + 2 > 1294 Style.ColumnLimit) 1295 return false; 1296 1297 if (!DryRun) { 1298 Whitespaces->replaceWhitespace( 1299 *Previous.Children[0]->First, 1300 /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1, 1301 /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); 1302 } 1303 Penalty += format(*Previous.Children[0], State.Column + 1, DryRun); 1304 1305 State.Column += 1 + Previous.Children[0]->Last->TotalLength; 1306 return true; 1307 } 1308 1309 ContinuationIndenter *Indenter; 1310 WhitespaceManager *Whitespaces; 1311 FormatStyle Style; 1312 LineJoiner Joiner; 1313 1314 llvm::SpecificBumpPtrAllocator<StateNode> Allocator; 1315 1316 // Cache to store the penalty of formatting a vector of AnnotatedLines 1317 // starting from a specific additional offset. Improves performance if there 1318 // are many nested blocks. 
1319 std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>, 1320 unsigned> PenaltyCache; 1321 }; 1322 1323 class FormatTokenLexer { 1324 public: 1325 FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style, 1326 encoding::Encoding Encoding) 1327 : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false), 1328 Column(0), TrailingWhitespace(0), 1329 SourceMgr(SourceMgr), ID(ID), Style(Style), 1330 IdentTable(getFormattingLangOpts(Style)), Encoding(Encoding), 1331 FirstInLineIndex(0), FormattingDisabled(false) { 1332 Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr, 1333 getFormattingLangOpts(Style))); 1334 Lex->SetKeepWhitespaceMode(true); 1335 1336 for (const std::string &ForEachMacro : Style.ForEachMacros) 1337 ForEachMacros.push_back(&IdentTable.get(ForEachMacro)); 1338 std::sort(ForEachMacros.begin(), ForEachMacros.end()); 1339 } 1340 1341 ArrayRef<FormatToken *> lex() { 1342 assert(Tokens.empty()); 1343 assert(FirstInLineIndex == 0); 1344 do { 1345 Tokens.push_back(getNextToken()); 1346 tryMergePreviousTokens(); 1347 if (Tokens.back()->NewlinesBefore > 0) 1348 FirstInLineIndex = Tokens.size() - 1; 1349 } while (Tokens.back()->Tok.isNot(tok::eof)); 1350 return Tokens; 1351 } 1352 1353 IdentifierTable &getIdentTable() { return IdentTable; } 1354 1355 private: 1356 void tryMergePreviousTokens() { 1357 if (tryMerge_TMacro()) 1358 return; 1359 if (tryMergeConflictMarkers()) 1360 return; 1361 1362 if (Style.Language == FormatStyle::LK_JavaScript) { 1363 if (tryMergeJSRegexLiteral()) 1364 return; 1365 if (tryMergeEscapeSequence()) 1366 return; 1367 1368 static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal }; 1369 static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal }; 1370 static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater, 1371 tok::greaterequal }; 1372 static tok::TokenKind JSRightArrow[] = { tok::equal, tok::greater }; 1373 // FIXME: We probably need to change token 
type to mimic operator with the 1374 // correct priority. 1375 if (tryMergeTokens(JSIdentity)) 1376 return; 1377 if (tryMergeTokens(JSNotIdentity)) 1378 return; 1379 if (tryMergeTokens(JSShiftEqual)) 1380 return; 1381 if (tryMergeTokens(JSRightArrow)) 1382 return; 1383 } 1384 } 1385 1386 bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) { 1387 if (Tokens.size() < Kinds.size()) 1388 return false; 1389 1390 SmallVectorImpl<FormatToken *>::const_iterator First = 1391 Tokens.end() - Kinds.size(); 1392 if (!First[0]->is(Kinds[0])) 1393 return false; 1394 unsigned AddLength = 0; 1395 for (unsigned i = 1; i < Kinds.size(); ++i) { 1396 if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() != 1397 First[i]->WhitespaceRange.getEnd()) 1398 return false; 1399 AddLength += First[i]->TokenText.size(); 1400 } 1401 Tokens.resize(Tokens.size() - Kinds.size() + 1); 1402 First[0]->TokenText = StringRef(First[0]->TokenText.data(), 1403 First[0]->TokenText.size() + AddLength); 1404 First[0]->ColumnWidth += AddLength; 1405 return true; 1406 } 1407 1408 // Tries to merge an escape sequence, i.e. a "\\" and the following 1409 // character. Use e.g. inside JavaScript regex literals. 1410 bool tryMergeEscapeSequence() { 1411 if (Tokens.size() < 2) 1412 return false; 1413 FormatToken *Previous = Tokens[Tokens.size() - 2]; 1414 if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\" || 1415 Tokens.back()->NewlinesBefore != 0) 1416 return false; 1417 Previous->ColumnWidth += Tokens.back()->ColumnWidth; 1418 StringRef Text = Previous->TokenText; 1419 Previous->TokenText = 1420 StringRef(Text.data(), Text.size() + Tokens.back()->TokenText.size()); 1421 Tokens.resize(Tokens.size() - 1); 1422 return true; 1423 } 1424 1425 // Try to determine whether the current token ends a JavaScript regex literal. 
// We heuristically assume that this is a regex literal if we find two
// unescaped slashes on a line and the token before the first slash is one of
// "(;,{}![:?", a binary operator or 'return', as those cannot be followed by
// a division.
//
// On success, all tokens from the opening slash onwards are collapsed into
// the opening slash's token, which becomes a TT_RegexLiteral of kind
// tok::unknown. Returns true if a merge happened.
bool tryMergeJSRegexLiteral() {
  if (Tokens.size() < 2)
    return false;
  // If a regex literal ends in "\//", this gets represented by an unknown
  // token "\" and a comment.
  bool MightEndWithEscapedSlash =
      Tokens.back()->is(tok::comment) &&
      Tokens.back()->TokenText.startswith("//") &&
      Tokens[Tokens.size() - 2]->TokenText == "\\";
  // Otherwise the last token has to be a slash that is not itself escaped.
  if (!MightEndWithEscapedSlash &&
      (Tokens.back()->isNot(tok::slash) ||
       (Tokens[Tokens.size() - 2]->is(tok::unknown) &&
        Tokens[Tokens.size() - 2]->TokenText == "\\")))
    return false;
  unsigned TokenCount = 0;
  unsigned LastColumn = Tokens.back()->OriginalColumn;
  // Walk backwards from the token before the last one, looking for the
  // opening slash.
  for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
    ++TokenCount;
    if (I[0]->is(tok::slash) && I + 1 != E &&
        (I[1]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, tok::r_brace,
                       tok::exclaim, tok::l_square, tok::colon, tok::comma,
                       tok::question, tok::kw_return) ||
         I[1]->isBinaryOperator())) {
      if (MightEndWithEscapedSlash) {
        StringRef Buffer = SourceMgr.getBufferData(ID);
        // This regex literal ends in '\//'. Skip past the '//' of the last
        // token and re-start lexing from there.
        int offset =
            SourceMgr.getFileOffset(Tokens.back()->Tok.getLocation()) + 2;
        Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
                            getFormattingLangOpts(Style), Buffer.begin(),
                            Buffer.begin() + offset, Buffer.end()));
        Lex->SetKeepWhitespaceMode(true);
      }
      // Drop everything after the opening slash; it becomes the new last
      // token and represents the whole literal.
      Tokens.resize(Tokens.size() - TokenCount);
      Tokens.back()->Tok.setKind(tok::unknown);
      Tokens.back()->Type = TT_RegexLiteral;
      Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn;
      return true;
    }

    // There can't be a newline inside a regex literal.
    if (I[0]->NewlinesBefore > 0)
      return false;
  }
  return false;
}

// Tries to merge the four tokens of a single-line _T("...") macro invocation
// into the string literal token. The merged token takes over the text span
// from the macro name to the closing parenthesis as well as the macro
// token's IsFirst, LastNewlineOffset, WhitespaceRange and OriginalColumn;
// its ColumnWidth is recomputed. Returns true if a merge happened.
bool tryMerge_TMacro() {
  if (Tokens.size() < 4)
    return false;
  FormatToken *Last = Tokens.back();
  if (!Last->is(tok::r_paren))
    return false;

  FormatToken *String = Tokens[Tokens.size() - 2];
  if (!String->is(tok::string_literal) || String->IsMultiline)
    return false;

  if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
    return false;

  FormatToken *Macro = Tokens[Tokens.size() - 4];
  if (Macro->TokenText != "_T")
    return false;

  const char *Start = Macro->TokenText.data();
  const char *End = Last->TokenText.data() + Last->TokenText.size();
  String->TokenText = StringRef(Start, End - Start);
  String->IsFirst = Macro->IsFirst;
  String->LastNewlineOffset = Macro->LastNewlineOffset;
  String->WhitespaceRange = Macro->WhitespaceRange;
  String->OriginalColumn = Macro->OriginalColumn;
  String->ColumnWidth = encoding::columnWidthWithTabs(
      String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);

  // Replace the four tokens with the single merged string token.
  Tokens.pop_back();
  Tokens.pop_back();
  Tokens.pop_back();
  Tokens.back() = String;
  return true;
}

// Checks, once the first token of the next line (or eof) has been lexed,
// whether the line that just ended starts with a version control conflict
// marker, and if so collapses that line into a single token. Returns true if
// a merge happened.
bool tryMergeConflictMarkers() {
  if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
    return false;

  // Conflict lines look like:
  // <marker> <text from the vcs>
  // For example:
  // >>>>>>> /file/in/file/system at revision 1234
  //
  // We merge all tokens in a line that starts with a conflict marker
  // into a single token with a special token type that the unwrapped line
  // parser will use to correctly rebuild the underlying code.

  // NOTE(review): this local shadows the member of the same name.
  FileID ID;
  // Get the position of the first token in the line.
  unsigned FirstInLineOffset;
  std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
      Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
  StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
  // Calculate the offset of the start of the current line.
  auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
  if (LineOffset == StringRef::npos) {
    LineOffset = 0;
  } else {
    ++LineOffset;
  }

  // The candidate marker is the text from the line start up to the first
  // space or newline.
  auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
  StringRef LineStart;
  if (FirstSpace == StringRef::npos) {
    LineStart = Buffer.substr(LineOffset);
  } else {
    LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
  }

  TokenType Type = TT_Unknown;
  if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
    Type = TT_ConflictStart;
  } else if (LineStart == "|||||||" || LineStart == "=======" ||
             LineStart == "====") {
    Type = TT_ConflictAlternative;
  } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
    Type = TT_ConflictEnd;
  }

  if (Type != TT_Unknown) {
    // Keep the token that starts the next line; it is re-appended below.
    FormatToken *Next = Tokens.back();

    Tokens.resize(FirstInLineIndex + 1);
    // We do not need to build a complete token here, as we will skip it
    // during parsing anyway (as we must not touch whitespace around conflict
    // markers).
    Tokens.back()->Type = Type;
    Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);

    Tokens.push_back(Next);
    return true;
  }

  return false;
}

// Lexes and returns the next token. Whitespace in front of the token is
// consumed and recorded on it (NewlinesBefore, HasUnescapedNewline,
// LastNewlineOffset, WhitespaceRange), the running Column is updated, and
// the token's OriginalColumn, ColumnWidth and multi-line properties are
// computed. A '>>' token is split into two '>' tokens, the second of which
// is returned by the following call.
FormatToken *getNextToken() {
  if (GreaterStashed) {
    // Create a synthesized second '>' token.
    // FIXME: Increment Column and set OriginalColumn.
    Token Greater = FormatTok->Tok;
    FormatTok = new (Allocator.Allocate()) FormatToken;
    FormatTok->Tok = Greater;
    SourceLocation GreaterLocation =
        FormatTok->Tok.getLocation().getLocWithOffset(1);
    FormatTok->WhitespaceRange =
        SourceRange(GreaterLocation, GreaterLocation);
    FormatTok->TokenText = ">";
    FormatTok->ColumnWidth = 1;
    GreaterStashed = false;
    return FormatTok;
  }

  FormatTok = new (Allocator.Allocate()) FormatToken;
  readRawToken(*FormatTok);
  // Whitespace trimmed off the end of a preceding comment counts as
  // whitespace in front of this token.
  SourceLocation WhitespaceStart =
      FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
  FormatTok->IsFirst = IsFirstToken;
  IsFirstToken = false;

  // Consume and record whitespace until we find a significant token.
  unsigned WhitespaceLength = TrailingWhitespace;
  while (FormatTok->Tok.is(tok::unknown)) {
    for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
      switch (FormatTok->TokenText[i]) {
      case '\n':
        ++FormatTok->NewlinesBefore;
        // FIXME: This is technically incorrect, as it could also
        // be a literal backslash at the end of the line.
        if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
                       (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
                        FormatTok->TokenText[i - 2] != '\\')))
          FormatTok->HasUnescapedNewline = true;
        FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
        Column = 0;
        break;
      case '\r':
      case '\f':
      case '\v':
        Column = 0;
        break;
      case ' ':
        ++Column;
        break;
      case '\t':
        // Advance to the next tab stop.
        Column += Style.TabWidth - Column % Style.TabWidth;
        break;
      case '\\':
        ++Column;
        // A backslash that is not followed by a line ending is not
        // whitespace.
        if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
                           FormatTok->TokenText[i + 1] != '\n'))
          FormatTok->Type = TT_ImplicitStringLiteral;
        break;
      default:
        FormatTok->Type = TT_ImplicitStringLiteral;
        ++Column;
        break;
      }
    }

    // The unknown token contained something other than whitespace; stop and
    // return it as a token of its own.
    if (FormatTok->Type == TT_ImplicitStringLiteral)
      break;
    WhitespaceLength += FormatTok->Tok.getLength();

    readRawToken(*FormatTok);
  }

  // In case the token starts with escaped newlines, we want to
  // take them into account as whitespace - this pattern is quite frequent
  // in macro definitions.
  // FIXME: Add a more explicit test.
  while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
         FormatTok->TokenText[1] == '\n') {
    ++FormatTok->NewlinesBefore;
    WhitespaceLength += 2;
    Column = 0;
    FormatTok->TokenText = FormatTok->TokenText.substr(2);
  }

  FormatTok->WhitespaceRange = SourceRange(
      WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));

  FormatTok->OriginalColumn = Column;

  TrailingWhitespace = 0;
  if (FormatTok->Tok.is(tok::comment)) {
    // FIXME: Add the trimmed whitespace to Column.
    StringRef UntrimmedText = FormatTok->TokenText;
    FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
    TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
  } else if (FormatTok->Tok.is(tok::raw_identifier)) {
    // Resolve the raw identifier through the identifier table and adopt the
    // token kind stored there.
    IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
    FormatTok->Tok.setIdentifierInfo(&Info);
    FormatTok->Tok.setKind(Info.getTokenID());
  } else if (FormatTok->Tok.is(tok::greatergreater)) {
    // Return the first '>' now and remember to synthesize the second one on
    // the next call.
    FormatTok->Tok.setKind(tok::greater);
    FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
    GreaterStashed = true;
  }

  // Now FormatTok is the next non-whitespace token.

  StringRef Text = FormatTok->TokenText;
  size_t FirstNewlinePos = Text.find('\n');
  if (FirstNewlinePos == StringRef::npos) {
    // FIXME: ColumnWidth actually depends on the start column, we need to
    // take this into account when the token is moved.
    FormatTok->ColumnWidth =
        encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
    Column += FormatTok->ColumnWidth;
  } else {
    FormatTok->IsMultiline = true;
    // FIXME: ColumnWidth actually depends on the start column, we need to
    // take this into account when the token is moved.
    FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
        Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);

    // The last line of the token always starts in column 0.
    // Thus, the length can be precomputed even in the presence of tabs.
    FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
        Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
        Encoding);
    Column = FormatTok->LastLineColumnWidth;
  }

  // ForEachMacros is sorted in the constructor, so a binary search suffices.
  FormatTok->IsForEachMacro =
      std::binary_search(ForEachMacros.begin(), ForEachMacros.end(),
                         FormatTok->Tok.getIdentifierInfo());

  return FormatTok;
}

// The token most recently produced by getNextToken().
FormatToken *FormatTok;
// True until the first token has been lexed.
bool IsFirstToken;
// True if a '>>' was split and its second '>' still has to be returned.
bool GreaterStashed;
// Running column, updated while lexing.
unsigned Column;
// Amount of whitespace trimmed off the end of the last comment token.
unsigned TrailingWhitespace;
std::unique_ptr<Lexer> Lex;
SourceManager &SourceMgr;
FileID ID;
FormatStyle &Style;
IdentifierTable IdentTable;
encoding::Encoding Encoding;
llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
// Index (in 'Tokens') of the last token that starts a new line.
unsigned FirstInLineIndex;
SmallVector<FormatToken *, 16> Tokens;
// Identifiers of Style.ForEachMacros, kept sorted.
SmallVector<IdentifierInfo *, 8> ForEachMacros;

// True while lexing between "clang-format off" and "clang-format on"
// comments.
bool FormattingDisabled;

// Lexes one raw token into Tok, sets its TokenText, fixes up the kind of
// some unknown tokens and marks the token as finalized while formatting is
// disabled by a "clang-format off" comment.
void readRawToken(FormatToken &Tok) {
  Lex->LexFromRawLexer(Tok.Tok);
  Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
                            Tok.Tok.getLength());
  // For formatting, treat unterminated string literals like normal string
  // literals.
  if (Tok.is(tok::unknown)) {
    if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
      Tok.Tok.setKind(tok::string_literal);
      Tok.IsUnterminatedLiteral = true;
    } else if (Style.Language == FormatStyle::LK_JavaScript &&
               Tok.TokenText == "''") {
      Tok.Tok.setKind(tok::char_constant);
    }
  }

  // The "on" comment is checked before and the "off" comment after
  // Finalized is assigned, so neither comment token itself is marked
  // finalized by a simple off/on pair.
  if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
                               Tok.TokenText == "/* clang-format on */")) {
    FormattingDisabled = false;
  }

  Tok.Finalized = FormattingDisabled;

  if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
                               Tok.TokenText == "/* clang-format off */")) {
    FormattingDisabled = true;
  }
}
};

// Returns a human-readable name for Language, or "Unknown" for values not
// listed here.
static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
  switch (Language) {
  case FormatStyle::LK_Cpp:
    return "C++";
  case FormatStyle::LK_Java:
    return "Java";
  case FormatStyle::LK_JavaScript:
    return "JavaScript";
  case FormatStyle::LK_Proto:
    return "Proto";
  default:
    return "Unknown";
  }
}

// Drives formatting of one file: lexes it, has it parsed into unwrapped
// lines and collects the replacements produced for each run of lines.
class Formatter : public UnwrappedLineConsumer {
public:
Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
          ArrayRef<CharSourceRange> Ranges)
    : Style(Style), ID(ID), SourceMgr(SourceMgr),
      Whitespaces(SourceMgr, Style,
                  inputUsesCRLF(SourceMgr.getBufferData(ID))),
      Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
      Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) {
  DEBUG(llvm::dbgs() << "File encoding: "
                     << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
                                                             : "unknown")
                     << "\n");
  DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
                     << "\n");
}

// Lexes and parses the file, formats every run of unwrapped lines and
// returns the union of the replacements of all runs.
tooling::Replacements format() {
  tooling::Replacements Result;
  FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);

  UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
  bool StructuralError = Parser.parse();
  assert(UnwrappedLines.rbegin()->empty());
  // The last entry of UnwrappedLines is empty (asserted above) and is not
  // processed.
  for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
       ++Run) {
    DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
    SmallVector<AnnotatedLine *, 16> AnnotatedLines;
    for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
      AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
    }
    tooling::Replacements RunResult =
        format(AnnotatedLines, StructuralError, Tokens);
    DEBUG({
      llvm::dbgs() << "Replacements for run " << Run << ":\n";
      for (tooling::Replacements::iterator I = RunResult.begin(),
                                           E = RunResult.end();
           I != E; ++I) {
        llvm::dbgs() << I->toString() << "\n";
      }
    });
    // The annotated lines were allocated above and are owned by this loop.
    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
      delete AnnotatedLines[i];
    }
    Result.insert(RunResult.begin(), RunResult.end());
    Whitespaces.reset();
  }
  return Result;
}

tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
                             bool StructuralError, FormatTokenLexer &Tokens) {
  TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
  for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
    Annotator.annotate(*AnnotatedLines[i]);
  }
  deriveLocalStyle(AnnotatedLines);
  for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
    Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
  }
  computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());

  Annotator.setCommentLineLevels(AnnotatedLines);
ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding, 1839 BinPackInconclusiveFunctions); 1840 UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style); 1841 Formatter.format(AnnotatedLines, /*DryRun=*/false); 1842 return Whitespaces.generateReplacements(); 1843 } 1844 1845 private: 1846 // Determines which lines are affected by the SourceRanges given as input. 1847 // Returns \c true if at least one line between I and E or one of their 1848 // children is affected. 1849 bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I, 1850 SmallVectorImpl<AnnotatedLine *>::iterator E) { 1851 bool SomeLineAffected = false; 1852 const AnnotatedLine *PreviousLine = nullptr; 1853 while (I != E) { 1854 AnnotatedLine *Line = *I; 1855 Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First); 1856 1857 // If a line is part of a preprocessor directive, it needs to be formatted 1858 // if any token within the directive is affected. 1859 if (Line->InPPDirective) { 1860 FormatToken *Last = Line->Last; 1861 SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1; 1862 while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) { 1863 Last = (*PPEnd)->Last; 1864 ++PPEnd; 1865 } 1866 1867 if (affectsTokenRange(*Line->First, *Last, 1868 /*IncludeLeadingNewlines=*/false)) { 1869 SomeLineAffected = true; 1870 markAllAsAffected(I, PPEnd); 1871 } 1872 I = PPEnd; 1873 continue; 1874 } 1875 1876 if (nonPPLineAffected(Line, PreviousLine)) 1877 SomeLineAffected = true; 1878 1879 PreviousLine = Line; 1880 ++I; 1881 } 1882 return SomeLineAffected; 1883 } 1884 1885 // Determines whether 'Line' is affected by the SourceRanges given as input. 1886 // Returns \c true if line or one if its children is affected. 
1887 bool nonPPLineAffected(AnnotatedLine *Line, 1888 const AnnotatedLine *PreviousLine) { 1889 bool SomeLineAffected = false; 1890 Line->ChildrenAffected = 1891 computeAffectedLines(Line->Children.begin(), Line->Children.end()); 1892 if (Line->ChildrenAffected) 1893 SomeLineAffected = true; 1894 1895 // Stores whether one of the line's tokens is directly affected. 1896 bool SomeTokenAffected = false; 1897 // Stores whether we need to look at the leading newlines of the next token 1898 // in order to determine whether it was affected. 1899 bool IncludeLeadingNewlines = false; 1900 1901 // Stores whether the first child line of any of this line's tokens is 1902 // affected. 1903 bool SomeFirstChildAffected = false; 1904 1905 for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { 1906 // Determine whether 'Tok' was affected. 1907 if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines)) 1908 SomeTokenAffected = true; 1909 1910 // Determine whether the first child of 'Tok' was affected. 1911 if (!Tok->Children.empty() && Tok->Children.front()->Affected) 1912 SomeFirstChildAffected = true; 1913 1914 IncludeLeadingNewlines = Tok->Children.empty(); 1915 } 1916 1917 // Was this line moved, i.e. has it previously been on the same line as an 1918 // affected line? 1919 bool LineMoved = PreviousLine && PreviousLine->Affected && 1920 Line->First->NewlinesBefore == 0; 1921 1922 bool IsContinuedComment = 1923 Line->First->is(tok::comment) && Line->First->Next == nullptr && 1924 Line->First->NewlinesBefore < 2 && PreviousLine && 1925 PreviousLine->Affected && PreviousLine->Last->is(tok::comment); 1926 1927 if (SomeTokenAffected || SomeFirstChildAffected || LineMoved || 1928 IsContinuedComment) { 1929 Line->Affected = true; 1930 SomeLineAffected = true; 1931 } 1932 return SomeLineAffected; 1933 } 1934 1935 // Marks all lines between I and E as well as all their children as affected. 
1936 void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I, 1937 SmallVectorImpl<AnnotatedLine *>::iterator E) { 1938 while (I != E) { 1939 (*I)->Affected = true; 1940 markAllAsAffected((*I)->Children.begin(), (*I)->Children.end()); 1941 ++I; 1942 } 1943 } 1944 1945 // Returns true if the range from 'First' to 'Last' intersects with one of the 1946 // input ranges. 1947 bool affectsTokenRange(const FormatToken &First, const FormatToken &Last, 1948 bool IncludeLeadingNewlines) { 1949 SourceLocation Start = First.WhitespaceRange.getBegin(); 1950 if (!IncludeLeadingNewlines) 1951 Start = Start.getLocWithOffset(First.LastNewlineOffset); 1952 SourceLocation End = Last.getStartOfNonWhitespace(); 1953 if (Last.TokenText.size() > 0) 1954 End = End.getLocWithOffset(Last.TokenText.size() - 1); 1955 CharSourceRange Range = CharSourceRange::getCharRange(Start, End); 1956 return affectsCharSourceRange(Range); 1957 } 1958 1959 // Returns true if one of the input ranges intersect the leading empty lines 1960 // before 'Tok'. 1961 bool affectsLeadingEmptyLines(const FormatToken &Tok) { 1962 CharSourceRange EmptyLineRange = CharSourceRange::getCharRange( 1963 Tok.WhitespaceRange.getBegin(), 1964 Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset)); 1965 return affectsCharSourceRange(EmptyLineRange); 1966 } 1967 1968 // Returns true if 'Range' intersects with one of the input ranges. 
1969 bool affectsCharSourceRange(const CharSourceRange &Range) { 1970 for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(), 1971 E = Ranges.end(); 1972 I != E; ++I) { 1973 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) && 1974 !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin())) 1975 return true; 1976 } 1977 return false; 1978 } 1979 1980 static bool inputUsesCRLF(StringRef Text) { 1981 return Text.count('\r') * 2 > Text.count('\n'); 1982 } 1983 1984 void 1985 deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { 1986 unsigned CountBoundToVariable = 0; 1987 unsigned CountBoundToType = 0; 1988 bool HasCpp03IncompatibleFormat = false; 1989 bool HasBinPackedFunction = false; 1990 bool HasOnePerLineFunction = false; 1991 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1992 if (!AnnotatedLines[i]->First->Next) 1993 continue; 1994 FormatToken *Tok = AnnotatedLines[i]->First->Next; 1995 while (Tok->Next) { 1996 if (Tok->Type == TT_PointerOrReference) { 1997 bool SpacesBefore = 1998 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); 1999 bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() != 2000 Tok->Next->WhitespaceRange.getEnd(); 2001 if (SpacesBefore && !SpacesAfter) 2002 ++CountBoundToVariable; 2003 else if (!SpacesBefore && SpacesAfter) 2004 ++CountBoundToType; 2005 } 2006 2007 if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) { 2008 if (Tok->is(tok::coloncolon) && 2009 Tok->Previous->Type == TT_TemplateOpener) 2010 HasCpp03IncompatibleFormat = true; 2011 if (Tok->Type == TT_TemplateCloser && 2012 Tok->Previous->Type == TT_TemplateCloser) 2013 HasCpp03IncompatibleFormat = true; 2014 } 2015 2016 if (Tok->PackingKind == PPK_BinPacked) 2017 HasBinPackedFunction = true; 2018 if (Tok->PackingKind == PPK_OnePerLine) 2019 HasOnePerLineFunction = true; 2020 2021 Tok = Tok->Next; 2022 } 2023 } 2024 if (Style.DerivePointerAlignment) { 2025 if 
(CountBoundToType > CountBoundToVariable) 2026 Style.PointerAlignment = FormatStyle::PAS_Left; 2027 else if (CountBoundToType < CountBoundToVariable) 2028 Style.PointerAlignment = FormatStyle::PAS_Right; 2029 } 2030 if (Style.Standard == FormatStyle::LS_Auto) { 2031 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11 2032 : FormatStyle::LS_Cpp03; 2033 } 2034 BinPackInconclusiveFunctions = 2035 HasBinPackedFunction || !HasOnePerLineFunction; 2036 } 2037 2038 void consumeUnwrappedLine(const UnwrappedLine &TheLine) override { 2039 assert(!UnwrappedLines.empty()); 2040 UnwrappedLines.back().push_back(TheLine); 2041 } 2042 2043 void finishRun() override { 2044 UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>()); 2045 } 2046 2047 FormatStyle Style; 2048 FileID ID; 2049 SourceManager &SourceMgr; 2050 WhitespaceManager Whitespaces; 2051 SmallVector<CharSourceRange, 8> Ranges; 2052 SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines; 2053 2054 encoding::Encoding Encoding; 2055 bool BinPackInconclusiveFunctions; 2056 }; 2057 2058 } // end anonymous namespace 2059 2060 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, 2061 SourceManager &SourceMgr, 2062 ArrayRef<CharSourceRange> Ranges) { 2063 if (Style.DisableFormat) 2064 return tooling::Replacements(); 2065 return reformat(Style, SourceMgr, 2066 SourceMgr.getFileID(Lex.getSourceLocation()), Ranges); 2067 } 2068 2069 tooling::Replacements reformat(const FormatStyle &Style, 2070 SourceManager &SourceMgr, FileID ID, 2071 ArrayRef<CharSourceRange> Ranges) { 2072 if (Style.DisableFormat) 2073 return tooling::Replacements(); 2074 Formatter formatter(Style, SourceMgr, ID, Ranges); 2075 return formatter.format(); 2076 } 2077 2078 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, 2079 ArrayRef<tooling::Range> Ranges, 2080 StringRef FileName) { 2081 if (Style.DisableFormat) 2082 return tooling::Replacements(); 2083 2084 FileManager 
Files((FileSystemOptions())); 2085 DiagnosticsEngine Diagnostics( 2086 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 2087 new DiagnosticOptions); 2088 SourceManager SourceMgr(Diagnostics, Files); 2089 std::unique_ptr<llvm::MemoryBuffer> Buf = 2090 llvm::MemoryBuffer::getMemBuffer(Code, FileName); 2091 const clang::FileEntry *Entry = 2092 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); 2093 SourceMgr.overrideFileContents(Entry, std::move(Buf)); 2094 FileID ID = 2095 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); 2096 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); 2097 std::vector<CharSourceRange> CharRanges; 2098 for (const tooling::Range &Range : Ranges) { 2099 SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset()); 2100 SourceLocation End = Start.getLocWithOffset(Range.getLength()); 2101 CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); 2102 } 2103 return reformat(Style, SourceMgr, ID, CharRanges); 2104 } 2105 2106 LangOptions getFormattingLangOpts(const FormatStyle &Style) { 2107 LangOptions LangOpts; 2108 LangOpts.CPlusPlus = 1; 2109 LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 2110 LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 2111 LangOpts.LineComment = 1; 2112 LangOpts.CXXOperatorNames = 2113 Style.Language != FormatStyle::LK_JavaScript ? 
1 : 0; 2114 LangOpts.Bool = 1; 2115 LangOpts.ObjC1 = 1; 2116 LangOpts.ObjC2 = 1; 2117 return LangOpts; 2118 } 2119 2120 const char *StyleOptionHelpDescription = 2121 "Coding style, currently supports:\n" 2122 " LLVM, Google, Chromium, Mozilla, WebKit.\n" 2123 "Use -style=file to load style configuration from\n" 2124 ".clang-format file located in one of the parent\n" 2125 "directories of the source file (or current\n" 2126 "directory for stdin).\n" 2127 "Use -style=\"{key: value, ...}\" to set specific\n" 2128 "parameters, e.g.:\n" 2129 " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; 2130 2131 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { 2132 if (FileName.endswith(".java")) { 2133 return FormatStyle::LK_Java; 2134 } else if (FileName.endswith_lower(".js")) { 2135 return FormatStyle::LK_JavaScript; 2136 } else if (FileName.endswith_lower(".proto") || 2137 FileName.endswith_lower(".protodevel")) { 2138 return FormatStyle::LK_Proto; 2139 } 2140 return FormatStyle::LK_Cpp; 2141 } 2142 2143 FormatStyle getStyle(StringRef StyleName, StringRef FileName, 2144 StringRef FallbackStyle) { 2145 FormatStyle Style = getLLVMStyle(); 2146 Style.Language = getLanguageByFileName(FileName); 2147 if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) { 2148 llvm::errs() << "Invalid fallback style \"" << FallbackStyle 2149 << "\" using LLVM style\n"; 2150 return Style; 2151 } 2152 2153 if (StyleName.startswith("{")) { 2154 // Parse YAML/JSON style from the command line. 
2155 if (std::error_code ec = parseConfiguration(StyleName, &Style)) { 2156 llvm::errs() << "Error parsing -style: " << ec.message() << ", using " 2157 << FallbackStyle << " style\n"; 2158 } 2159 return Style; 2160 } 2161 2162 if (!StyleName.equals_lower("file")) { 2163 if (!getPredefinedStyle(StyleName, Style.Language, &Style)) 2164 llvm::errs() << "Invalid value for -style, using " << FallbackStyle 2165 << " style\n"; 2166 return Style; 2167 } 2168 2169 // Look for .clang-format/_clang-format file in the file's parent directories. 2170 SmallString<128> UnsuitableConfigFiles; 2171 SmallString<128> Path(FileName); 2172 llvm::sys::fs::make_absolute(Path); 2173 for (StringRef Directory = Path; !Directory.empty(); 2174 Directory = llvm::sys::path::parent_path(Directory)) { 2175 if (!llvm::sys::fs::is_directory(Directory)) 2176 continue; 2177 SmallString<128> ConfigFile(Directory); 2178 2179 llvm::sys::path::append(ConfigFile, ".clang-format"); 2180 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); 2181 bool IsFile = false; 2182 // Ignore errors from is_regular_file: we only need to know if we can read 2183 // the file or not. 2184 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); 2185 2186 if (!IsFile) { 2187 // Try _clang-format too, since dotfiles are not commonly used on Windows. 
2188 ConfigFile = Directory; 2189 llvm::sys::path::append(ConfigFile, "_clang-format"); 2190 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); 2191 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); 2192 } 2193 2194 if (IsFile) { 2195 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 2196 llvm::MemoryBuffer::getFile(ConfigFile.c_str()); 2197 if (std::error_code EC = Text.getError()) { 2198 llvm::errs() << EC.message() << "\n"; 2199 break; 2200 } 2201 if (std::error_code ec = 2202 parseConfiguration(Text.get()->getBuffer(), &Style)) { 2203 if (ec == ParseError::Unsuitable) { 2204 if (!UnsuitableConfigFiles.empty()) 2205 UnsuitableConfigFiles.append(", "); 2206 UnsuitableConfigFiles.append(ConfigFile); 2207 continue; 2208 } 2209 llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message() 2210 << "\n"; 2211 break; 2212 } 2213 DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n"); 2214 return Style; 2215 } 2216 } 2217 llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle 2218 << " style\n"; 2219 if (!UnsuitableConfigFiles.empty()) { 2220 llvm::errs() << "Configuration file(s) do(es) not support " 2221 << getLanguageName(Style.Language) << ": " 2222 << UnsuitableConfigFiles << "\n"; 2223 } 2224 return Style; 2225 } 2226 2227 } // namespace format 2228 } // namespace clang 2229