1 //===--- Format.cpp - Format C++ code -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements functions declared in Format.h. This will be 12 /// split into separate files as we go. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "ContinuationIndenter.h" 17 #include "TokenAnnotator.h" 18 #include "UnwrappedLineParser.h" 19 #include "WhitespaceManager.h" 20 #include "clang/Basic/Diagnostic.h" 21 #include "clang/Basic/DiagnosticOptions.h" 22 #include "clang/Basic/SourceManager.h" 23 #include "clang/Format/Format.h" 24 #include "clang/Lex/Lexer.h" 25 #include "llvm/ADT/STLExtras.h" 26 #include "llvm/Support/Allocator.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/Path.h" 29 #include "llvm/Support/YAMLTraits.h" 30 #include <queue> 31 #include <string> 32 33 #define DEBUG_TYPE "format-formatter" 34 35 using clang::format::FormatStyle; 36 37 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) 38 39 namespace llvm { 40 namespace yaml { 41 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> { 42 static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) { 43 IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp); 44 IO.enumCase(Value, "Java", FormatStyle::LK_Java); 45 IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript); 46 IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); 47 } 48 }; 49 50 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> { 51 static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) { 52 IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03); 53 IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03); 54 IO.enumCase(Value, "Cpp11", 
FormatStyle::LS_Cpp11); 55 IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11); 56 IO.enumCase(Value, "Auto", FormatStyle::LS_Auto); 57 } 58 }; 59 60 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> { 61 static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) { 62 IO.enumCase(Value, "Never", FormatStyle::UT_Never); 63 IO.enumCase(Value, "false", FormatStyle::UT_Never); 64 IO.enumCase(Value, "Always", FormatStyle::UT_Always); 65 IO.enumCase(Value, "true", FormatStyle::UT_Always); 66 IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation); 67 } 68 }; 69 70 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> { 71 static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) { 72 IO.enumCase(Value, "None", FormatStyle::SFS_None); 73 IO.enumCase(Value, "false", FormatStyle::SFS_None); 74 IO.enumCase(Value, "All", FormatStyle::SFS_All); 75 IO.enumCase(Value, "true", FormatStyle::SFS_All); 76 IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline); 77 } 78 }; 79 80 template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> { 81 static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) { 82 IO.enumCase(Value, "All", FormatStyle::BOS_All); 83 IO.enumCase(Value, "true", FormatStyle::BOS_All); 84 IO.enumCase(Value, "None", FormatStyle::BOS_None); 85 IO.enumCase(Value, "false", FormatStyle::BOS_None); 86 IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment); 87 } 88 }; 89 90 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> { 91 static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) { 92 IO.enumCase(Value, "Attach", FormatStyle::BS_Attach); 93 IO.enumCase(Value, "Linux", FormatStyle::BS_Linux); 94 IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup); 95 IO.enumCase(Value, "Allman", FormatStyle::BS_Allman); 96 IO.enumCase(Value, "GNU", FormatStyle::BS_GNU); 97 } 98 }; 99 100 template <> 101 struct 
ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> { 102 static void enumeration(IO &IO, 103 FormatStyle::NamespaceIndentationKind &Value) { 104 IO.enumCase(Value, "None", FormatStyle::NI_None); 105 IO.enumCase(Value, "Inner", FormatStyle::NI_Inner); 106 IO.enumCase(Value, "All", FormatStyle::NI_All); 107 } 108 }; 109 110 template <> 111 struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> { 112 static void enumeration(IO &IO, 113 FormatStyle::PointerAlignmentStyle &Value) { 114 IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle); 115 IO.enumCase(Value, "Left", FormatStyle::PAS_Left); 116 IO.enumCase(Value, "Right", FormatStyle::PAS_Right); 117 118 // For backward compatibility. 119 IO.enumCase(Value, "true", FormatStyle::PAS_Left); 120 IO.enumCase(Value, "false", FormatStyle::PAS_Right); 121 } 122 }; 123 124 template <> 125 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> { 126 static void enumeration(IO &IO, 127 FormatStyle::SpaceBeforeParensOptions &Value) { 128 IO.enumCase(Value, "Never", FormatStyle::SBPO_Never); 129 IO.enumCase(Value, "ControlStatements", 130 FormatStyle::SBPO_ControlStatements); 131 IO.enumCase(Value, "Always", FormatStyle::SBPO_Always); 132 133 // For backward compatibility. 134 IO.enumCase(Value, "false", FormatStyle::SBPO_Never); 135 IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements); 136 } 137 }; 138 139 template <> struct MappingTraits<FormatStyle> { 140 static void mapping(IO &IO, FormatStyle &Style) { 141 // When reading, read the language first, we need it for getPredefinedStyle. 
142 IO.mapOptional("Language", Style.Language); 143 144 if (IO.outputting()) { 145 StringRef StylesArray[] = { "LLVM", "Google", "Chromium", 146 "Mozilla", "WebKit", "GNU" }; 147 ArrayRef<StringRef> Styles(StylesArray); 148 for (size_t i = 0, e = Styles.size(); i < e; ++i) { 149 StringRef StyleName(Styles[i]); 150 FormatStyle PredefinedStyle; 151 if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) && 152 Style == PredefinedStyle) { 153 IO.mapOptional("# BasedOnStyle", StyleName); 154 break; 155 } 156 } 157 } else { 158 StringRef BasedOnStyle; 159 IO.mapOptional("BasedOnStyle", BasedOnStyle); 160 if (!BasedOnStyle.empty()) { 161 FormatStyle::LanguageKind OldLanguage = Style.Language; 162 FormatStyle::LanguageKind Language = 163 ((FormatStyle *)IO.getContext())->Language; 164 if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) { 165 IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle)); 166 return; 167 } 168 Style.Language = OldLanguage; 169 } 170 } 171 172 IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); 173 IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); 174 IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); 175 IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", 176 Style.AllowAllParametersOfDeclarationOnNextLine); 177 IO.mapOptional("AllowShortBlocksOnASingleLine", 178 Style.AllowShortBlocksOnASingleLine); 179 IO.mapOptional("AllowShortCaseLabelsOnASingleLine", 180 Style.AllowShortCaseLabelsOnASingleLine); 181 IO.mapOptional("AllowShortIfStatementsOnASingleLine", 182 Style.AllowShortIfStatementsOnASingleLine); 183 IO.mapOptional("AllowShortLoopsOnASingleLine", 184 Style.AllowShortLoopsOnASingleLine); 185 IO.mapOptional("AllowShortFunctionsOnASingleLine", 186 Style.AllowShortFunctionsOnASingleLine); 187 IO.mapOptional("AlwaysBreakAfterDefinitionReturnType", 188 Style.AlwaysBreakAfterDefinitionReturnType); 189 
IO.mapOptional("AlwaysBreakTemplateDeclarations", 190 Style.AlwaysBreakTemplateDeclarations); 191 IO.mapOptional("AlwaysBreakBeforeMultilineStrings", 192 Style.AlwaysBreakBeforeMultilineStrings); 193 IO.mapOptional("BreakBeforeBinaryOperators", 194 Style.BreakBeforeBinaryOperators); 195 IO.mapOptional("BreakBeforeTernaryOperators", 196 Style.BreakBeforeTernaryOperators); 197 IO.mapOptional("BreakConstructorInitializersBeforeComma", 198 Style.BreakConstructorInitializersBeforeComma); 199 IO.mapOptional("BinPackParameters", Style.BinPackParameters); 200 IO.mapOptional("BinPackArguments", Style.BinPackArguments); 201 IO.mapOptional("ColumnLimit", Style.ColumnLimit); 202 IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", 203 Style.ConstructorInitializerAllOnOneLineOrOnePerLine); 204 IO.mapOptional("ConstructorInitializerIndentWidth", 205 Style.ConstructorInitializerIndentWidth); 206 IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment); 207 IO.mapOptional("ExperimentalAutoDetectBinPacking", 208 Style.ExperimentalAutoDetectBinPacking); 209 IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); 210 IO.mapOptional("IndentWrappedFunctionNames", 211 Style.IndentWrappedFunctionNames); 212 IO.mapOptional("IndentFunctionDeclarationAfterType", 213 Style.IndentWrappedFunctionNames); 214 IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); 215 IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks", 216 Style.KeepEmptyLinesAtTheStartOfBlocks); 217 IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); 218 IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth); 219 IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty); 220 IO.mapOptional("ObjCSpaceBeforeProtocolList", 221 Style.ObjCSpaceBeforeProtocolList); 222 IO.mapOptional("PenaltyBreakBeforeFirstCallParameter", 223 Style.PenaltyBreakBeforeFirstCallParameter); 224 IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); 225 
IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString); 226 IO.mapOptional("PenaltyBreakFirstLessLess", 227 Style.PenaltyBreakFirstLessLess); 228 IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter); 229 IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", 230 Style.PenaltyReturnTypeOnItsOwnLine); 231 IO.mapOptional("PointerAlignment", Style.PointerAlignment); 232 IO.mapOptional("SpacesBeforeTrailingComments", 233 Style.SpacesBeforeTrailingComments); 234 IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle); 235 IO.mapOptional("Standard", Style.Standard); 236 IO.mapOptional("IndentWidth", Style.IndentWidth); 237 IO.mapOptional("TabWidth", Style.TabWidth); 238 IO.mapOptional("UseTab", Style.UseTab); 239 IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); 240 IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses); 241 IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets); 242 IO.mapOptional("SpacesInAngles", Style.SpacesInAngles); 243 IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses); 244 IO.mapOptional("SpacesInCStyleCastParentheses", 245 Style.SpacesInCStyleCastParentheses); 246 IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast); 247 IO.mapOptional("SpacesInContainerLiterals", 248 Style.SpacesInContainerLiterals); 249 IO.mapOptional("SpaceBeforeAssignmentOperators", 250 Style.SpaceBeforeAssignmentOperators); 251 IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth); 252 IO.mapOptional("CommentPragmas", Style.CommentPragmas); 253 IO.mapOptional("ForEachMacros", Style.ForEachMacros); 254 255 // For backward compatibility. 
256 if (!IO.outputting()) { 257 IO.mapOptional("SpaceAfterControlStatementKeyword", 258 Style.SpaceBeforeParens); 259 IO.mapOptional("PointerBindsToType", Style.PointerAlignment); 260 IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment); 261 } 262 IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens); 263 IO.mapOptional("DisableFormat", Style.DisableFormat); 264 } 265 }; 266 267 // Allows to read vector<FormatStyle> while keeping default values. 268 // IO.getContext() should contain a pointer to the FormatStyle structure, that 269 // will be used to get default values for missing keys. 270 // If the first element has no Language specified, it will be treated as the 271 // default one for the following elements. 272 template <> struct DocumentListTraits<std::vector<FormatStyle> > { 273 static size_t size(IO &IO, std::vector<FormatStyle> &Seq) { 274 return Seq.size(); 275 } 276 static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq, 277 size_t Index) { 278 if (Index >= Seq.size()) { 279 assert(Index == Seq.size()); 280 FormatStyle Template; 281 if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) { 282 Template = Seq[0]; 283 } else { 284 Template = *((const FormatStyle *)IO.getContext()); 285 Template.Language = FormatStyle::LK_None; 286 } 287 Seq.resize(Index + 1, Template); 288 } 289 return Seq[Index]; 290 } 291 }; 292 } 293 } 294 295 namespace clang { 296 namespace format { 297 298 const std::error_category &getParseCategory() { 299 static ParseErrorCategory C; 300 return C; 301 } 302 std::error_code make_error_code(ParseError e) { 303 return std::error_code(static_cast<int>(e), getParseCategory()); 304 } 305 306 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT { 307 return "clang-format.parse_error"; 308 } 309 310 std::string ParseErrorCategory::message(int EV) const { 311 switch (static_cast<ParseError>(EV)) { 312 case ParseError::Success: 313 return "Success"; 314 case ParseError::Error: 315 return "Invalid 
argument"; 316 case ParseError::Unsuitable: 317 return "Unsuitable"; 318 } 319 llvm_unreachable("unexpected parse error"); 320 } 321 322 FormatStyle getLLVMStyle() { 323 FormatStyle LLVMStyle; 324 LLVMStyle.Language = FormatStyle::LK_Cpp; 325 LLVMStyle.AccessModifierOffset = -2; 326 LLVMStyle.AlignEscapedNewlinesLeft = false; 327 LLVMStyle.AlignTrailingComments = true; 328 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 329 LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All; 330 LLVMStyle.AllowShortBlocksOnASingleLine = false; 331 LLVMStyle.AllowShortCaseLabelsOnASingleLine = false; 332 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 333 LLVMStyle.AllowShortLoopsOnASingleLine = false; 334 LLVMStyle.AlwaysBreakAfterDefinitionReturnType = false; 335 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; 336 LLVMStyle.AlwaysBreakTemplateDeclarations = false; 337 LLVMStyle.BinPackParameters = true; 338 LLVMStyle.BinPackArguments = true; 339 LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; 340 LLVMStyle.BreakBeforeTernaryOperators = true; 341 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 342 LLVMStyle.BreakConstructorInitializersBeforeComma = false; 343 LLVMStyle.ColumnLimit = 80; 344 LLVMStyle.CommentPragmas = "^ IWYU pragma:"; 345 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 346 LLVMStyle.ConstructorInitializerIndentWidth = 4; 347 LLVMStyle.ContinuationIndentWidth = 4; 348 LLVMStyle.Cpp11BracedListStyle = true; 349 LLVMStyle.DerivePointerAlignment = false; 350 LLVMStyle.ExperimentalAutoDetectBinPacking = false; 351 LLVMStyle.ForEachMacros.push_back("foreach"); 352 LLVMStyle.ForEachMacros.push_back("Q_FOREACH"); 353 LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH"); 354 LLVMStyle.IndentCaseLabels = false; 355 LLVMStyle.IndentWrappedFunctionNames = false; 356 LLVMStyle.IndentWidth = 2; 357 LLVMStyle.TabWidth = 8; 358 LLVMStyle.MaxEmptyLinesToKeep = 1; 359 
LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true; 360 LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; 361 LLVMStyle.ObjCBlockIndentWidth = 2; 362 LLVMStyle.ObjCSpaceAfterProperty = false; 363 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 364 LLVMStyle.PointerAlignment = FormatStyle::PAS_Right; 365 LLVMStyle.SpacesBeforeTrailingComments = 1; 366 LLVMStyle.Standard = FormatStyle::LS_Cpp11; 367 LLVMStyle.UseTab = FormatStyle::UT_Never; 368 LLVMStyle.SpacesInParentheses = false; 369 LLVMStyle.SpacesInSquareBrackets = false; 370 LLVMStyle.SpaceInEmptyParentheses = false; 371 LLVMStyle.SpacesInContainerLiterals = true; 372 LLVMStyle.SpacesInCStyleCastParentheses = false; 373 LLVMStyle.SpaceAfterCStyleCast = false; 374 LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements; 375 LLVMStyle.SpaceBeforeAssignmentOperators = true; 376 LLVMStyle.SpacesInAngles = false; 377 378 LLVMStyle.PenaltyBreakComment = 300; 379 LLVMStyle.PenaltyBreakFirstLessLess = 120; 380 LLVMStyle.PenaltyBreakString = 1000; 381 LLVMStyle.PenaltyExcessCharacter = 1000000; 382 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; 383 LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19; 384 385 LLVMStyle.DisableFormat = false; 386 387 return LLVMStyle; 388 } 389 390 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { 391 FormatStyle GoogleStyle = getLLVMStyle(); 392 GoogleStyle.Language = Language; 393 394 GoogleStyle.AccessModifierOffset = -1; 395 GoogleStyle.AlignEscapedNewlinesLeft = true; 396 GoogleStyle.AllowShortIfStatementsOnASingleLine = true; 397 GoogleStyle.AllowShortLoopsOnASingleLine = true; 398 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; 399 GoogleStyle.AlwaysBreakTemplateDeclarations = true; 400 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 401 GoogleStyle.DerivePointerAlignment = true; 402 GoogleStyle.IndentCaseLabels = true; 403 GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false; 404 GoogleStyle.ObjCSpaceAfterProperty = false; 405 
GoogleStyle.ObjCSpaceBeforeProtocolList = false; 406 GoogleStyle.PointerAlignment = FormatStyle::PAS_Left; 407 GoogleStyle.SpacesBeforeTrailingComments = 2; 408 GoogleStyle.Standard = FormatStyle::LS_Auto; 409 410 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; 411 GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1; 412 413 if (Language == FormatStyle::LK_Java) { 414 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; 415 GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment; 416 GoogleStyle.ColumnLimit = 100; 417 GoogleStyle.SpaceAfterCStyleCast = true; 418 } else if (Language == FormatStyle::LK_JavaScript) { 419 GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment; 420 GoogleStyle.MaxEmptyLinesToKeep = 3; 421 GoogleStyle.SpacesInContainerLiterals = false; 422 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 423 } else if (Language == FormatStyle::LK_Proto) { 424 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; 425 GoogleStyle.SpacesInContainerLiterals = false; 426 } 427 428 return GoogleStyle; 429 } 430 431 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { 432 FormatStyle ChromiumStyle = getGoogleStyle(Language); 433 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; 434 ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; 435 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; 436 ChromiumStyle.AllowShortLoopsOnASingleLine = false; 437 ChromiumStyle.BinPackParameters = false; 438 ChromiumStyle.DerivePointerAlignment = false; 439 return ChromiumStyle; 440 } 441 442 FormatStyle getMozillaStyle() { 443 FormatStyle MozillaStyle = getLLVMStyle(); 444 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; 445 MozillaStyle.Cpp11BracedListStyle = false; 446 MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 447 MozillaStyle.DerivePointerAlignment = true; 448 
MozillaStyle.IndentCaseLabels = true; 449 MozillaStyle.ObjCSpaceAfterProperty = true; 450 MozillaStyle.ObjCSpaceBeforeProtocolList = false; 451 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; 452 MozillaStyle.PointerAlignment = FormatStyle::PAS_Left; 453 MozillaStyle.Standard = FormatStyle::LS_Cpp03; 454 return MozillaStyle; 455 } 456 457 FormatStyle getWebKitStyle() { 458 FormatStyle Style = getLLVMStyle(); 459 Style.AccessModifierOffset = -4; 460 Style.AlignTrailingComments = false; 461 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; 462 Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup; 463 Style.BreakConstructorInitializersBeforeComma = true; 464 Style.Cpp11BracedListStyle = false; 465 Style.ColumnLimit = 0; 466 Style.IndentWidth = 4; 467 Style.NamespaceIndentation = FormatStyle::NI_Inner; 468 Style.ObjCBlockIndentWidth = 4; 469 Style.ObjCSpaceAfterProperty = true; 470 Style.PointerAlignment = FormatStyle::PAS_Left; 471 Style.Standard = FormatStyle::LS_Cpp03; 472 return Style; 473 } 474 475 FormatStyle getGNUStyle() { 476 FormatStyle Style = getLLVMStyle(); 477 Style.AlwaysBreakAfterDefinitionReturnType = true; 478 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; 479 Style.BreakBeforeBraces = FormatStyle::BS_GNU; 480 Style.BreakBeforeTernaryOperators = true; 481 Style.Cpp11BracedListStyle = false; 482 Style.ColumnLimit = 79; 483 Style.SpaceBeforeParens = FormatStyle::SBPO_Always; 484 Style.Standard = FormatStyle::LS_Cpp03; 485 return Style; 486 } 487 488 FormatStyle getNoStyle() { 489 FormatStyle NoStyle = getLLVMStyle(); 490 NoStyle.DisableFormat = true; 491 return NoStyle; 492 } 493 494 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language, 495 FormatStyle *Style) { 496 if (Name.equals_lower("llvm")) { 497 *Style = getLLVMStyle(); 498 } else if (Name.equals_lower("chromium")) { 499 *Style = getChromiumStyle(Language); 500 } else if (Name.equals_lower("mozilla")) { 501 *Style = getMozillaStyle(); 502 } else if 
(Name.equals_lower("google")) { 503 *Style = getGoogleStyle(Language); 504 } else if (Name.equals_lower("webkit")) { 505 *Style = getWebKitStyle(); 506 } else if (Name.equals_lower("gnu")) { 507 *Style = getGNUStyle(); 508 } else if (Name.equals_lower("none")) { 509 *Style = getNoStyle(); 510 } else { 511 return false; 512 } 513 514 Style->Language = Language; 515 return true; 516 } 517 518 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { 519 assert(Style); 520 FormatStyle::LanguageKind Language = Style->Language; 521 assert(Language != FormatStyle::LK_None); 522 if (Text.trim().empty()) 523 return make_error_code(ParseError::Error); 524 525 std::vector<FormatStyle> Styles; 526 llvm::yaml::Input Input(Text); 527 // DocumentListTraits<vector<FormatStyle>> uses the context to get default 528 // values for the fields, keys for which are missing from the configuration. 529 // Mapping also uses the context to get the language to find the correct 530 // base style. 531 Input.setContext(Style); 532 Input >> Styles; 533 if (Input.error()) 534 return Input.error(); 535 536 for (unsigned i = 0; i < Styles.size(); ++i) { 537 // Ensures that only the first configuration can skip the Language option. 538 if (Styles[i].Language == FormatStyle::LK_None && i != 0) 539 return make_error_code(ParseError::Error); 540 // Ensure that each language is configured at most once. 541 for (unsigned j = 0; j < i; ++j) { 542 if (Styles[i].Language == Styles[j].Language) { 543 DEBUG(llvm::dbgs() 544 << "Duplicate languages in the config file on positions " << j 545 << " and " << i << "\n"); 546 return make_error_code(ParseError::Error); 547 } 548 } 549 } 550 // Look for a suitable configuration starting from the end, so we can 551 // find the configuration for the specific language first, and the default 552 // configuration (which can only be at slot 0) after it. 
553 for (int i = Styles.size() - 1; i >= 0; --i) { 554 if (Styles[i].Language == Language || 555 Styles[i].Language == FormatStyle::LK_None) { 556 *Style = Styles[i]; 557 Style->Language = Language; 558 return make_error_code(ParseError::Success); 559 } 560 } 561 return make_error_code(ParseError::Unsuitable); 562 } 563 564 std::string configurationAsText(const FormatStyle &Style) { 565 std::string Text; 566 llvm::raw_string_ostream Stream(Text); 567 llvm::yaml::Output Output(Stream); 568 // We use the same mapping method for input and output, so we need a non-const 569 // reference here. 570 FormatStyle NonConstStyle = Style; 571 Output << NonConstStyle; 572 return Stream.str(); 573 } 574 575 namespace { 576 577 class NoColumnLimitFormatter { 578 public: 579 NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {} 580 581 /// \brief Formats the line starting at \p State, simply keeping all of the 582 /// input's line breaking decisions. 583 void format(unsigned FirstIndent, const AnnotatedLine *Line) { 584 LineState State = 585 Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false); 586 while (State.NextToken) { 587 bool Newline = 588 Indenter->mustBreak(State) || 589 (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0); 590 Indenter->addTokenToState(State, Newline, /*DryRun=*/false); 591 } 592 } 593 594 private: 595 ContinuationIndenter *Indenter; 596 }; 597 598 class LineJoiner { 599 public: 600 LineJoiner(const FormatStyle &Style) : Style(Style) {} 601 602 /// \brief Calculates how many lines can be merged into 1 starting at \p I. 603 unsigned 604 tryFitMultipleLinesInOne(unsigned Indent, 605 SmallVectorImpl<AnnotatedLine *>::const_iterator I, 606 SmallVectorImpl<AnnotatedLine *>::const_iterator E) { 607 // We can never merge stuff if there are trailing line comments. 
608 const AnnotatedLine *TheLine = *I; 609 if (TheLine->Last->Type == TT_LineComment) 610 return 0; 611 612 if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit) 613 return 0; 614 615 unsigned Limit = 616 Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent; 617 // If we already exceed the column limit, we set 'Limit' to 0. The different 618 // tryMerge..() functions can then decide whether to still do merging. 619 Limit = TheLine->Last->TotalLength > Limit 620 ? 0 621 : Limit - TheLine->Last->TotalLength; 622 623 if (I + 1 == E || I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore) 624 return 0; 625 626 // FIXME: TheLine->Level != 0 might or might not be the right check to do. 627 // If necessary, change to something smarter. 628 bool MergeShortFunctions = 629 Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All || 630 (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline && 631 TheLine->Level != 0); 632 633 if (TheLine->Last->Type == TT_FunctionLBrace && 634 TheLine->First != TheLine->Last) { 635 return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0; 636 } 637 if (TheLine->Last->is(tok::l_brace)) { 638 return Style.BreakBeforeBraces == FormatStyle::BS_Attach 639 ? tryMergeSimpleBlock(I, E, Limit) 640 : 0; 641 } 642 if (I[1]->First->Type == TT_FunctionLBrace && 643 Style.BreakBeforeBraces != FormatStyle::BS_Attach) { 644 // Check for Limit <= 2 to account for the " {". 645 if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine))) 646 return 0; 647 Limit -= 2; 648 649 unsigned MergedLines = 0; 650 if (MergeShortFunctions) { 651 MergedLines = tryMergeSimpleBlock(I + 1, E, Limit); 652 // If we managed to merge the block, count the function header, which is 653 // on a separate line. 654 if (MergedLines > 0) 655 ++MergedLines; 656 } 657 return MergedLines; 658 } 659 if (TheLine->First->is(tok::kw_if)) { 660 return Style.AllowShortIfStatementsOnASingleLine 661 ? 
tryMergeSimpleControlStatement(I, E, Limit) 662 : 0; 663 } 664 if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) { 665 return Style.AllowShortLoopsOnASingleLine 666 ? tryMergeSimpleControlStatement(I, E, Limit) 667 : 0; 668 } 669 if (TheLine->First->isOneOf(tok::kw_case, tok::kw_default)) { 670 return Style.AllowShortCaseLabelsOnASingleLine 671 ? tryMergeShortCaseLabels(I, E, Limit) 672 : 0; 673 } 674 if (TheLine->InPPDirective && 675 (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) { 676 return tryMergeSimplePPDirective(I, E, Limit); 677 } 678 return 0; 679 } 680 681 private: 682 unsigned 683 tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 684 SmallVectorImpl<AnnotatedLine *>::const_iterator E, 685 unsigned Limit) { 686 if (Limit == 0) 687 return 0; 688 if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline) 689 return 0; 690 if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline) 691 return 0; 692 if (1 + I[1]->Last->TotalLength > Limit) 693 return 0; 694 return 1; 695 } 696 697 unsigned tryMergeSimpleControlStatement( 698 SmallVectorImpl<AnnotatedLine *>::const_iterator I, 699 SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { 700 if (Limit == 0) 701 return 0; 702 if ((Style.BreakBeforeBraces == FormatStyle::BS_Allman || 703 Style.BreakBeforeBraces == FormatStyle::BS_GNU) && 704 (I[1]->First->is(tok::l_brace) && !Style.AllowShortBlocksOnASingleLine)) 705 return 0; 706 if (I[1]->InPPDirective != (*I)->InPPDirective || 707 (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline)) 708 return 0; 709 Limit = limitConsideringMacros(I + 1, E, Limit); 710 AnnotatedLine &Line = **I; 711 if (Line.Last->isNot(tok::r_paren)) 712 return 0; 713 if (1 + I[1]->Last->TotalLength > Limit) 714 return 0; 715 if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, 716 tok::kw_while) || 717 I[1]->First->Type == TT_LineComment) 718 return 0; 719 // Only inline simple if's (no 
nested if or else). 720 if (I + 2 != E && Line.First->is(tok::kw_if) && 721 I[2]->First->is(tok::kw_else)) 722 return 0; 723 return 1; 724 } 725 726 unsigned tryMergeShortCaseLabels( 727 SmallVectorImpl<AnnotatedLine *>::const_iterator I, 728 SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { 729 if (Limit == 0 || I + 1 == E || 730 I[1]->First->isOneOf(tok::kw_case, tok::kw_default)) 731 return 0; 732 unsigned NumStmts = 0; 733 unsigned Length = 0; 734 for (; NumStmts < 3; ++NumStmts) { 735 if (I + 1 + NumStmts == E) 736 break; 737 const AnnotatedLine *Line = I[1 + NumStmts]; 738 if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace)) 739 break; 740 if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch, 741 tok::kw_while)) 742 return 0; 743 Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space. 744 } 745 if (NumStmts == 0 || NumStmts == 3 || Length > Limit) 746 return 0; 747 return NumStmts; 748 } 749 750 unsigned 751 tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 752 SmallVectorImpl<AnnotatedLine *>::const_iterator E, 753 unsigned Limit) { 754 AnnotatedLine &Line = **I; 755 756 // Don't merge ObjC @ keywords and methods. 757 if (Line.First->isOneOf(tok::at, tok::minus, tok::plus)) 758 return 0; 759 760 // Check that the current line allows merging. This depends on whether we 761 // are in a control flow statements as well as several style flags. 
762 if (Line.First->isOneOf(tok::kw_else, tok::kw_case)) 763 return 0; 764 if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try, 765 tok::kw_catch, tok::kw_for, tok::r_brace)) { 766 if (!Style.AllowShortBlocksOnASingleLine) 767 return 0; 768 if (!Style.AllowShortIfStatementsOnASingleLine && 769 Line.First->is(tok::kw_if)) 770 return 0; 771 if (!Style.AllowShortLoopsOnASingleLine && 772 Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for)) 773 return 0; 774 // FIXME: Consider an option to allow short exception handling clauses on 775 // a single line. 776 if (Line.First->isOneOf(tok::kw_try, tok::kw_catch)) 777 return 0; 778 } 779 780 FormatToken *Tok = I[1]->First; 781 if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore && 782 (Tok->getNextNonComment() == nullptr || 783 Tok->getNextNonComment()->is(tok::semi))) { 784 // We merge empty blocks even if the line exceeds the column limit. 785 Tok->SpacesRequiredBefore = 0; 786 Tok->CanBreakBefore = true; 787 return 1; 788 } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) { 789 // We don't merge short records. 790 if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct)) 791 return 0; 792 793 // Check that we still have three lines and they fit into the limit. 794 if (I + 2 == E || I[2]->Type == LT_Invalid) 795 return 0; 796 Limit = limitConsideringMacros(I + 2, E, Limit); 797 798 if (!nextTwoLinesFitInto(I, Limit)) 799 return 0; 800 801 // Second, check that the next line does not contain any braces - if it 802 // does, readability declines when putting it into a single line. 803 if (I[1]->Last->Type == TT_LineComment) 804 return 0; 805 do { 806 if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit) 807 return 0; 808 Tok = Tok->Next; 809 } while (Tok); 810 811 // Last, check that the third line starts with a closing brace. 
812 Tok = I[2]->First; 813 if (Tok->isNot(tok::r_brace)) 814 return 0; 815 816 return 2; 817 } 818 return 0; 819 } 820 821 /// Returns the modified column limit for \p I if it is inside a macro and 822 /// needs a trailing '\'. 823 unsigned 824 limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 825 SmallVectorImpl<AnnotatedLine *>::const_iterator E, 826 unsigned Limit) { 827 if (I[0]->InPPDirective && I + 1 != E && 828 !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) { 829 return Limit < 2 ? 0 : Limit - 2; 830 } 831 return Limit; 832 } 833 834 bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 835 unsigned Limit) { 836 if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore) 837 return false; 838 return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit; 839 } 840 841 bool containsMustBreak(const AnnotatedLine *Line) { 842 for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { 843 if (Tok->MustBreakBefore) 844 return true; 845 } 846 return false; 847 } 848 849 const FormatStyle &Style; 850 }; 851 852 class UnwrappedLineFormatter { 853 public: 854 UnwrappedLineFormatter(ContinuationIndenter *Indenter, 855 WhitespaceManager *Whitespaces, 856 const FormatStyle &Style) 857 : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), 858 Joiner(Style) {} 859 860 unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun, 861 int AdditionalIndent = 0, bool FixBadIndentation = false) { 862 // Try to look up already computed penalty in DryRun-mode. 
863 std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey( 864 &Lines, AdditionalIndent); 865 auto CacheIt = PenaltyCache.find(CacheKey); 866 if (DryRun && CacheIt != PenaltyCache.end()) 867 return CacheIt->second; 868 869 assert(!Lines.empty()); 870 unsigned Penalty = 0; 871 std::vector<int> IndentForLevel; 872 for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i) 873 IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent); 874 const AnnotatedLine *PreviousLine = nullptr; 875 for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(), 876 E = Lines.end(); 877 I != E; ++I) { 878 const AnnotatedLine &TheLine = **I; 879 const FormatToken *FirstTok = TheLine.First; 880 int Offset = getIndentOffset(*FirstTok); 881 882 // Determine indent and try to merge multiple unwrapped lines. 883 unsigned Indent; 884 if (TheLine.InPPDirective) { 885 Indent = TheLine.Level * Style.IndentWidth; 886 } else { 887 while (IndentForLevel.size() <= TheLine.Level) 888 IndentForLevel.push_back(-1); 889 IndentForLevel.resize(TheLine.Level + 1); 890 Indent = getIndent(IndentForLevel, TheLine.Level); 891 } 892 unsigned LevelIndent = Indent; 893 if (static_cast<int>(Indent) + Offset >= 0) 894 Indent += Offset; 895 896 // Merge multiple lines if possible. 897 unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E); 898 if (MergedLines > 0 && Style.ColumnLimit == 0) { 899 // Disallow line merging if there is a break at the start of one of the 900 // input lines. 
901 for (unsigned i = 0; i < MergedLines; ++i) { 902 if (I[i + 1]->First->NewlinesBefore > 0) 903 MergedLines = 0; 904 } 905 } 906 if (!DryRun) { 907 for (unsigned i = 0; i < MergedLines; ++i) { 908 join(*I[i], *I[i + 1]); 909 } 910 } 911 I += MergedLines; 912 913 bool FixIndentation = 914 FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn); 915 if (TheLine.First->is(tok::eof)) { 916 if (PreviousLine && PreviousLine->Affected && !DryRun) { 917 // Remove the file's trailing whitespace. 918 unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u); 919 Whitespaces->replaceWhitespace(*TheLine.First, Newlines, 920 /*IndentLevel=*/0, /*Spaces=*/0, 921 /*TargetColumn=*/0); 922 } 923 } else if (TheLine.Type != LT_Invalid && 924 (TheLine.Affected || FixIndentation)) { 925 if (FirstTok->WhitespaceRange.isValid()) { 926 if (!DryRun) 927 formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, 928 Indent, TheLine.InPPDirective); 929 } else { 930 Indent = LevelIndent = FirstTok->OriginalColumn; 931 } 932 933 // If everything fits on a single line, just put it there. 934 unsigned ColumnLimit = Style.ColumnLimit; 935 if (I + 1 != E) { 936 AnnotatedLine *NextLine = I[1]; 937 if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline) 938 ColumnLimit = getColumnLimit(TheLine.InPPDirective); 939 } 940 941 if (TheLine.Last->TotalLength + Indent <= ColumnLimit) { 942 LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun); 943 while (State.NextToken) { 944 formatChildren(State, /*Newline=*/false, /*DryRun=*/false, Penalty); 945 Indenter->addTokenToState(State, /*Newline=*/false, DryRun); 946 } 947 } else if (Style.ColumnLimit == 0) { 948 // FIXME: Implement nested blocks for ColumnLimit = 0. 
949 NoColumnLimitFormatter Formatter(Indenter); 950 if (!DryRun) 951 Formatter.format(Indent, &TheLine); 952 } else { 953 Penalty += format(TheLine, Indent, DryRun); 954 } 955 956 if (!TheLine.InPPDirective) 957 IndentForLevel[TheLine.Level] = LevelIndent; 958 } else if (TheLine.ChildrenAffected) { 959 format(TheLine.Children, DryRun); 960 } else { 961 // Format the first token if necessary, and notify the WhitespaceManager 962 // about the unchanged whitespace. 963 for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) { 964 if (Tok == TheLine.First && 965 (Tok->NewlinesBefore > 0 || Tok->IsFirst)) { 966 unsigned LevelIndent = Tok->OriginalColumn; 967 if (!DryRun) { 968 // Remove trailing whitespace of the previous line. 969 if ((PreviousLine && PreviousLine->Affected) || 970 TheLine.LeadingEmptyLinesAffected) { 971 formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent, 972 TheLine.InPPDirective); 973 } else { 974 Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); 975 } 976 } 977 978 if (static_cast<int>(LevelIndent) - Offset >= 0) 979 LevelIndent -= Offset; 980 if (Tok->isNot(tok::comment) && !TheLine.InPPDirective) 981 IndentForLevel[TheLine.Level] = LevelIndent; 982 } else if (!DryRun) { 983 Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); 984 } 985 } 986 } 987 if (!DryRun) { 988 for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) { 989 Tok->Finalized = true; 990 } 991 } 992 PreviousLine = *I; 993 } 994 PenaltyCache[CacheKey] = Penalty; 995 return Penalty; 996 } 997 998 private: 999 /// \brief Formats an \c AnnotatedLine and returns the penalty. 1000 /// 1001 /// If \p DryRun is \c false, directly applies the changes. 1002 unsigned format(const AnnotatedLine &Line, unsigned FirstIndent, 1003 bool DryRun) { 1004 LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); 1005 1006 // If the ObjC method declaration does not fit on a line, we should format 1007 // it with one arg per line. 
1008 if (State.Line->Type == LT_ObjCMethodDecl) 1009 State.Stack.back().BreakBeforeParameter = true; 1010 1011 // Find best solution in solution space. 1012 return analyzeSolutionSpace(State, DryRun); 1013 } 1014 1015 /// \brief An edge in the solution space from \c Previous->State to \c State, 1016 /// inserting a newline dependent on the \c NewLine. 1017 struct StateNode { 1018 StateNode(const LineState &State, bool NewLine, StateNode *Previous) 1019 : State(State), NewLine(NewLine), Previous(Previous) {} 1020 LineState State; 1021 bool NewLine; 1022 StateNode *Previous; 1023 }; 1024 1025 /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. 1026 /// 1027 /// In case of equal penalties, we want to prefer states that were inserted 1028 /// first. During state generation we make sure that we insert states first 1029 /// that break the line as late as possible. 1030 typedef std::pair<unsigned, unsigned> OrderedPenalty; 1031 1032 /// \brief An item in the prioritized BFS search queue. The \c StateNode's 1033 /// \c State has the given \c OrderedPenalty. 1034 typedef std::pair<OrderedPenalty, StateNode *> QueueItem; 1035 1036 /// \brief The BFS queue type. 1037 typedef std::priority_queue<QueueItem, std::vector<QueueItem>, 1038 std::greater<QueueItem> > QueueType; 1039 1040 /// \brief Get the offset of the line relatively to the level. 1041 /// 1042 /// For example, 'public:' labels in classes are offset by 1 or 2 1043 /// characters to the left from their level. 1044 int getIndentOffset(const FormatToken &RootToken) { 1045 if (Style.Language == FormatStyle::LK_Java) 1046 return 0; 1047 if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier()) 1048 return Style.AccessModifierOffset; 1049 return 0; 1050 } 1051 1052 /// \brief Add a new line and the required indent before the first Token 1053 /// of the \c UnwrappedLine if there was no structural parsing error. 
1054 void formatFirstToken(FormatToken &RootToken, 1055 const AnnotatedLine *PreviousLine, unsigned IndentLevel, 1056 unsigned Indent, bool InPPDirective) { 1057 unsigned Newlines = 1058 std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); 1059 // Remove empty lines before "}" where applicable. 1060 if (RootToken.is(tok::r_brace) && 1061 (!RootToken.Next || 1062 (RootToken.Next->is(tok::semi) && !RootToken.Next->Next))) 1063 Newlines = std::min(Newlines, 1u); 1064 if (Newlines == 0 && !RootToken.IsFirst) 1065 Newlines = 1; 1066 if (RootToken.IsFirst && !RootToken.HasUnescapedNewline) 1067 Newlines = 0; 1068 1069 // Remove empty lines after "{". 1070 if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine && 1071 PreviousLine->Last->is(tok::l_brace) && 1072 PreviousLine->First->isNot(tok::kw_namespace)) 1073 Newlines = 1; 1074 1075 // Insert extra new line before access specifiers. 1076 if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) && 1077 RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1) 1078 ++Newlines; 1079 1080 // Remove empty lines after access specifiers. 1081 if (PreviousLine && PreviousLine->First->isAccessSpecifier()) 1082 Newlines = std::min(1u, Newlines); 1083 1084 Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent, 1085 Indent, InPPDirective && 1086 !RootToken.HasUnescapedNewline); 1087 } 1088 1089 /// \brief Get the indent of \p Level from \p IndentForLevel. 1090 /// 1091 /// \p IndentForLevel must contain the indent for the level \c l 1092 /// at \p IndentForLevel[l], or a value < 0 if the indent for 1093 /// that level is unknown. 
1094 unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level) { 1095 if (IndentForLevel[Level] != -1) 1096 return IndentForLevel[Level]; 1097 if (Level == 0) 1098 return 0; 1099 return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; 1100 } 1101 1102 void join(AnnotatedLine &A, const AnnotatedLine &B) { 1103 assert(!A.Last->Next); 1104 assert(!B.First->Previous); 1105 if (B.Affected) 1106 A.Affected = true; 1107 A.Last->Next = B.First; 1108 B.First->Previous = A.Last; 1109 B.First->CanBreakBefore = true; 1110 unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; 1111 for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { 1112 Tok->TotalLength += LengthA; 1113 A.Last = Tok; 1114 } 1115 } 1116 1117 unsigned getColumnLimit(bool InPPDirective) const { 1118 // In preprocessor directives reserve two chars for trailing " \" 1119 return Style.ColumnLimit - (InPPDirective ? 2 : 0); 1120 } 1121 1122 struct CompareLineStatePointers { 1123 bool operator()(LineState *obj1, LineState *obj2) const { 1124 return *obj1 < *obj2; 1125 } 1126 }; 1127 1128 /// \brief Analyze the entire solution space starting from \p InitialState. 1129 /// 1130 /// This implements a variant of Dijkstra's algorithm on the graph that spans 1131 /// the solution space (\c LineStates are the nodes). The algorithm tries to 1132 /// find the shortest path (the one with lowest penalty) from \p InitialState 1133 /// to a state where all tokens are placed. Returns the penalty. 1134 /// 1135 /// If \p DryRun is \c false, directly applies the changes. 1136 unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) { 1137 std::set<LineState *, CompareLineStatePointers> Seen; 1138 1139 // Increasing count of \c StateNode items we have created. This is used to 1140 // create a deterministic order independent of the container. 1141 unsigned Count = 0; 1142 QueueType Queue; 1143 1144 // Insert start element into queue. 
1145 StateNode *Node = 1146 new (Allocator.Allocate()) StateNode(InitialState, false, nullptr); 1147 Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); 1148 ++Count; 1149 1150 unsigned Penalty = 0; 1151 1152 // While not empty, take first element and follow edges. 1153 while (!Queue.empty()) { 1154 Penalty = Queue.top().first.first; 1155 StateNode *Node = Queue.top().second; 1156 if (!Node->State.NextToken) { 1157 DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); 1158 break; 1159 } 1160 Queue.pop(); 1161 1162 // Cut off the analysis of certain solutions if the analysis gets too 1163 // complex. See description of IgnoreStackForComparison. 1164 if (Count > 10000) 1165 Node->State.IgnoreStackForComparison = true; 1166 1167 if (!Seen.insert(&Node->State).second) 1168 // State already examined with lower penalty. 1169 continue; 1170 1171 FormatDecision LastFormat = Node->State.NextToken->Decision; 1172 if (LastFormat == FD_Unformatted || LastFormat == FD_Continue) 1173 addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue); 1174 if (LastFormat == FD_Unformatted || LastFormat == FD_Break) 1175 addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue); 1176 } 1177 1178 if (Queue.empty()) { 1179 // We were unable to find a solution, do nothing. 1180 // FIXME: Add diagnostic? 1181 DEBUG(llvm::dbgs() << "Could not find a solution.\n"); 1182 return 0; 1183 } 1184 1185 // Reconstruct the solution. 1186 if (!DryRun) 1187 reconstructPath(InitialState, Queue.top().second); 1188 1189 DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); 1190 DEBUG(llvm::dbgs() << "---\n"); 1191 1192 return Penalty; 1193 } 1194 1195 void reconstructPath(LineState &State, StateNode *Current) { 1196 std::deque<StateNode *> Path; 1197 // We do not need a break before the initial token. 
1198 while (Current->Previous) { 1199 Path.push_front(Current); 1200 Current = Current->Previous; 1201 } 1202 for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); 1203 I != E; ++I) { 1204 unsigned Penalty = 0; 1205 formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty); 1206 Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false); 1207 1208 DEBUG({ 1209 if ((*I)->NewLine) { 1210 llvm::dbgs() << "Penalty for placing " 1211 << (*I)->Previous->State.NextToken->Tok.getName() << ": " 1212 << Penalty << "\n"; 1213 } 1214 }); 1215 } 1216 } 1217 1218 /// \brief Add the following state to the analysis queue \c Queue. 1219 /// 1220 /// Assume the current state is \p PreviousNode and has been reached with a 1221 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. 1222 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, 1223 bool NewLine, unsigned *Count, QueueType *Queue) { 1224 if (NewLine && !Indenter->canBreak(PreviousNode->State)) 1225 return; 1226 if (!NewLine && Indenter->mustBreak(PreviousNode->State)) 1227 return; 1228 1229 StateNode *Node = new (Allocator.Allocate()) 1230 StateNode(PreviousNode->State, NewLine, PreviousNode); 1231 if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty)) 1232 return; 1233 1234 Penalty += Indenter->addTokenToState(Node->State, NewLine, true); 1235 1236 Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node)); 1237 ++(*Count); 1238 } 1239 1240 /// \brief If the \p State's next token is an r_brace closing a nested block, 1241 /// format the nested block before it. 1242 /// 1243 /// Returns \c true if all children could be placed successfully and adapts 1244 /// \p Penalty as well as \p State. If \p DryRun is false, also directly 1245 /// creates changes using \c Whitespaces. 1246 /// 1247 /// The crucial idea here is that children always get formatted upon 1248 /// encountering the closing brace right after the nested block. 
Now, if we 1249 /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is 1250 /// \c false), the entire block has to be kept on the same line (which is only 1251 /// possible if it fits on the line, only contains a single statement, etc. 1252 /// 1253 /// If \p NewLine is true, we format the nested block on separate lines, i.e. 1254 /// break after the "{", format all lines with correct indentation and the put 1255 /// the closing "}" on yet another new line. 1256 /// 1257 /// This enables us to keep the simple structure of the 1258 /// \c UnwrappedLineFormatter, where we only have two options for each token: 1259 /// break or don't break. 1260 bool formatChildren(LineState &State, bool NewLine, bool DryRun, 1261 unsigned &Penalty) { 1262 FormatToken &Previous = *State.NextToken->Previous; 1263 const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); 1264 if (!LBrace || LBrace->isNot(tok::l_brace) || 1265 LBrace->BlockKind != BK_Block || Previous.Children.size() == 0) 1266 // The previous token does not open a block. Nothing to do. We don't 1267 // assert so that we can simply call this function for all tokens. 1268 return true; 1269 1270 if (NewLine) { 1271 int AdditionalIndent = 1272 State.FirstIndent - State.Line->Level * Style.IndentWidth; 1273 if (State.Stack.size() < 2 || 1274 !State.Stack[State.Stack.size() - 2].JSFunctionInlined) { 1275 AdditionalIndent = State.Stack.back().Indent - 1276 Previous.Children[0]->Level * Style.IndentWidth; 1277 } 1278 1279 Penalty += format(Previous.Children, DryRun, AdditionalIndent, 1280 /*FixBadIndentation=*/true); 1281 return true; 1282 } 1283 1284 if (Previous.Children[0]->First->MustBreakBefore) 1285 return false; 1286 1287 // Cannot merge multiple statements into a single line. 1288 if (Previous.Children.size() > 1) 1289 return false; 1290 1291 // Cannot merge into one line if this line ends on a comment. 
1292 if (Previous.is(tok::comment)) 1293 return false; 1294 1295 // We can't put the closing "}" on a line with a trailing comment. 1296 if (Previous.Children[0]->Last->isTrailingComment()) 1297 return false; 1298 1299 // If the child line exceeds the column limit, we wouldn't want to merge it. 1300 // We add +2 for the trailing " }". 1301 if (Style.ColumnLimit > 0 && 1302 Previous.Children[0]->Last->TotalLength + State.Column + 2 > 1303 Style.ColumnLimit) 1304 return false; 1305 1306 if (!DryRun) { 1307 Whitespaces->replaceWhitespace( 1308 *Previous.Children[0]->First, 1309 /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1, 1310 /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); 1311 } 1312 Penalty += format(*Previous.Children[0], State.Column + 1, DryRun); 1313 1314 State.Column += 1 + Previous.Children[0]->Last->TotalLength; 1315 return true; 1316 } 1317 1318 ContinuationIndenter *Indenter; 1319 WhitespaceManager *Whitespaces; 1320 FormatStyle Style; 1321 LineJoiner Joiner; 1322 1323 llvm::SpecificBumpPtrAllocator<StateNode> Allocator; 1324 1325 // Cache to store the penalty of formatting a vector of AnnotatedLines 1326 // starting from a specific additional offset. Improves performance if there 1327 // are many nested blocks. 
1328 std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>, 1329 unsigned> PenaltyCache; 1330 }; 1331 1332 class FormatTokenLexer { 1333 public: 1334 FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style, 1335 encoding::Encoding Encoding) 1336 : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false), 1337 Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), 1338 Style(Style), IdentTable(getFormattingLangOpts(Style)), 1339 Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0), 1340 FormattingDisabled(false) { 1341 Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr, 1342 getFormattingLangOpts(Style))); 1343 Lex->SetKeepWhitespaceMode(true); 1344 1345 for (const std::string &ForEachMacro : Style.ForEachMacros) 1346 ForEachMacros.push_back(&IdentTable.get(ForEachMacro)); 1347 std::sort(ForEachMacros.begin(), ForEachMacros.end()); 1348 } 1349 1350 ArrayRef<FormatToken *> lex() { 1351 assert(Tokens.empty()); 1352 assert(FirstInLineIndex == 0); 1353 do { 1354 Tokens.push_back(getNextToken()); 1355 tryMergePreviousTokens(); 1356 if (Tokens.back()->NewlinesBefore > 0) 1357 FirstInLineIndex = Tokens.size() - 1; 1358 } while (Tokens.back()->Tok.isNot(tok::eof)); 1359 return Tokens; 1360 } 1361 1362 const AdditionalKeywords &getKeywords() { return Keywords; } 1363 1364 private: 1365 void tryMergePreviousTokens() { 1366 if (tryMerge_TMacro()) 1367 return; 1368 if (tryMergeConflictMarkers()) 1369 return; 1370 1371 if (Style.Language == FormatStyle::LK_JavaScript) { 1372 if (tryMergeJSRegexLiteral()) 1373 return; 1374 if (tryMergeEscapeSequence()) 1375 return; 1376 1377 static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal }; 1378 static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal }; 1379 static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater, 1380 tok::greaterequal }; 1381 static tok::TokenKind JSRightArrow[] = { tok::equal, tok::greater }; 1382 // FIXME: We 
probably need to change token type to mimic operator with the 1383 // correct priority. 1384 if (tryMergeTokens(JSIdentity)) 1385 return; 1386 if (tryMergeTokens(JSNotIdentity)) 1387 return; 1388 if (tryMergeTokens(JSShiftEqual)) 1389 return; 1390 if (tryMergeTokens(JSRightArrow)) 1391 return; 1392 } 1393 } 1394 1395 bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) { 1396 if (Tokens.size() < Kinds.size()) 1397 return false; 1398 1399 SmallVectorImpl<FormatToken *>::const_iterator First = 1400 Tokens.end() - Kinds.size(); 1401 if (!First[0]->is(Kinds[0])) 1402 return false; 1403 unsigned AddLength = 0; 1404 for (unsigned i = 1; i < Kinds.size(); ++i) { 1405 if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() != 1406 First[i]->WhitespaceRange.getEnd()) 1407 return false; 1408 AddLength += First[i]->TokenText.size(); 1409 } 1410 Tokens.resize(Tokens.size() - Kinds.size() + 1); 1411 First[0]->TokenText = StringRef(First[0]->TokenText.data(), 1412 First[0]->TokenText.size() + AddLength); 1413 First[0]->ColumnWidth += AddLength; 1414 return true; 1415 } 1416 1417 // Tries to merge an escape sequence, i.e. a "\\" and the following 1418 // character. Use e.g. inside JavaScript regex literals. 1419 bool tryMergeEscapeSequence() { 1420 if (Tokens.size() < 2) 1421 return false; 1422 FormatToken *Previous = Tokens[Tokens.size() - 2]; 1423 if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\") 1424 return false; 1425 ++Previous->ColumnWidth; 1426 StringRef Text = Previous->TokenText; 1427 Previous->TokenText = StringRef(Text.data(), Text.size() + 1); 1428 resetLexer(SourceMgr.getFileOffset(Tokens.back()->Tok.getLocation()) + 1); 1429 Tokens.resize(Tokens.size() - 1); 1430 Column = Previous->OriginalColumn + Previous->ColumnWidth; 1431 return true; 1432 } 1433 1434 // Try to determine whether the current token ends a JavaScript regex literal. 
1435 // We heuristically assume that this is a regex literal if we find two 1436 // unescaped slashes on a line and the token before the first slash is one of 1437 // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by 1438 // a division. 1439 bool tryMergeJSRegexLiteral() { 1440 if (Tokens.size() < 2) 1441 return false; 1442 // If a regex literal ends in "\//", this gets represented by an unknown 1443 // token "\" and a comment. 1444 bool MightEndWithEscapedSlash = 1445 Tokens.back()->is(tok::comment) && 1446 Tokens.back()->TokenText.startswith("//") && 1447 Tokens[Tokens.size() - 2]->TokenText == "\\"; 1448 if (!MightEndWithEscapedSlash && 1449 (Tokens.back()->isNot(tok::slash) || 1450 (Tokens[Tokens.size() - 2]->is(tok::unknown) && 1451 Tokens[Tokens.size() - 2]->TokenText == "\\"))) 1452 return false; 1453 unsigned TokenCount = 0; 1454 unsigned LastColumn = Tokens.back()->OriginalColumn; 1455 for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) { 1456 ++TokenCount; 1457 if (I[0]->is(tok::slash) && I + 1 != E && 1458 (I[1]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, tok::r_brace, 1459 tok::exclaim, tok::l_square, tok::colon, tok::comma, 1460 tok::question, tok::kw_return) || 1461 I[1]->isBinaryOperator())) { 1462 if (MightEndWithEscapedSlash) { 1463 // This regex literal ends in '\//'. Skip past the '//' of the last 1464 // token and re-start lexing from there. 1465 SourceLocation Loc = Tokens.back()->Tok.getLocation(); 1466 resetLexer(SourceMgr.getFileOffset(Loc) + 2); 1467 } 1468 Tokens.resize(Tokens.size() - TokenCount); 1469 Tokens.back()->Tok.setKind(tok::unknown); 1470 Tokens.back()->Type = TT_RegexLiteral; 1471 Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn; 1472 return true; 1473 } 1474 1475 // There can't be a newline inside a regex literal. 
1476 if (I[0]->NewlinesBefore > 0) 1477 return false; 1478 } 1479 return false; 1480 } 1481 1482 bool tryMerge_TMacro() { 1483 if (Tokens.size() < 4) 1484 return false; 1485 FormatToken *Last = Tokens.back(); 1486 if (!Last->is(tok::r_paren)) 1487 return false; 1488 1489 FormatToken *String = Tokens[Tokens.size() - 2]; 1490 if (!String->is(tok::string_literal) || String->IsMultiline) 1491 return false; 1492 1493 if (!Tokens[Tokens.size() - 3]->is(tok::l_paren)) 1494 return false; 1495 1496 FormatToken *Macro = Tokens[Tokens.size() - 4]; 1497 if (Macro->TokenText != "_T") 1498 return false; 1499 1500 const char *Start = Macro->TokenText.data(); 1501 const char *End = Last->TokenText.data() + Last->TokenText.size(); 1502 String->TokenText = StringRef(Start, End - Start); 1503 String->IsFirst = Macro->IsFirst; 1504 String->LastNewlineOffset = Macro->LastNewlineOffset; 1505 String->WhitespaceRange = Macro->WhitespaceRange; 1506 String->OriginalColumn = Macro->OriginalColumn; 1507 String->ColumnWidth = encoding::columnWidthWithTabs( 1508 String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding); 1509 1510 Tokens.pop_back(); 1511 Tokens.pop_back(); 1512 Tokens.pop_back(); 1513 Tokens.back() = String; 1514 return true; 1515 } 1516 1517 bool tryMergeConflictMarkers() { 1518 if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof)) 1519 return false; 1520 1521 // Conflict lines look like: 1522 // <marker> <text from the vcs> 1523 // For example: 1524 // >>>>>>> /file/in/file/system at revision 1234 1525 // 1526 // We merge all tokens in a line that starts with a conflict marker 1527 // into a single token with a special token type that the unwrapped line 1528 // parser will use to correctly rebuild the underlying code. 1529 1530 FileID ID; 1531 // Get the position of the first token in the line. 
1532 unsigned FirstInLineOffset; 1533 std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc( 1534 Tokens[FirstInLineIndex]->getStartOfNonWhitespace()); 1535 StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer(); 1536 // Calculate the offset of the start of the current line. 1537 auto LineOffset = Buffer.rfind('\n', FirstInLineOffset); 1538 if (LineOffset == StringRef::npos) { 1539 LineOffset = 0; 1540 } else { 1541 ++LineOffset; 1542 } 1543 1544 auto FirstSpace = Buffer.find_first_of(" \n", LineOffset); 1545 StringRef LineStart; 1546 if (FirstSpace == StringRef::npos) { 1547 LineStart = Buffer.substr(LineOffset); 1548 } else { 1549 LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset); 1550 } 1551 1552 TokenType Type = TT_Unknown; 1553 if (LineStart == "<<<<<<<" || LineStart == ">>>>") { 1554 Type = TT_ConflictStart; 1555 } else if (LineStart == "|||||||" || LineStart == "=======" || 1556 LineStart == "====") { 1557 Type = TT_ConflictAlternative; 1558 } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") { 1559 Type = TT_ConflictEnd; 1560 } 1561 1562 if (Type != TT_Unknown) { 1563 FormatToken *Next = Tokens.back(); 1564 1565 Tokens.resize(FirstInLineIndex + 1); 1566 // We do not need to build a complete token here, as we will skip it 1567 // during parsing anyway (as we must not touch whitespace around conflict 1568 // markers). 1569 Tokens.back()->Type = Type; 1570 Tokens.back()->Tok.setKind(tok::kw___unknown_anytype); 1571 1572 Tokens.push_back(Next); 1573 return true; 1574 } 1575 1576 return false; 1577 } 1578 1579 FormatToken *getNextToken() { 1580 if (GreaterStashed) { 1581 // Create a synthesized second '>' token. 1582 // FIXME: Increment Column and set OriginalColumn. 
/// \brief Lexes and returns the next significant token.
///
/// Whitespace in front of the token is consumed and recorded in the token
/// (newline count, whitespace range, original column). A '>>' is returned
/// as two '>' tokens: the first one immediately, the second one on the
/// following call.
FormatToken *getNextToken() {
  if (GreaterStashed) {
    // Create a synthesized second '>' token.
    // FIXME: Increment Column and set OriginalColumn.
    Token Greater = FormatTok->Tok;
    FormatTok = new (Allocator.Allocate()) FormatToken;
    FormatTok->Tok = Greater;
    // The second '>' starts one character behind the first one and has an
    // empty whitespace range.
    SourceLocation GreaterLocation =
        FormatTok->Tok.getLocation().getLocWithOffset(1);
    FormatTok->WhitespaceRange =
        SourceRange(GreaterLocation, GreaterLocation);
    FormatTok->TokenText = ">";
    FormatTok->ColumnWidth = 1;
    GreaterStashed = false;
    return FormatTok;
  }

  FormatTok = new (Allocator.Allocate()) FormatToken;
  readRawToken(*FormatTok);
  // Whitespace trimmed off the end of the previous comment token counts as
  // whitespace in front of this token.
  SourceLocation WhitespaceStart =
      FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
  FormatTok->IsFirst = IsFirstToken;
  IsFirstToken = false;

  // Consume and record whitespace until we find a significant token.
  unsigned WhitespaceLength = TrailingWhitespace;
  while (FormatTok->Tok.is(tok::unknown)) {
    for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
      switch (FormatTok->TokenText[i]) {
      case '\n':
        ++FormatTok->NewlinesBefore;
        // The newline is escaped if it is preceded by "\" or by "\" "\r".
        // FIXME: This is technically incorrect, as it could also
        // be a literal backslash at the end of the line.
        if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
                       (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
                        FormatTok->TokenText[i - 2] != '\\')))
          FormatTok->HasUnescapedNewline = true;
        FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
        Column = 0;
        break;
      case '\r':
      case '\f':
      case '\v':
        Column = 0;
        break;
      case ' ':
        ++Column;
        break;
      case '\t':
        // Advance to the next tab stop.
        Column += Style.TabWidth - Column % Style.TabWidth;
        break;
      case '\\':
        ++Column;
        // A backslash that does not escape a line end is not whitespace.
        if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
                           FormatTok->TokenText[i + 1] != '\n'))
          FormatTok->Type = TT_ImplicitStringLiteral;
        break;
      default:
        // Any other character makes this unknown token significant.
        FormatTok->Type = TT_ImplicitStringLiteral;
        ++Column;
        break;
      }
    }

    if (FormatTok->Type == TT_ImplicitStringLiteral)
      break;
    WhitespaceLength += FormatTok->Tok.getLength();

    readRawToken(*FormatTok);
  }

  // In case the token starts with escaped newlines, we want to
  // take them into account as whitespace - this pattern is quite frequent
  // in macro definitions.
  // FIXME: Add a more explicit test.
  while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
         FormatTok->TokenText[1] == '\n') {
    ++FormatTok->NewlinesBefore;
    WhitespaceLength += 2;
    Column = 0;
    FormatTok->TokenText = FormatTok->TokenText.substr(2);
  }

  FormatTok->WhitespaceRange = SourceRange(
      WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));

  FormatTok->OriginalColumn = Column;

  TrailingWhitespace = 0;
  if (FormatTok->Tok.is(tok::comment)) {
    // Trim trailing whitespace off the comment and remember how much was
    // removed, so that it is attributed to the next token.
    // FIXME: Add the trimmed whitespace to Column.
    StringRef UntrimmedText = FormatTok->TokenText;
    FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
    TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
  } else if (FormatTok->Tok.is(tok::raw_identifier)) {
    // Resolve the raw identifier so that keywords get their token kind.
    IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
    FormatTok->Tok.setIdentifierInfo(&Info);
    FormatTok->Tok.setKind(Info.getTokenID());
  } else if (FormatTok->Tok.is(tok::greatergreater)) {
    // Split '>>' into two '>'; the second one is created on the next call.
    FormatTok->Tok.setKind(tok::greater);
    FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
    GreaterStashed = true;
  }

  // Now FormatTok is the next non-whitespace token.

  StringRef Text = FormatTok->TokenText;
  size_t FirstNewlinePos = Text.find('\n');
  if (FirstNewlinePos == StringRef::npos) {
    // FIXME: ColumnWidth actually depends on the start column, we need to
    // take this into account when the token is moved.
    FormatTok->ColumnWidth =
        encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
    Column += FormatTok->ColumnWidth;
  } else {
    FormatTok->IsMultiline = true;
    // For multi-line tokens, ColumnWidth is the width of the first line.
    // FIXME: ColumnWidth actually depends on the start column, we need to
    // take this into account when the token is moved.
    FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
        Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);

    // The last line of the token always starts in column 0.
    // Thus, the length can be precomputed even in the presence of tabs.
    FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
        Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
        Encoding);
    Column = FormatTok->LastLineColumnWidth;
  }

  // ForEachMacros is sorted by the constructor, which makes the binary
  // search valid.
  FormatTok->IsForEachMacro =
      std::binary_search(ForEachMacros.begin(), ForEachMacros.end(),
                         FormatTok->Tok.getIdentifierInfo());

  return FormatTok;
}
1702 FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs( 1703 Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, 1704 Encoding); 1705 Column = FormatTok->LastLineColumnWidth; 1706 } 1707 1708 FormatTok->IsForEachMacro = 1709 std::binary_search(ForEachMacros.begin(), ForEachMacros.end(), 1710 FormatTok->Tok.getIdentifierInfo()); 1711 1712 return FormatTok; 1713 } 1714 1715 FormatToken *FormatTok; 1716 bool IsFirstToken; 1717 bool GreaterStashed; 1718 unsigned Column; 1719 unsigned TrailingWhitespace; 1720 std::unique_ptr<Lexer> Lex; 1721 SourceManager &SourceMgr; 1722 FileID ID; 1723 FormatStyle &Style; 1724 IdentifierTable IdentTable; 1725 AdditionalKeywords Keywords; 1726 encoding::Encoding Encoding; 1727 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; 1728 // Index (in 'Tokens') of the last token that starts a new line. 1729 unsigned FirstInLineIndex; 1730 SmallVector<FormatToken *, 16> Tokens; 1731 SmallVector<IdentifierInfo *, 8> ForEachMacros; 1732 1733 bool FormattingDisabled; 1734 1735 void readRawToken(FormatToken &Tok) { 1736 Lex->LexFromRawLexer(Tok.Tok); 1737 Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), 1738 Tok.Tok.getLength()); 1739 // For formatting, treat unterminated string literals like normal string 1740 // literals. 
1741 if (Tok.is(tok::unknown)) { 1742 if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') { 1743 Tok.Tok.setKind(tok::string_literal); 1744 Tok.IsUnterminatedLiteral = true; 1745 } else if (Style.Language == FormatStyle::LK_JavaScript && 1746 Tok.TokenText == "''") { 1747 Tok.Tok.setKind(tok::char_constant); 1748 } 1749 } 1750 1751 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" || 1752 Tok.TokenText == "/* clang-format on */")) { 1753 FormattingDisabled = false; 1754 } 1755 1756 Tok.Finalized = FormattingDisabled; 1757 1758 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" || 1759 Tok.TokenText == "/* clang-format off */")) { 1760 FormattingDisabled = true; 1761 } 1762 } 1763 1764 void resetLexer(unsigned Offset) { 1765 StringRef Buffer = SourceMgr.getBufferData(ID); 1766 Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID), 1767 getFormattingLangOpts(Style), Buffer.begin(), 1768 Buffer.begin() + Offset, Buffer.end())); 1769 Lex->SetKeepWhitespaceMode(true); 1770 } 1771 }; 1772 1773 static StringRef getLanguageName(FormatStyle::LanguageKind Language) { 1774 switch (Language) { 1775 case FormatStyle::LK_Cpp: 1776 return "C++"; 1777 case FormatStyle::LK_Java: 1778 return "Java"; 1779 case FormatStyle::LK_JavaScript: 1780 return "JavaScript"; 1781 case FormatStyle::LK_Proto: 1782 return "Proto"; 1783 default: 1784 return "Unknown"; 1785 } 1786 } 1787 1788 class Formatter : public UnwrappedLineConsumer { 1789 public: 1790 Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID, 1791 ArrayRef<CharSourceRange> Ranges) 1792 : Style(Style), ID(ID), SourceMgr(SourceMgr), 1793 Whitespaces(SourceMgr, Style, 1794 inputUsesCRLF(SourceMgr.getBufferData(ID))), 1795 Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1), 1796 Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) { 1797 DEBUG(llvm::dbgs() << "File encoding: " 1798 << (Encoding == encoding::Encoding_UTF8 ? 
"UTF8" 1799 : "unknown") 1800 << "\n"); 1801 DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language) 1802 << "\n"); 1803 } 1804 1805 tooling::Replacements format() { 1806 tooling::Replacements Result; 1807 FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding); 1808 1809 UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), 1810 *this); 1811 bool StructuralError = Parser.parse(); 1812 assert(UnwrappedLines.rbegin()->empty()); 1813 for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; 1814 ++Run) { 1815 DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); 1816 SmallVector<AnnotatedLine *, 16> AnnotatedLines; 1817 for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) { 1818 AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i])); 1819 } 1820 tooling::Replacements RunResult = 1821 format(AnnotatedLines, StructuralError, Tokens); 1822 DEBUG({ 1823 llvm::dbgs() << "Replacements for run " << Run << ":\n"; 1824 for (tooling::Replacements::iterator I = RunResult.begin(), 1825 E = RunResult.end(); 1826 I != E; ++I) { 1827 llvm::dbgs() << I->toString() << "\n"; 1828 } 1829 }); 1830 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1831 delete AnnotatedLines[i]; 1832 } 1833 Result.insert(RunResult.begin(), RunResult.end()); 1834 Whitespaces.reset(); 1835 } 1836 return Result; 1837 } 1838 1839 tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, 1840 bool StructuralError, FormatTokenLexer &Tokens) { 1841 TokenAnnotator Annotator(Style, Tokens.getKeywords()); 1842 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1843 Annotator.annotate(*AnnotatedLines[i]); 1844 } 1845 deriveLocalStyle(AnnotatedLines); 1846 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1847 Annotator.calculateFormattingInformation(*AnnotatedLines[i]); 1848 } 1849 computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end()); 1850 1851 
Annotator.setCommentLineLevels(AnnotatedLines); 1852 ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr, 1853 Whitespaces, Encoding, 1854 BinPackInconclusiveFunctions); 1855 UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style); 1856 Formatter.format(AnnotatedLines, /*DryRun=*/false); 1857 return Whitespaces.generateReplacements(); 1858 } 1859 1860 private: 1861 // Determines which lines are affected by the SourceRanges given as input. 1862 // Returns \c true if at least one line between I and E or one of their 1863 // children is affected. 1864 bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I, 1865 SmallVectorImpl<AnnotatedLine *>::iterator E) { 1866 bool SomeLineAffected = false; 1867 const AnnotatedLine *PreviousLine = nullptr; 1868 while (I != E) { 1869 AnnotatedLine *Line = *I; 1870 Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First); 1871 1872 // If a line is part of a preprocessor directive, it needs to be formatted 1873 // if any token within the directive is affected. 1874 if (Line->InPPDirective) { 1875 FormatToken *Last = Line->Last; 1876 SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1; 1877 while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) { 1878 Last = (*PPEnd)->Last; 1879 ++PPEnd; 1880 } 1881 1882 if (affectsTokenRange(*Line->First, *Last, 1883 /*IncludeLeadingNewlines=*/false)) { 1884 SomeLineAffected = true; 1885 markAllAsAffected(I, PPEnd); 1886 } 1887 I = PPEnd; 1888 continue; 1889 } 1890 1891 if (nonPPLineAffected(Line, PreviousLine)) 1892 SomeLineAffected = true; 1893 1894 PreviousLine = Line; 1895 ++I; 1896 } 1897 return SomeLineAffected; 1898 } 1899 1900 // Determines whether 'Line' is affected by the SourceRanges given as input. 1901 // Returns \c true if line or one if its children is affected. 
1902 bool nonPPLineAffected(AnnotatedLine *Line, 1903 const AnnotatedLine *PreviousLine) { 1904 bool SomeLineAffected = false; 1905 Line->ChildrenAffected = 1906 computeAffectedLines(Line->Children.begin(), Line->Children.end()); 1907 if (Line->ChildrenAffected) 1908 SomeLineAffected = true; 1909 1910 // Stores whether one of the line's tokens is directly affected. 1911 bool SomeTokenAffected = false; 1912 // Stores whether we need to look at the leading newlines of the next token 1913 // in order to determine whether it was affected. 1914 bool IncludeLeadingNewlines = false; 1915 1916 // Stores whether the first child line of any of this line's tokens is 1917 // affected. 1918 bool SomeFirstChildAffected = false; 1919 1920 for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { 1921 // Determine whether 'Tok' was affected. 1922 if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines)) 1923 SomeTokenAffected = true; 1924 1925 // Determine whether the first child of 'Tok' was affected. 1926 if (!Tok->Children.empty() && Tok->Children.front()->Affected) 1927 SomeFirstChildAffected = true; 1928 1929 IncludeLeadingNewlines = Tok->Children.empty(); 1930 } 1931 1932 // Was this line moved, i.e. has it previously been on the same line as an 1933 // affected line? 1934 bool LineMoved = PreviousLine && PreviousLine->Affected && 1935 Line->First->NewlinesBefore == 0; 1936 1937 bool IsContinuedComment = 1938 Line->First->is(tok::comment) && Line->First->Next == nullptr && 1939 Line->First->NewlinesBefore < 2 && PreviousLine && 1940 PreviousLine->Affected && PreviousLine->Last->is(tok::comment); 1941 1942 if (SomeTokenAffected || SomeFirstChildAffected || LineMoved || 1943 IsContinuedComment) { 1944 Line->Affected = true; 1945 SomeLineAffected = true; 1946 } 1947 return SomeLineAffected; 1948 } 1949 1950 // Marks all lines between I and E as well as all their children as affected. 
1951 void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I, 1952 SmallVectorImpl<AnnotatedLine *>::iterator E) { 1953 while (I != E) { 1954 (*I)->Affected = true; 1955 markAllAsAffected((*I)->Children.begin(), (*I)->Children.end()); 1956 ++I; 1957 } 1958 } 1959 1960 // Returns true if the range from 'First' to 'Last' intersects with one of the 1961 // input ranges. 1962 bool affectsTokenRange(const FormatToken &First, const FormatToken &Last, 1963 bool IncludeLeadingNewlines) { 1964 SourceLocation Start = First.WhitespaceRange.getBegin(); 1965 if (!IncludeLeadingNewlines) 1966 Start = Start.getLocWithOffset(First.LastNewlineOffset); 1967 SourceLocation End = Last.getStartOfNonWhitespace(); 1968 End = End.getLocWithOffset(Last.TokenText.size()); 1969 CharSourceRange Range = CharSourceRange::getCharRange(Start, End); 1970 return affectsCharSourceRange(Range); 1971 } 1972 1973 // Returns true if one of the input ranges intersect the leading empty lines 1974 // before 'Tok'. 1975 bool affectsLeadingEmptyLines(const FormatToken &Tok) { 1976 CharSourceRange EmptyLineRange = CharSourceRange::getCharRange( 1977 Tok.WhitespaceRange.getBegin(), 1978 Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset)); 1979 return affectsCharSourceRange(EmptyLineRange); 1980 } 1981 1982 // Returns true if 'Range' intersects with one of the input ranges. 
1983 bool affectsCharSourceRange(const CharSourceRange &Range) { 1984 for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(), 1985 E = Ranges.end(); 1986 I != E; ++I) { 1987 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) && 1988 !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin())) 1989 return true; 1990 } 1991 return false; 1992 } 1993 1994 static bool inputUsesCRLF(StringRef Text) { 1995 return Text.count('\r') * 2 > Text.count('\n'); 1996 } 1997 1998 void 1999 deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { 2000 unsigned CountBoundToVariable = 0; 2001 unsigned CountBoundToType = 0; 2002 bool HasCpp03IncompatibleFormat = false; 2003 bool HasBinPackedFunction = false; 2004 bool HasOnePerLineFunction = false; 2005 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 2006 if (!AnnotatedLines[i]->First->Next) 2007 continue; 2008 FormatToken *Tok = AnnotatedLines[i]->First->Next; 2009 while (Tok->Next) { 2010 if (Tok->Type == TT_PointerOrReference) { 2011 bool SpacesBefore = 2012 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); 2013 bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() != 2014 Tok->Next->WhitespaceRange.getEnd(); 2015 if (SpacesBefore && !SpacesAfter) 2016 ++CountBoundToVariable; 2017 else if (!SpacesBefore && SpacesAfter) 2018 ++CountBoundToType; 2019 } 2020 2021 if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) { 2022 if (Tok->is(tok::coloncolon) && 2023 Tok->Previous->Type == TT_TemplateOpener) 2024 HasCpp03IncompatibleFormat = true; 2025 if (Tok->Type == TT_TemplateCloser && 2026 Tok->Previous->Type == TT_TemplateCloser) 2027 HasCpp03IncompatibleFormat = true; 2028 } 2029 2030 if (Tok->PackingKind == PPK_BinPacked) 2031 HasBinPackedFunction = true; 2032 if (Tok->PackingKind == PPK_OnePerLine) 2033 HasOnePerLineFunction = true; 2034 2035 Tok = Tok->Next; 2036 } 2037 } 2038 if (Style.DerivePointerAlignment) { 2039 if 
(CountBoundToType > CountBoundToVariable) 2040 Style.PointerAlignment = FormatStyle::PAS_Left; 2041 else if (CountBoundToType < CountBoundToVariable) 2042 Style.PointerAlignment = FormatStyle::PAS_Right; 2043 } 2044 if (Style.Standard == FormatStyle::LS_Auto) { 2045 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11 2046 : FormatStyle::LS_Cpp03; 2047 } 2048 BinPackInconclusiveFunctions = 2049 HasBinPackedFunction || !HasOnePerLineFunction; 2050 } 2051 2052 void consumeUnwrappedLine(const UnwrappedLine &TheLine) override { 2053 assert(!UnwrappedLines.empty()); 2054 UnwrappedLines.back().push_back(TheLine); 2055 } 2056 2057 void finishRun() override { 2058 UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>()); 2059 } 2060 2061 FormatStyle Style; 2062 FileID ID; 2063 SourceManager &SourceMgr; 2064 WhitespaceManager Whitespaces; 2065 SmallVector<CharSourceRange, 8> Ranges; 2066 SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines; 2067 2068 encoding::Encoding Encoding; 2069 bool BinPackInconclusiveFunctions; 2070 }; 2071 2072 } // end anonymous namespace 2073 2074 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, 2075 SourceManager &SourceMgr, 2076 ArrayRef<CharSourceRange> Ranges) { 2077 if (Style.DisableFormat) 2078 return tooling::Replacements(); 2079 return reformat(Style, SourceMgr, 2080 SourceMgr.getFileID(Lex.getSourceLocation()), Ranges); 2081 } 2082 2083 tooling::Replacements reformat(const FormatStyle &Style, 2084 SourceManager &SourceMgr, FileID ID, 2085 ArrayRef<CharSourceRange> Ranges) { 2086 if (Style.DisableFormat) 2087 return tooling::Replacements(); 2088 Formatter formatter(Style, SourceMgr, ID, Ranges); 2089 return formatter.format(); 2090 } 2091 2092 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, 2093 ArrayRef<tooling::Range> Ranges, 2094 StringRef FileName) { 2095 if (Style.DisableFormat) 2096 return tooling::Replacements(); 2097 2098 FileManager 
Files((FileSystemOptions())); 2099 DiagnosticsEngine Diagnostics( 2100 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 2101 new DiagnosticOptions); 2102 SourceManager SourceMgr(Diagnostics, Files); 2103 std::unique_ptr<llvm::MemoryBuffer> Buf = 2104 llvm::MemoryBuffer::getMemBuffer(Code, FileName); 2105 const clang::FileEntry *Entry = 2106 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); 2107 SourceMgr.overrideFileContents(Entry, std::move(Buf)); 2108 FileID ID = 2109 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); 2110 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); 2111 std::vector<CharSourceRange> CharRanges; 2112 for (const tooling::Range &Range : Ranges) { 2113 SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset()); 2114 SourceLocation End = Start.getLocWithOffset(Range.getLength()); 2115 CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); 2116 } 2117 return reformat(Style, SourceMgr, ID, CharRanges); 2118 } 2119 2120 LangOptions getFormattingLangOpts(const FormatStyle &Style) { 2121 LangOptions LangOpts; 2122 LangOpts.CPlusPlus = 1; 2123 LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 2124 LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 2125 LangOpts.LineComment = 1; 2126 LangOpts.CXXOperatorNames = 2127 Style.Language != FormatStyle::LK_JavaScript ? 
1 : 0; 2128 LangOpts.Bool = 1; 2129 LangOpts.ObjC1 = 1; 2130 LangOpts.ObjC2 = 1; 2131 return LangOpts; 2132 } 2133 2134 const char *StyleOptionHelpDescription = 2135 "Coding style, currently supports:\n" 2136 " LLVM, Google, Chromium, Mozilla, WebKit.\n" 2137 "Use -style=file to load style configuration from\n" 2138 ".clang-format file located in one of the parent\n" 2139 "directories of the source file (or current\n" 2140 "directory for stdin).\n" 2141 "Use -style=\"{key: value, ...}\" to set specific\n" 2142 "parameters, e.g.:\n" 2143 " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; 2144 2145 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { 2146 if (FileName.endswith(".java")) { 2147 return FormatStyle::LK_Java; 2148 } else if (FileName.endswith_lower(".js")) { 2149 return FormatStyle::LK_JavaScript; 2150 } else if (FileName.endswith_lower(".proto") || 2151 FileName.endswith_lower(".protodevel")) { 2152 return FormatStyle::LK_Proto; 2153 } 2154 return FormatStyle::LK_Cpp; 2155 } 2156 2157 FormatStyle getStyle(StringRef StyleName, StringRef FileName, 2158 StringRef FallbackStyle) { 2159 FormatStyle Style = getLLVMStyle(); 2160 Style.Language = getLanguageByFileName(FileName); 2161 if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) { 2162 llvm::errs() << "Invalid fallback style \"" << FallbackStyle 2163 << "\" using LLVM style\n"; 2164 return Style; 2165 } 2166 2167 if (StyleName.startswith("{")) { 2168 // Parse YAML/JSON style from the command line. 
2169 if (std::error_code ec = parseConfiguration(StyleName, &Style)) { 2170 llvm::errs() << "Error parsing -style: " << ec.message() << ", using " 2171 << FallbackStyle << " style\n"; 2172 } 2173 return Style; 2174 } 2175 2176 if (!StyleName.equals_lower("file")) { 2177 if (!getPredefinedStyle(StyleName, Style.Language, &Style)) 2178 llvm::errs() << "Invalid value for -style, using " << FallbackStyle 2179 << " style\n"; 2180 return Style; 2181 } 2182 2183 // Look for .clang-format/_clang-format file in the file's parent directories. 2184 SmallString<128> UnsuitableConfigFiles; 2185 SmallString<128> Path(FileName); 2186 llvm::sys::fs::make_absolute(Path); 2187 for (StringRef Directory = Path; !Directory.empty(); 2188 Directory = llvm::sys::path::parent_path(Directory)) { 2189 if (!llvm::sys::fs::is_directory(Directory)) 2190 continue; 2191 SmallString<128> ConfigFile(Directory); 2192 2193 llvm::sys::path::append(ConfigFile, ".clang-format"); 2194 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); 2195 bool IsFile = false; 2196 // Ignore errors from is_regular_file: we only need to know if we can read 2197 // the file or not. 2198 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); 2199 2200 if (!IsFile) { 2201 // Try _clang-format too, since dotfiles are not commonly used on Windows. 
2202 ConfigFile = Directory; 2203 llvm::sys::path::append(ConfigFile, "_clang-format"); 2204 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); 2205 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); 2206 } 2207 2208 if (IsFile) { 2209 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 2210 llvm::MemoryBuffer::getFile(ConfigFile.c_str()); 2211 if (std::error_code EC = Text.getError()) { 2212 llvm::errs() << EC.message() << "\n"; 2213 break; 2214 } 2215 if (std::error_code ec = 2216 parseConfiguration(Text.get()->getBuffer(), &Style)) { 2217 if (ec == ParseError::Unsuitable) { 2218 if (!UnsuitableConfigFiles.empty()) 2219 UnsuitableConfigFiles.append(", "); 2220 UnsuitableConfigFiles.append(ConfigFile); 2221 continue; 2222 } 2223 llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message() 2224 << "\n"; 2225 break; 2226 } 2227 DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n"); 2228 return Style; 2229 } 2230 } 2231 llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle 2232 << " style\n"; 2233 if (!UnsuitableConfigFiles.empty()) { 2234 llvm::errs() << "Configuration file(s) do(es) not support " 2235 << getLanguageName(Style.Language) << ": " 2236 << UnsuitableConfigFiles << "\n"; 2237 } 2238 return Style; 2239 } 2240 2241 } // namespace format 2242 } // namespace clang 2243