1 //===--- Format.cpp - Format C++ code -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements functions declared in Format.h. This will be 12 /// split into separate files as we go. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "format-formatter" 17 18 #include "ContinuationIndenter.h" 19 #include "TokenAnnotator.h" 20 #include "UnwrappedLineParser.h" 21 #include "WhitespaceManager.h" 22 #include "clang/Basic/Diagnostic.h" 23 #include "clang/Basic/SourceManager.h" 24 #include "clang/Format/Format.h" 25 #include "clang/Lex/Lexer.h" 26 #include "llvm/ADT/STLExtras.h" 27 #include "llvm/Support/Allocator.h" 28 #include "llvm/Support/Debug.h" 29 #include "llvm/Support/Path.h" 30 #include "llvm/Support/YAMLTraits.h" 31 #include <queue> 32 #include <string> 33 34 using clang::format::FormatStyle; 35 36 namespace llvm { 37 namespace yaml { 38 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> { 39 static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) { 40 IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp); 41 IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript); 42 IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); 43 } 44 }; 45 46 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> { 47 static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) { 48 IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03); 49 IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03); 50 IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11); 51 IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11); 52 IO.enumCase(Value, "Auto", FormatStyle::LS_Auto); 53 } 54 }; 55 56 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> { 57 static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) { 58 IO.enumCase(Value, "Never", FormatStyle::UT_Never); 59 IO.enumCase(Value, "false", FormatStyle::UT_Never); 60 IO.enumCase(Value, "Always", FormatStyle::UT_Always); 61 IO.enumCase(Value, "true", FormatStyle::UT_Always); 62 IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation); 63 } 64 }; 65 66 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> { 67 static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) { 68 IO.enumCase(Value, "Attach", FormatStyle::BS_Attach); 69 IO.enumCase(Value, "Linux", FormatStyle::BS_Linux); 70 IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup); 71 IO.enumCase(Value, "Allman", FormatStyle::BS_Allman); 72 IO.enumCase(Value, "GNU", FormatStyle::BS_GNU); 73 } 74 }; 75 76 template <> 77 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> { 78 static void enumeration(IO &IO, 79 FormatStyle::NamespaceIndentationKind &Value) { 80 IO.enumCase(Value, "None", FormatStyle::NI_None); 81 IO.enumCase(Value, "Inner", FormatStyle::NI_Inner); 82 IO.enumCase(Value, "All", FormatStyle::NI_All); 83 } 84 }; 85 86 template <> 87 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> { 88 static void enumeration(IO &IO, 89 FormatStyle::SpaceBeforeParensOptions &Value) { 90 IO.enumCase(Value, "Never", FormatStyle::SBPO_Never); 91 IO.enumCase(Value, "ControlStatements", 92 FormatStyle::SBPO_ControlStatements); 93 IO.enumCase(Value, "Always", FormatStyle::SBPO_Always); 94 95 // For backward compatibility. 96 IO.enumCase(Value, "false", FormatStyle::SBPO_Never); 97 IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements); 98 } 99 }; 100 101 template <> struct MappingTraits<FormatStyle> { 102 static void mapping(IO &IO, FormatStyle &Style) { 103 // When reading, read the language first, we need it for getPredefinedStyle. 104 IO.mapOptional("Language", Style.Language); 105 106 if (IO.outputting()) { 107 StringRef StylesArray[] = { "LLVM", "Google", "Chromium", 108 "Mozilla", "WebKit", "GNU" }; 109 ArrayRef<StringRef> Styles(StylesArray); 110 for (size_t i = 0, e = Styles.size(); i < e; ++i) { 111 StringRef StyleName(Styles[i]); 112 FormatStyle PredefinedStyle; 113 if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) && 114 Style == PredefinedStyle) { 115 IO.mapOptional("# BasedOnStyle", StyleName); 116 break; 117 } 118 } 119 } else { 120 StringRef BasedOnStyle; 121 IO.mapOptional("BasedOnStyle", BasedOnStyle); 122 if (!BasedOnStyle.empty()) { 123 FormatStyle::LanguageKind OldLanguage = Style.Language; 124 FormatStyle::LanguageKind Language = 125 ((FormatStyle *)IO.getContext())->Language; 126 if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) { 127 IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle)); 128 return; 129 } 130 Style.Language = OldLanguage; 131 } 132 } 133 134 IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); 135 IO.mapOptional("ConstructorInitializerIndentWidth", 136 Style.ConstructorInitializerIndentWidth); 137 IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); 138 IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); 139 IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", 140 Style.AllowAllParametersOfDeclarationOnNextLine); 141 IO.mapOptional("AllowShortIfStatementsOnASingleLine", 142 Style.AllowShortIfStatementsOnASingleLine); 143 IO.mapOptional("AllowShortLoopsOnASingleLine", 144 Style.AllowShortLoopsOnASingleLine); 145 IO.mapOptional("AllowShortFunctionsOnASingleLine", 146 Style.AllowShortFunctionsOnASingleLine); 147 IO.mapOptional("AlwaysBreakTemplateDeclarations", 148 Style.AlwaysBreakTemplateDeclarations); 149 IO.mapOptional("AlwaysBreakBeforeMultilineStrings", 150 Style.AlwaysBreakBeforeMultilineStrings); 151 IO.mapOptional("BreakBeforeBinaryOperators", 152 Style.BreakBeforeBinaryOperators); 153 IO.mapOptional("BreakBeforeTernaryOperators", 154 Style.BreakBeforeTernaryOperators); 155 IO.mapOptional("BreakConstructorInitializersBeforeComma", 156 Style.BreakConstructorInitializersBeforeComma); 157 IO.mapOptional("BinPackParameters", Style.BinPackParameters); 158 IO.mapOptional("ColumnLimit", Style.ColumnLimit); 159 IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", 160 Style.ConstructorInitializerAllOnOneLineOrOnePerLine); 161 IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding); 162 IO.mapOptional("ExperimentalAutoDetectBinPacking", 163 Style.ExperimentalAutoDetectBinPacking); 164 IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); 165 IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); 166 IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); 167 IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty); 168 IO.mapOptional("ObjCSpaceBeforeProtocolList", 169 Style.ObjCSpaceBeforeProtocolList); 170 IO.mapOptional("PenaltyBreakBeforeFirstCallParameter", 171 Style.PenaltyBreakBeforeFirstCallParameter); 172 IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); 173 IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString); 174 IO.mapOptional("PenaltyBreakFirstLessLess", 175 Style.PenaltyBreakFirstLessLess); 176 IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter); 177 IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", 178 Style.PenaltyReturnTypeOnItsOwnLine); 179 IO.mapOptional("PointerBindsToType", Style.PointerBindsToType); 180 IO.mapOptional("SpacesBeforeTrailingComments", 181 Style.SpacesBeforeTrailingComments); 182 IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle); 183 IO.mapOptional("Standard", Style.Standard); 184 IO.mapOptional("IndentWidth", Style.IndentWidth); 185 IO.mapOptional("TabWidth", Style.TabWidth); 186 IO.mapOptional("UseTab", Style.UseTab); 187 IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); 188 IO.mapOptional("IndentFunctionDeclarationAfterType", 189 Style.IndentFunctionDeclarationAfterType); 190 IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses); 191 IO.mapOptional("SpacesInAngles", Style.SpacesInAngles); 192 IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses); 193 IO.mapOptional("SpacesInCStyleCastParentheses", 194 Style.SpacesInCStyleCastParentheses); 195 IO.mapOptional("SpacesInContainerLiterals", 196 Style.SpacesInContainerLiterals); 197 IO.mapOptional("SpaceBeforeAssignmentOperators", 198 Style.SpaceBeforeAssignmentOperators); 199 IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth); 200 IO.mapOptional("CommentPragmas", Style.CommentPragmas); 201 202 // For backward compatibility. 203 if (!IO.outputting()) { 204 IO.mapOptional("SpaceAfterControlStatementKeyword", 205 Style.SpaceBeforeParens); 206 } 207 IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens); 208 } 209 }; 210 211 // Allows to read vector<FormatStyle> while keeping default values. 212 // IO.getContext() should contain a pointer to the FormatStyle structure, that 213 // will be used to get default values for missing keys. 214 // If the first element has no Language specified, it will be treated as the 215 // default one for the following elements. 216 template <> struct DocumentListTraits<std::vector<FormatStyle> > { 217 static size_t size(IO &IO, std::vector<FormatStyle> &Seq) { 218 return Seq.size(); 219 } 220 static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq, 221 size_t Index) { 222 if (Index >= Seq.size()) { 223 assert(Index == Seq.size()); 224 FormatStyle Template; 225 if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) { 226 Template = Seq[0]; 227 } else { 228 Template = *((const FormatStyle*)IO.getContext()); 229 Template.Language = FormatStyle::LK_None; 230 } 231 Seq.resize(Index + 1, Template); 232 } 233 return Seq[Index]; 234 } 235 }; 236 } 237 } 238 239 namespace clang { 240 namespace format { 241 242 FormatStyle getLLVMStyle() { 243 FormatStyle LLVMStyle; 244 LLVMStyle.Language = FormatStyle::LK_Cpp; 245 LLVMStyle.AccessModifierOffset = -2; 246 LLVMStyle.AlignEscapedNewlinesLeft = false; 247 LLVMStyle.AlignTrailingComments = true; 248 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 249 LLVMStyle.AllowShortFunctionsOnASingleLine = true; 250 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 251 LLVMStyle.AllowShortLoopsOnASingleLine = false; 252 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; 253 LLVMStyle.AlwaysBreakTemplateDeclarations = false; 254 LLVMStyle.BinPackParameters = true; 255 LLVMStyle.BreakBeforeBinaryOperators = false; 256 LLVMStyle.BreakBeforeTernaryOperators = true; 257 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 258 LLVMStyle.BreakConstructorInitializersBeforeComma = false; 259 LLVMStyle.ColumnLimit = 80; 260 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 261 LLVMStyle.ConstructorInitializerIndentWidth = 4; 262 LLVMStyle.Cpp11BracedListStyle = true; 263 LLVMStyle.DerivePointerBinding = false; 264 LLVMStyle.ExperimentalAutoDetectBinPacking = false; 265 LLVMStyle.IndentCaseLabels = false; 266 LLVMStyle.IndentFunctionDeclarationAfterType = false; 267 LLVMStyle.IndentWidth = 2; 268 LLVMStyle.TabWidth = 8; 269 LLVMStyle.MaxEmptyLinesToKeep = 1; 270 LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; 271 LLVMStyle.ObjCSpaceAfterProperty = false; 272 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 273 LLVMStyle.PointerBindsToType = false; 274 LLVMStyle.SpacesBeforeTrailingComments = 1; 275 LLVMStyle.Standard = FormatStyle::LS_Cpp11; 276 LLVMStyle.UseTab = FormatStyle::UT_Never; 277 LLVMStyle.SpacesInParentheses = false; 278 LLVMStyle.SpaceInEmptyParentheses = false; 279 LLVMStyle.SpacesInContainerLiterals = true; 280 LLVMStyle.SpacesInCStyleCastParentheses = false; 281 LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements; 282 LLVMStyle.SpaceBeforeAssignmentOperators = true; 283 LLVMStyle.ContinuationIndentWidth = 4; 284 LLVMStyle.SpacesInAngles = false; 285 LLVMStyle.CommentPragmas = "^ IWYU pragma:"; 286 287 LLVMStyle.PenaltyBreakComment = 300; 288 LLVMStyle.PenaltyBreakFirstLessLess = 120; 289 LLVMStyle.PenaltyBreakString = 1000; 290 LLVMStyle.PenaltyExcessCharacter = 1000000; 291 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; 292 LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19; 293 294 return LLVMStyle; 295 } 296 297 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { 298 FormatStyle GoogleStyle = getLLVMStyle(); 299 GoogleStyle.Language = Language; 300 301 GoogleStyle.AccessModifierOffset = -1; 302 GoogleStyle.AlignEscapedNewlinesLeft = true; 303 GoogleStyle.AllowShortIfStatementsOnASingleLine = true; 304 GoogleStyle.AllowShortLoopsOnASingleLine = true; 305 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; 306 GoogleStyle.AlwaysBreakTemplateDeclarations = true; 307 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 308 GoogleStyle.DerivePointerBinding = true; 309 GoogleStyle.IndentCaseLabels = true; 310 GoogleStyle.IndentFunctionDeclarationAfterType = true; 311 GoogleStyle.ObjCSpaceAfterProperty = false; 312 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 313 GoogleStyle.PointerBindsToType = true; 314 GoogleStyle.SpacesBeforeTrailingComments = 2; 315 GoogleStyle.Standard = FormatStyle::LS_Auto; 316 317 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; 318 GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1; 319 320 if (Language == FormatStyle::LK_JavaScript) { 321 GoogleStyle.BreakBeforeTernaryOperators = false; 322 GoogleStyle.MaxEmptyLinesToKeep = 2; 323 GoogleStyle.SpacesInContainerLiterals = false; 324 } else if (Language == FormatStyle::LK_Proto) { 325 GoogleStyle.AllowShortFunctionsOnASingleLine = false; 326 } 327 328 return GoogleStyle; 329 } 330 331 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { 332 FormatStyle ChromiumStyle = getGoogleStyle(Language); 333 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; 334 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; 335 ChromiumStyle.AllowShortLoopsOnASingleLine = false; 336 ChromiumStyle.BinPackParameters = false; 337 ChromiumStyle.DerivePointerBinding = false; 338 ChromiumStyle.Standard = FormatStyle::LS_Cpp03; 339 return ChromiumStyle; 340 } 341 342 FormatStyle getMozillaStyle() { 343 FormatStyle MozillaStyle = getLLVMStyle(); 344 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; 345 MozillaStyle.Cpp11BracedListStyle = false; 346 MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 347 MozillaStyle.DerivePointerBinding = true; 348 MozillaStyle.IndentCaseLabels = true; 349 MozillaStyle.ObjCSpaceAfterProperty = true; 350 MozillaStyle.ObjCSpaceBeforeProtocolList = false; 351 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; 352 MozillaStyle.PointerBindsToType = true; 353 MozillaStyle.Standard = FormatStyle::LS_Cpp03; 354 return MozillaStyle; 355 } 356 357 FormatStyle getWebKitStyle() { 358 FormatStyle Style = getLLVMStyle(); 359 Style.AccessModifierOffset = -4; 360 Style.AlignTrailingComments = false; 361 Style.BreakBeforeBinaryOperators = true; 362 Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup; 363 Style.BreakConstructorInitializersBeforeComma = true; 364 Style.Cpp11BracedListStyle = false; 365 Style.ColumnLimit = 0; 366 Style.IndentWidth = 4; 367 Style.NamespaceIndentation = FormatStyle::NI_Inner; 368 Style.ObjCSpaceAfterProperty = true; 369 Style.PointerBindsToType = true; 370 Style.Standard = FormatStyle::LS_Cpp03; 371 return Style; 372 } 373 374 FormatStyle getGNUStyle() { 375 FormatStyle Style = getLLVMStyle(); 376 Style.BreakBeforeBinaryOperators = true; 377 Style.BreakBeforeBraces = FormatStyle::BS_GNU; 378 Style.BreakBeforeTernaryOperators = true; 379 Style.Cpp11BracedListStyle = false; 380 Style.ColumnLimit = 79; 381 Style.SpaceBeforeParens = FormatStyle::SBPO_Always; 382 Style.Standard = FormatStyle::LS_Cpp03; 383 return Style; 384 } 385 386 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language, 387 FormatStyle *Style) { 388 if (Name.equals_lower("llvm")) { 389 *Style = getLLVMStyle(); 390 } else if (Name.equals_lower("chromium")) { 391 *Style = getChromiumStyle(Language); 392 } else if (Name.equals_lower("mozilla")) { 393 *Style = getMozillaStyle(); 394 } else if (Name.equals_lower("google")) { 395 *Style = getGoogleStyle(Language); 396 } else if (Name.equals_lower("webkit")) { 397 *Style = getWebKitStyle(); 398 } else if (Name.equals_lower("gnu")) { 399 *Style = getGNUStyle(); 400 } else { 401 return false; 402 } 403 404 Style->Language = Language; 405 return true; 406 } 407 408 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { 409 assert(Style); 410 FormatStyle::LanguageKind Language = Style->Language; 411 assert(Language != FormatStyle::LK_None); 412 if (Text.trim().empty()) 413 return llvm::make_error_code(llvm::errc::invalid_argument); 414 415 std::vector<FormatStyle> Styles; 416 llvm::yaml::Input Input(Text); 417 // DocumentListTraits<vector<FormatStyle>> uses the context to get default 418 // values for the fields, keys for which are missing from the configuration. 419 // Mapping also uses the context to get the language to find the correct 420 // base style. 421 Input.setContext(Style); 422 Input >> Styles; 423 if (Input.error()) 424 return Input.error(); 425 426 for (unsigned i = 0; i < Styles.size(); ++i) { 427 // Ensures that only the first configuration can skip the Language option. 428 if (Styles[i].Language == FormatStyle::LK_None && i != 0) 429 return llvm::make_error_code(llvm::errc::invalid_argument); 430 // Ensure that each language is configured at most once. 431 for (unsigned j = 0; j < i; ++j) { 432 if (Styles[i].Language == Styles[j].Language) { 433 DEBUG(llvm::dbgs() 434 << "Duplicate languages in the config file on positions " << j 435 << " and " << i << "\n"); 436 return llvm::make_error_code(llvm::errc::invalid_argument); 437 } 438 } 439 } 440 // Look for a suitable configuration starting from the end, so we can 441 // find the configuration for the specific language first, and the default 442 // configuration (which can only be at slot 0) after it. 443 for (int i = Styles.size() - 1; i >= 0; --i) { 444 if (Styles[i].Language == Language || 445 Styles[i].Language == FormatStyle::LK_None) { 446 *Style = Styles[i]; 447 Style->Language = Language; 448 return llvm::make_error_code(llvm::errc::success); 449 } 450 } 451 return llvm::make_error_code(llvm::errc::not_supported); 452 } 453 454 std::string configurationAsText(const FormatStyle &Style) { 455 std::string Text; 456 llvm::raw_string_ostream Stream(Text); 457 llvm::yaml::Output Output(Stream); 458 // We use the same mapping method for input and output, so we need a non-const 459 // reference here. 460 FormatStyle NonConstStyle = Style; 461 Output << NonConstStyle; 462 return Stream.str(); 463 } 464 465 namespace { 466 467 class NoColumnLimitFormatter { 468 public: 469 NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {} 470 471 /// \brief Formats the line starting at \p State, simply keeping all of the 472 /// input's line breaking decisions. 473 void format(unsigned FirstIndent, const AnnotatedLine *Line) { 474 LineState State = 475 Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false); 476 while (State.NextToken != NULL) { 477 bool Newline = 478 Indenter->mustBreak(State) || 479 (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0); 480 Indenter->addTokenToState(State, Newline, /*DryRun=*/false); 481 } 482 } 483 484 private: 485 ContinuationIndenter *Indenter; 486 }; 487 488 class LineJoiner { 489 public: 490 LineJoiner(const FormatStyle &Style) : Style(Style) {} 491 492 /// \brief Calculates how many lines can be merged into 1 starting at \p I. 493 unsigned 494 tryFitMultipleLinesInOne(unsigned Indent, 495 SmallVectorImpl<AnnotatedLine *>::const_iterator I, 496 SmallVectorImpl<AnnotatedLine *>::const_iterator E) { 497 // We can never merge stuff if there are trailing line comments. 498 const AnnotatedLine *TheLine = *I; 499 if (TheLine->Last->Type == TT_LineComment) 500 return 0; 501 502 if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit) 503 return 0; 504 505 unsigned Limit = 506 Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent; 507 // If we already exceed the column limit, we set 'Limit' to 0. The different 508 // tryMerge..() functions can then decide whether to still do merging. 509 Limit = TheLine->Last->TotalLength > Limit 510 ? 0 511 : Limit - TheLine->Last->TotalLength; 512 513 if (I + 1 == E || I[1]->Type == LT_Invalid) 514 return 0; 515 516 if (TheLine->Last->Type == TT_FunctionLBrace && 517 TheLine->First != TheLine->Last) { 518 return Style.AllowShortFunctionsOnASingleLine 519 ? tryMergeSimpleBlock(I, E, Limit) 520 : 0; 521 } 522 if (TheLine->Last->is(tok::l_brace)) { 523 return Style.BreakBeforeBraces == FormatStyle::BS_Attach 524 ? tryMergeSimpleBlock(I, E, Limit) 525 : 0; 526 } 527 if (I[1]->First->Type == TT_FunctionLBrace && 528 Style.BreakBeforeBraces != FormatStyle::BS_Attach) { 529 // Check for Limit <= 2 to account for the " {". 530 if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine))) 531 return 0; 532 Limit -= 2; 533 534 unsigned MergedLines = 0; 535 if (Style.AllowShortFunctionsOnASingleLine) { 536 MergedLines = tryMergeSimpleBlock(I + 1, E, Limit); 537 // If we managed to merge the block, count the function header, which is 538 // on a separate line. 539 if (MergedLines > 0) 540 ++MergedLines; 541 } 542 return MergedLines; 543 } 544 if (TheLine->First->is(tok::kw_if)) { 545 return Style.AllowShortIfStatementsOnASingleLine 546 ? tryMergeSimpleControlStatement(I, E, Limit) 547 : 0; 548 } 549 if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) { 550 return Style.AllowShortLoopsOnASingleLine 551 ? tryMergeSimpleControlStatement(I, E, Limit) 552 : 0; 553 } 554 if (TheLine->InPPDirective && 555 (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) { 556 return tryMergeSimplePPDirective(I, E, Limit); 557 } 558 return 0; 559 } 560 561 private: 562 unsigned 563 tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 564 SmallVectorImpl<AnnotatedLine *>::const_iterator E, 565 unsigned Limit) { 566 if (Limit == 0) 567 return 0; 568 if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline) 569 return 0; 570 if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline) 571 return 0; 572 if (1 + I[1]->Last->TotalLength > Limit) 573 return 0; 574 return 1; 575 } 576 577 unsigned tryMergeSimpleControlStatement( 578 SmallVectorImpl<AnnotatedLine *>::const_iterator I, 579 SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { 580 if (Limit == 0) 581 return 0; 582 if ((Style.BreakBeforeBraces == FormatStyle::BS_Allman || 583 Style.BreakBeforeBraces == FormatStyle::BS_GNU) && 584 I[1]->First->is(tok::l_brace)) 585 return 0; 586 if (I[1]->InPPDirective != (*I)->InPPDirective || 587 (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline)) 588 return 0; 589 Limit = limitConsideringMacros(I + 1, E, Limit); 590 AnnotatedLine &Line = **I; 591 if (Line.Last->isNot(tok::r_paren)) 592 return 0; 593 if (1 + I[1]->Last->TotalLength > Limit) 594 return 0; 595 if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, 596 tok::kw_while) || 597 I[1]->First->Type == TT_LineComment) 598 return 0; 599 // Only inline simple if's (no nested if or else). 600 if (I + 2 != E && Line.First->is(tok::kw_if) && 601 I[2]->First->is(tok::kw_else)) 602 return 0; 603 return 1; 604 } 605 606 unsigned 607 tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 608 SmallVectorImpl<AnnotatedLine *>::const_iterator E, 609 unsigned Limit) { 610 // First, check that the current line allows merging. This is the case if 611 // we're not in a control flow statement and the last token is an opening 612 // brace. 613 AnnotatedLine &Line = **I; 614 if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace, 615 tok::kw_else, tok::kw_try, tok::kw_catch, 616 tok::kw_for, 617 // This gets rid of all ObjC @ keywords and methods. 618 tok::at, tok::minus, tok::plus)) 619 return 0; 620 621 FormatToken *Tok = I[1]->First; 622 if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore && 623 (Tok->getNextNonComment() == NULL || 624 Tok->getNextNonComment()->is(tok::semi))) { 625 // We merge empty blocks even if the line exceeds the column limit. 626 Tok->SpacesRequiredBefore = 0; 627 Tok->CanBreakBefore = true; 628 return 1; 629 } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) { 630 // Check that we still have three lines and they fit into the limit. 631 if (I + 2 == E || I[2]->Type == LT_Invalid) 632 return 0; 633 Limit = limitConsideringMacros(I + 2, E, Limit); 634 635 if (!nextTwoLinesFitInto(I, Limit)) 636 return 0; 637 638 // Second, check that the next line does not contain any braces - if it 639 // does, readability declines when putting it into a single line. 640 if (I[1]->Last->Type == TT_LineComment || Tok->MustBreakBefore) 641 return 0; 642 do { 643 if (Tok->isOneOf(tok::l_brace, tok::r_brace)) 644 return 0; 645 Tok = Tok->Next; 646 } while (Tok != NULL); 647 648 // Last, check that the third line contains a single closing brace. 649 Tok = I[2]->First; 650 if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) || 651 Tok->MustBreakBefore) 652 return 0; 653 654 return 2; 655 } 656 return 0; 657 } 658 659 /// Returns the modified column limit for \p I if it is inside a macro and 660 /// needs a trailing '\'. 661 unsigned 662 limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 663 SmallVectorImpl<AnnotatedLine *>::const_iterator E, 664 unsigned Limit) { 665 if (I[0]->InPPDirective && I + 1 != E && 666 !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) { 667 return Limit < 2 ? 0 : Limit - 2; 668 } 669 return Limit; 670 } 671 672 bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 673 unsigned Limit) { 674 return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit; 675 } 676 677 bool containsMustBreak(const AnnotatedLine *Line) { 678 for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { 679 if (Tok->MustBreakBefore) 680 return true; 681 } 682 return false; 683 } 684 685 const FormatStyle &Style; 686 }; 687 688 class UnwrappedLineFormatter { 689 public: 690 UnwrappedLineFormatter(ContinuationIndenter *Indenter, 691 WhitespaceManager *Whitespaces, 692 const FormatStyle &Style) 693 : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), 694 Joiner(Style) {} 695 696 unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun, 697 int AdditionalIndent = 0, bool FixBadIndentation = false) { 698 assert(!Lines.empty()); 699 unsigned Penalty = 0; 700 std::vector<int> IndentForLevel; 701 for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i) 702 IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent); 703 const AnnotatedLine *PreviousLine = NULL; 704 for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(), 705 E = Lines.end(); 706 I != E; ++I) { 707 const AnnotatedLine &TheLine = **I; 708 const FormatToken *FirstTok = TheLine.First; 709 int Offset = getIndentOffset(*FirstTok); 710 711 // Determine indent and try to merge multiple unwrapped lines. 712 unsigned Indent; 713 if (TheLine.InPPDirective) { 714 Indent = TheLine.Level * Style.IndentWidth; 715 } else { 716 while (IndentForLevel.size() <= TheLine.Level) 717 IndentForLevel.push_back(-1); 718 IndentForLevel.resize(TheLine.Level + 1); 719 Indent = getIndent(IndentForLevel, TheLine.Level); 720 } 721 unsigned LevelIndent = Indent; 722 if (static_cast<int>(Indent) + Offset >= 0) 723 Indent += Offset; 724 725 // Merge multiple lines if possible. 726 unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E); 727 if (MergedLines > 0 && Style.ColumnLimit == 0) { 728 // Disallow line merging if there is a break at the start of one of the 729 // input lines. 730 for (unsigned i = 0; i < MergedLines; ++i) { 731 if (I[i + 1]->First->NewlinesBefore > 0) 732 MergedLines = 0; 733 } 734 } 735 if (!DryRun) { 736 for (unsigned i = 0; i < MergedLines; ++i) { 737 join(*I[i], *I[i + 1]); 738 } 739 } 740 I += MergedLines; 741 742 bool FixIndentation = 743 FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn); 744 if (TheLine.First->is(tok::eof)) { 745 if (PreviousLine && PreviousLine->Affected && !DryRun) { 746 // Remove the file's trailing whitespace. 747 unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u); 748 Whitespaces->replaceWhitespace(*TheLine.First, Newlines, 749 /*IndentLevel=*/0, /*Spaces=*/0, 750 /*TargetColumn=*/0); 751 } 752 } else if (TheLine.Type != LT_Invalid && 753 (TheLine.Affected || FixIndentation)) { 754 if (FirstTok->WhitespaceRange.isValid()) { 755 if (!DryRun) 756 formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, 757 Indent, TheLine.InPPDirective); 758 } else { 759 Indent = LevelIndent = FirstTok->OriginalColumn; 760 } 761 762 // If everything fits on a single line, just put it there. 763 unsigned ColumnLimit = Style.ColumnLimit; 764 if (I + 1 != E) { 765 AnnotatedLine *NextLine = I[1]; 766 if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline) 767 ColumnLimit = getColumnLimit(TheLine.InPPDirective); 768 } 769 770 if (TheLine.Last->TotalLength + Indent <= ColumnLimit) { 771 LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun); 772 while (State.NextToken != NULL) 773 Indenter->addTokenToState(State, /*Newline=*/false, DryRun); 774 } else if (Style.ColumnLimit == 0) { 775 // FIXME: Implement nested blocks for ColumnLimit = 0. 776 NoColumnLimitFormatter Formatter(Indenter); 777 if (!DryRun) 778 Formatter.format(Indent, &TheLine); 779 } else { 780 Penalty += format(TheLine, Indent, DryRun); 781 } 782 783 if (!TheLine.InPPDirective) 784 IndentForLevel[TheLine.Level] = LevelIndent; 785 } else if (TheLine.ChildrenAffected) { 786 format(TheLine.Children, DryRun); 787 } else { 788 // Format the first token if necessary, and notify the WhitespaceManager 789 // about the unchanged whitespace. 790 for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) { 791 if (Tok == TheLine.First && 792 (Tok->NewlinesBefore > 0 || Tok->IsFirst)) { 793 unsigned LevelIndent = Tok->OriginalColumn; 794 if (!DryRun) { 795 // Remove trailing whitespace of the previous line. 796 if ((PreviousLine && PreviousLine->Affected) || 797 TheLine.LeadingEmptyLinesAffected) { 798 formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent, 799 TheLine.InPPDirective); 800 } else { 801 Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); 802 } 803 } 804 805 if (static_cast<int>(LevelIndent) - Offset >= 0) 806 LevelIndent -= Offset; 807 if (Tok->isNot(tok::comment) && !TheLine.InPPDirective) 808 IndentForLevel[TheLine.Level] = LevelIndent; 809 } else if (!DryRun) { 810 Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); 811 } 812 } 813 } 814 if (!DryRun) { 815 for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) { 816 Tok->Finalized = true; 817 } 818 } 819 PreviousLine = *I; 820 } 821 return Penalty; 822 } 823 824 private: 825 /// \brief Formats an \c AnnotatedLine and returns the penalty. 826 /// 827 /// If \p DryRun is \c false, directly applies the changes. 828 unsigned format(const AnnotatedLine &Line, unsigned FirstIndent, 829 bool DryRun) { 830 LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); 831 832 // If the ObjC method declaration does not fit on a line, we should format 833 // it with one arg per line. 834 if (State.Line->Type == LT_ObjCMethodDecl) 835 State.Stack.back().BreakBeforeParameter = true; 836 837 // Find best solution in solution space. 838 return analyzeSolutionSpace(State, DryRun); 839 } 840 841 /// \brief An edge in the solution space from \c Previous->State to \c State, 842 /// inserting a newline dependent on the \c NewLine. 843 struct StateNode { 844 StateNode(const LineState &State, bool NewLine, StateNode *Previous) 845 : State(State), NewLine(NewLine), Previous(Previous) {} 846 LineState State; 847 bool NewLine; 848 StateNode *Previous; 849 }; 850 851 /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. 852 /// 853 /// In case of equal penalties, we want to prefer states that were inserted 854 /// first. During state generation we make sure that we insert states first 855 /// that break the line as late as possible. 856 typedef std::pair<unsigned, unsigned> OrderedPenalty; 857 858 /// \brief An item in the prioritized BFS search queue. The \c StateNode's 859 /// \c State has the given \c OrderedPenalty. 860 typedef std::pair<OrderedPenalty, StateNode *> QueueItem; 861 862 /// \brief The BFS queue type. 863 typedef std::priority_queue<QueueItem, std::vector<QueueItem>, 864 std::greater<QueueItem> > QueueType; 865 866 /// \brief Get the offset of the line relatively to the level. 867 /// 868 /// For example, 'public:' labels in classes are offset by 1 or 2 869 /// characters to the left from their level. 870 int getIndentOffset(const FormatToken &RootToken) { 871 if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier()) 872 return Style.AccessModifierOffset; 873 return 0; 874 } 875 876 /// \brief Add a new line and the required indent before the first Token 877 /// of the \c UnwrappedLine if there was no structural parsing error. 878 void formatFirstToken(FormatToken &RootToken, 879 const AnnotatedLine *PreviousLine, unsigned IndentLevel, 880 unsigned Indent, bool InPPDirective) { 881 unsigned Newlines = 882 std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); 883 // Remove empty lines before "}" where applicable. 884 if (RootToken.is(tok::r_brace) && 885 (!RootToken.Next || 886 (RootToken.Next->is(tok::semi) && !RootToken.Next->Next))) 887 Newlines = std::min(Newlines, 1u); 888 if (Newlines == 0 && !RootToken.IsFirst) 889 Newlines = 1; 890 891 // Insert extra new line before access specifiers. 892 if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) && 893 RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1) 894 ++Newlines; 895 896 // Remove empty lines after access specifiers. 897 if (PreviousLine && PreviousLine->First->isAccessSpecifier()) 898 Newlines = std::min(1u, Newlines); 899 900 Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent, 901 Indent, InPPDirective && 902 !RootToken.HasUnescapedNewline); 903 } 904 905 /// \brief Get the indent of \p Level from \p IndentForLevel. 906 /// 907 /// \p IndentForLevel must contain the indent for the level \c l 908 /// at \p IndentForLevel[l], or a value < 0 if the indent for 909 /// that level is unknown. 910 unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) { 911 if (IndentForLevel[Level] != -1) 912 return IndentForLevel[Level]; 913 if (Level == 0) 914 return 0; 915 return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; 916 } 917 918 void join(AnnotatedLine &A, const AnnotatedLine &B) { 919 assert(!A.Last->Next); 920 assert(!B.First->Previous); 921 if (B.Affected) 922 A.Affected = true; 923 A.Last->Next = B.First; 924 B.First->Previous = A.Last; 925 B.First->CanBreakBefore = true; 926 unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; 927 for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { 928 Tok->TotalLength += LengthA; 929 A.Last = Tok; 930 } 931 } 932 933 unsigned getColumnLimit(bool InPPDirective) const { 934 // In preprocessor directives reserve two chars for trailing " \" 935 return Style.ColumnLimit - (InPPDirective ? 2 : 0); 936 } 937 938 /// \brief Analyze the entire solution space starting from \p InitialState. 939 /// 940 /// This implements a variant of Dijkstra's algorithm on the graph that spans 941 /// the solution space (\c LineStates are the nodes). The algorithm tries to 942 /// find the shortest path (the one with lowest penalty) from \p InitialState 943 /// to a state where all tokens are placed. Returns the penalty. 944 /// 945 /// If \p DryRun is \c false, directly applies the changes. 946 unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) { 947 std::set<LineState> Seen; 948 949 // Increasing count of \c StateNode items we have created. This is used to 950 // create a deterministic order independent of the container. 951 unsigned Count = 0; 952 QueueType Queue; 953 954 // Insert start element into queue. 955 StateNode *Node = 956 new (Allocator.Allocate()) StateNode(InitialState, false, NULL); 957 Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); 958 ++Count; 959 960 unsigned Penalty = 0; 961 962 // While not empty, take first element and follow edges. 963 while (!Queue.empty()) { 964 Penalty = Queue.top().first.first; 965 StateNode *Node = Queue.top().second; 966 if (Node->State.NextToken == NULL) { 967 DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); 968 break; 969 } 970 Queue.pop(); 971 972 // Cut off the analysis of certain solutions if the analysis gets too 973 // complex. See description of IgnoreStackForComparison. 974 if (Count > 10000) 975 Node->State.IgnoreStackForComparison = true; 976 977 if (!Seen.insert(Node->State).second) 978 // State already examined with lower penalty. 979 continue; 980 981 FormatDecision LastFormat = Node->State.NextToken->Decision; 982 if (LastFormat == FD_Unformatted || LastFormat == FD_Continue) 983 addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue); 984 if (LastFormat == FD_Unformatted || LastFormat == FD_Break) 985 addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue); 986 } 987 988 if (Queue.empty()) { 989 // We were unable to find a solution, do nothing. 990 // FIXME: Add diagnostic? 991 DEBUG(llvm::dbgs() << "Could not find a solution.\n"); 992 return 0; 993 } 994 995 // Reconstruct the solution. 996 if (!DryRun) 997 reconstructPath(InitialState, Queue.top().second); 998 999 DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); 1000 DEBUG(llvm::dbgs() << "---\n"); 1001 1002 return Penalty; 1003 } 1004 1005 void reconstructPath(LineState &State, StateNode *Current) { 1006 std::deque<StateNode *> Path; 1007 // We do not need a break before the initial token. 1008 while (Current->Previous) { 1009 Path.push_front(Current); 1010 Current = Current->Previous; 1011 } 1012 for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); 1013 I != E; ++I) { 1014 unsigned Penalty = 0; 1015 formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty); 1016 Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false); 1017 1018 DEBUG({ 1019 if ((*I)->NewLine) { 1020 llvm::dbgs() << "Penalty for placing " 1021 << (*I)->Previous->State.NextToken->Tok.getName() << ": " 1022 << Penalty << "\n"; 1023 } 1024 }); 1025 } 1026 } 1027 1028 /// \brief Add the following state to the analysis queue \c Queue. 1029 /// 1030 /// Assume the current state is \p PreviousNode and has been reached with a 1031 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. 1032 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, 1033 bool NewLine, unsigned *Count, QueueType *Queue) { 1034 if (NewLine && !Indenter->canBreak(PreviousNode->State)) 1035 return; 1036 if (!NewLine && Indenter->mustBreak(PreviousNode->State)) 1037 return; 1038 1039 StateNode *Node = new (Allocator.Allocate()) 1040 StateNode(PreviousNode->State, NewLine, PreviousNode); 1041 if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty)) 1042 return; 1043 1044 Penalty += Indenter->addTokenToState(Node->State, NewLine, true); 1045 1046 Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node)); 1047 ++(*Count); 1048 } 1049 1050 /// \brief If the \p State's next token is an r_brace closing a nested block, 1051 /// format the nested block before it. 1052 /// 1053 /// Returns \c true if all children could be placed successfully and adapts 1054 /// \p Penalty as well as \p State. If \p DryRun is false, also directly 1055 /// creates changes using \c Whitespaces. 1056 /// 1057 /// The crucial idea here is that children always get formatted upon 1058 /// encountering the closing brace right after the nested block. Now, if we 1059 /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is 1060 /// \c false), the entire block has to be kept on the same line (which is only 1061 /// possible if it fits on the line, only contains a single statement, etc. 1062 /// 1063 /// If \p NewLine is true, we format the nested block on separate lines, i.e. 1064 /// break after the "{", format all lines with correct indentation and the put 1065 /// the closing "}" on yet another new line. 1066 /// 1067 /// This enables us to keep the simple structure of the 1068 /// \c UnwrappedLineFormatter, where we only have two options for each token: 1069 /// break or don't break. 1070 bool formatChildren(LineState &State, bool NewLine, bool DryRun, 1071 unsigned &Penalty) { 1072 FormatToken &Previous = *State.NextToken->Previous; 1073 const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); 1074 if (!LBrace || LBrace->isNot(tok::l_brace) || 1075 LBrace->BlockKind != BK_Block || Previous.Children.size() == 0) 1076 // The previous token does not open a block. Nothing to do. We don't 1077 // assert so that we can simply call this function for all tokens. 1078 return true; 1079 1080 if (NewLine) { 1081 int AdditionalIndent = State.Stack.back().Indent - 1082 Previous.Children[0]->Level * Style.IndentWidth; 1083 Penalty += format(Previous.Children, DryRun, AdditionalIndent, 1084 /*FixBadIndentation=*/true); 1085 return true; 1086 } 1087 1088 // Cannot merge multiple statements into a single line. 1089 if (Previous.Children.size() > 1) 1090 return false; 1091 1092 // We can't put the closing "}" on a line with a trailing comment. 1093 if (Previous.Children[0]->Last->isTrailingComment()) 1094 return false; 1095 1096 if (!DryRun) { 1097 Whitespaces->replaceWhitespace( 1098 *Previous.Children[0]->First, 1099 /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1, 1100 /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); 1101 } 1102 Penalty += format(*Previous.Children[0], State.Column + 1, DryRun); 1103 1104 State.Column += 1 + Previous.Children[0]->Last->TotalLength; 1105 return true; 1106 } 1107 1108 ContinuationIndenter *Indenter; 1109 WhitespaceManager *Whitespaces; 1110 FormatStyle Style; 1111 LineJoiner Joiner; 1112 1113 llvm::SpecificBumpPtrAllocator<StateNode> Allocator; 1114 }; 1115 1116 class FormatTokenLexer { 1117 public: 1118 FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style, 1119 encoding::Encoding Encoding) 1120 : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0), 1121 TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style), 1122 IdentTable(getFormattingLangOpts()), Encoding(Encoding) { 1123 Lex.SetKeepWhitespaceMode(true); 1124 } 1125 1126 ArrayRef<FormatToken *> lex() { 1127 assert(Tokens.empty()); 1128 do { 1129 Tokens.push_back(getNextToken()); 1130 tryMergePreviousTokens(); 1131 } while (Tokens.back()->Tok.isNot(tok::eof)); 1132 return Tokens; 1133 } 1134 1135 IdentifierTable &getIdentTable() { return IdentTable; } 1136 1137 private: 1138 void tryMergePreviousTokens() { 1139 if (tryMerge_TMacro()) 1140 return; 1141 1142 if (Style.Language == FormatStyle::LK_JavaScript) { 1143 static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal }; 1144 static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal }; 1145 static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater, 1146 tok::greaterequal }; 1147 // FIXME: We probably need to change token type to mimic operator with the 1148 // correct priority. 1149 if (tryMergeTokens(JSIdentity)) 1150 return; 1151 if (tryMergeTokens(JSNotIdentity)) 1152 return; 1153 if (tryMergeTokens(JSShiftEqual)) 1154 return; 1155 } 1156 } 1157 1158 bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) { 1159 if (Tokens.size() < Kinds.size()) 1160 return false; 1161 1162 SmallVectorImpl<FormatToken *>::const_iterator First = 1163 Tokens.end() - Kinds.size(); 1164 if (!First[0]->is(Kinds[0])) 1165 return false; 1166 unsigned AddLength = 0; 1167 for (unsigned i = 1; i < Kinds.size(); ++i) { 1168 if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() != 1169 First[i]->WhitespaceRange.getEnd()) 1170 return false; 1171 AddLength += First[i]->TokenText.size(); 1172 } 1173 Tokens.resize(Tokens.size() - Kinds.size() + 1); 1174 First[0]->TokenText = StringRef(First[0]->TokenText.data(), 1175 First[0]->TokenText.size() + AddLength); 1176 First[0]->ColumnWidth += AddLength; 1177 return true; 1178 } 1179 1180 bool tryMerge_TMacro() { 1181 if (Tokens.size() < 4) 1182 return false; 1183 FormatToken *Last = Tokens.back(); 1184 if (!Last->is(tok::r_paren)) 1185 return false; 1186 1187 FormatToken *String = Tokens[Tokens.size() - 2]; 1188 if (!String->is(tok::string_literal) || String->IsMultiline) 1189 return false; 1190 1191 if (!Tokens[Tokens.size() - 3]->is(tok::l_paren)) 1192 return false; 1193 1194 FormatToken *Macro = Tokens[Tokens.size() - 4]; 1195 if (Macro->TokenText != "_T") 1196 return false; 1197 1198 const char *Start = Macro->TokenText.data(); 1199 const char *End = Last->TokenText.data() + Last->TokenText.size(); 1200 String->TokenText = StringRef(Start, End - Start); 1201 String->IsFirst = Macro->IsFirst; 1202 String->LastNewlineOffset = Macro->LastNewlineOffset; 1203 String->WhitespaceRange = Macro->WhitespaceRange; 1204 String->OriginalColumn = Macro->OriginalColumn; 1205 String->ColumnWidth = encoding::columnWidthWithTabs( 1206 String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding); 1207 1208 Tokens.pop_back(); 1209 Tokens.pop_back(); 1210 Tokens.pop_back(); 1211 Tokens.back() = String; 1212 return true; 1213 } 1214 1215 FormatToken *getNextToken() { 1216 if (GreaterStashed) { 1217 // Create a synthesized second '>' token. 1218 // FIXME: Increment Column and set OriginalColumn. 1219 Token Greater = FormatTok->Tok; 1220 FormatTok = new (Allocator.Allocate()) FormatToken; 1221 FormatTok->Tok = Greater; 1222 SourceLocation GreaterLocation = 1223 FormatTok->Tok.getLocation().getLocWithOffset(1); 1224 FormatTok->WhitespaceRange = 1225 SourceRange(GreaterLocation, GreaterLocation); 1226 FormatTok->TokenText = ">"; 1227 FormatTok->ColumnWidth = 1; 1228 GreaterStashed = false; 1229 return FormatTok; 1230 } 1231 1232 FormatTok = new (Allocator.Allocate()) FormatToken; 1233 readRawToken(*FormatTok); 1234 SourceLocation WhitespaceStart = 1235 FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); 1236 FormatTok->IsFirst = IsFirstToken; 1237 IsFirstToken = false; 1238 1239 // Consume and record whitespace until we find a significant token. 1240 unsigned WhitespaceLength = TrailingWhitespace; 1241 while (FormatTok->Tok.is(tok::unknown)) { 1242 for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) { 1243 switch (FormatTok->TokenText[i]) { 1244 case '\n': 1245 ++FormatTok->NewlinesBefore; 1246 // FIXME: This is technically incorrect, as it could also 1247 // be a literal backslash at the end of the line. 1248 if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' && 1249 (FormatTok->TokenText[i - 1] != '\r' || i == 1 || 1250 FormatTok->TokenText[i - 2] != '\\'))) 1251 FormatTok->HasUnescapedNewline = true; 1252 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; 1253 Column = 0; 1254 break; 1255 case '\r': 1256 case '\f': 1257 case '\v': 1258 Column = 0; 1259 break; 1260 case ' ': 1261 ++Column; 1262 break; 1263 case '\t': 1264 Column += Style.TabWidth - Column % Style.TabWidth; 1265 break; 1266 case '\\': 1267 ++Column; 1268 if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' && 1269 FormatTok->TokenText[i + 1] != '\n')) 1270 FormatTok->Type = TT_ImplicitStringLiteral; 1271 break; 1272 default: 1273 FormatTok->Type = TT_ImplicitStringLiteral; 1274 ++Column; 1275 break; 1276 } 1277 } 1278 1279 if (FormatTok->Type == TT_ImplicitStringLiteral) 1280 break; 1281 WhitespaceLength += FormatTok->Tok.getLength(); 1282 1283 readRawToken(*FormatTok); 1284 } 1285 1286 // In case the token starts with escaped newlines, we want to 1287 // take them into account as whitespace - this pattern is quite frequent 1288 // in macro definitions. 1289 // FIXME: Add a more explicit test. 1290 while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' && 1291 FormatTok->TokenText[1] == '\n') { 1292 // FIXME: ++FormatTok->NewlinesBefore is missing... 1293 WhitespaceLength += 2; 1294 Column = 0; 1295 FormatTok->TokenText = FormatTok->TokenText.substr(2); 1296 } 1297 1298 FormatTok->WhitespaceRange = SourceRange( 1299 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); 1300 1301 FormatTok->OriginalColumn = Column; 1302 1303 TrailingWhitespace = 0; 1304 if (FormatTok->Tok.is(tok::comment)) { 1305 // FIXME: Add the trimmed whitespace to Column. 1306 StringRef UntrimmedText = FormatTok->TokenText; 1307 FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f"); 1308 TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size(); 1309 } else if (FormatTok->Tok.is(tok::raw_identifier)) { 1310 IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText); 1311 FormatTok->Tok.setIdentifierInfo(&Info); 1312 FormatTok->Tok.setKind(Info.getTokenID()); 1313 } else if (FormatTok->Tok.is(tok::greatergreater)) { 1314 FormatTok->Tok.setKind(tok::greater); 1315 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); 1316 GreaterStashed = true; 1317 } 1318 1319 // Now FormatTok is the next non-whitespace token. 1320 1321 StringRef Text = FormatTok->TokenText; 1322 size_t FirstNewlinePos = Text.find('\n'); 1323 if (FirstNewlinePos == StringRef::npos) { 1324 // FIXME: ColumnWidth actually depends on the start column, we need to 1325 // take this into account when the token is moved. 1326 FormatTok->ColumnWidth = 1327 encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding); 1328 Column += FormatTok->ColumnWidth; 1329 } else { 1330 FormatTok->IsMultiline = true; 1331 // FIXME: ColumnWidth actually depends on the start column, we need to 1332 // take this into account when the token is moved. 1333 FormatTok->ColumnWidth = encoding::columnWidthWithTabs( 1334 Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding); 1335 1336 // The last line of the token always starts in column 0. 1337 // Thus, the length can be precomputed even in the presence of tabs. 1338 FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs( 1339 Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, 1340 Encoding); 1341 Column = FormatTok->LastLineColumnWidth; 1342 } 1343 1344 return FormatTok; 1345 } 1346 1347 FormatToken *FormatTok; 1348 bool IsFirstToken; 1349 bool GreaterStashed; 1350 unsigned Column; 1351 unsigned TrailingWhitespace; 1352 Lexer &Lex; 1353 SourceManager &SourceMgr; 1354 FormatStyle &Style; 1355 IdentifierTable IdentTable; 1356 encoding::Encoding Encoding; 1357 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; 1358 SmallVector<FormatToken *, 16> Tokens; 1359 1360 void readRawToken(FormatToken &Tok) { 1361 Lex.LexFromRawLexer(Tok.Tok); 1362 Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), 1363 Tok.Tok.getLength()); 1364 // For formatting, treat unterminated string literals like normal string 1365 // literals. 1366 if (Tok.is(tok::unknown)) { 1367 if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') { 1368 Tok.Tok.setKind(tok::string_literal); 1369 Tok.IsUnterminatedLiteral = true; 1370 } else if (Style.Language == FormatStyle::LK_JavaScript && 1371 Tok.TokenText == "''") { 1372 Tok.Tok.setKind(tok::char_constant); 1373 } 1374 } 1375 } 1376 }; 1377 1378 static StringRef getLanguageName(FormatStyle::LanguageKind Language) { 1379 switch (Language) { 1380 case FormatStyle::LK_Cpp: 1381 return "C++"; 1382 case FormatStyle::LK_JavaScript: 1383 return "JavaScript"; 1384 case FormatStyle::LK_Proto: 1385 return "Proto"; 1386 default: 1387 return "Unknown"; 1388 } 1389 } 1390 1391 class Formatter : public UnwrappedLineConsumer { 1392 public: 1393 Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, 1394 const std::vector<CharSourceRange> &Ranges) 1395 : Style(Style), Lex(Lex), SourceMgr(SourceMgr), 1396 Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())), 1397 Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1), 1398 Encoding(encoding::detectEncoding(Lex.getBuffer())) { 1399 DEBUG(llvm::dbgs() << "File encoding: " 1400 << (Encoding == encoding::Encoding_UTF8 ? "UTF8" 1401 : "unknown") 1402 << "\n"); 1403 DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language) 1404 << "\n"); 1405 } 1406 1407 tooling::Replacements format() { 1408 tooling::Replacements Result; 1409 FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding); 1410 1411 UnwrappedLineParser Parser(Style, Tokens.lex(), *this); 1412 bool StructuralError = Parser.parse(); 1413 assert(UnwrappedLines.rbegin()->empty()); 1414 for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; 1415 ++Run) { 1416 DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); 1417 SmallVector<AnnotatedLine *, 16> AnnotatedLines; 1418 for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) { 1419 AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i])); 1420 } 1421 tooling::Replacements RunResult = 1422 format(AnnotatedLines, StructuralError, Tokens); 1423 DEBUG({ 1424 llvm::dbgs() << "Replacements for run " << Run << ":\n"; 1425 for (tooling::Replacements::iterator I = RunResult.begin(), 1426 E = RunResult.end(); 1427 I != E; ++I) { 1428 llvm::dbgs() << I->toString() << "\n"; 1429 } 1430 }); 1431 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1432 delete AnnotatedLines[i]; 1433 } 1434 Result.insert(RunResult.begin(), RunResult.end()); 1435 Whitespaces.reset(); 1436 } 1437 return Result; 1438 } 1439 1440 tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, 1441 bool StructuralError, FormatTokenLexer &Tokens) { 1442 TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in")); 1443 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1444 Annotator.annotate(*AnnotatedLines[i]); 1445 } 1446 deriveLocalStyle(AnnotatedLines); 1447 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1448 Annotator.calculateFormattingInformation(*AnnotatedLines[i]); 1449 } 1450 computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end()); 1451 1452 Annotator.setCommentLineLevels(AnnotatedLines); 1453 ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding, 1454 BinPackInconclusiveFunctions); 1455 UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style); 1456 Formatter.format(AnnotatedLines, /*DryRun=*/false); 1457 return Whitespaces.generateReplacements(); 1458 } 1459 1460 private: 1461 // Determines which lines are affected by the SourceRanges given as input. 1462 // Returns \c true if at least one line between I and E or one of their 1463 // children is affected. 1464 bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I, 1465 SmallVectorImpl<AnnotatedLine *>::iterator E) { 1466 bool SomeLineAffected = false; 1467 const AnnotatedLine *PreviousLine = NULL; 1468 while (I != E) { 1469 AnnotatedLine *Line = *I; 1470 Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First); 1471 1472 // If a line is part of a preprocessor directive, it needs to be formatted 1473 // if any token within the directive is affected. 1474 if (Line->InPPDirective) { 1475 FormatToken *Last = Line->Last; 1476 SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1; 1477 while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) { 1478 Last = (*PPEnd)->Last; 1479 ++PPEnd; 1480 } 1481 1482 if (affectsTokenRange(*Line->First, *Last, 1483 /*IncludeLeadingNewlines=*/false)) { 1484 SomeLineAffected = true; 1485 markAllAsAffected(I, PPEnd); 1486 } 1487 I = PPEnd; 1488 continue; 1489 } 1490 1491 if (nonPPLineAffected(Line, PreviousLine)) 1492 SomeLineAffected = true; 1493 1494 PreviousLine = Line; 1495 ++I; 1496 } 1497 return SomeLineAffected; 1498 } 1499 1500 // Determines whether 'Line' is affected by the SourceRanges given as input. 1501 // Returns \c true if line or one if its children is affected. 1502 bool nonPPLineAffected(AnnotatedLine *Line, 1503 const AnnotatedLine *PreviousLine) { 1504 bool SomeLineAffected = false; 1505 Line->ChildrenAffected = 1506 computeAffectedLines(Line->Children.begin(), Line->Children.end()); 1507 if (Line->ChildrenAffected) 1508 SomeLineAffected = true; 1509 1510 // Stores whether one of the line's tokens is directly affected. 1511 bool SomeTokenAffected = false; 1512 // Stores whether we need to look at the leading newlines of the next token 1513 // in order to determine whether it was affected. 1514 bool IncludeLeadingNewlines = false; 1515 1516 // Stores whether the first child line of any of this line's tokens is 1517 // affected. 1518 bool SomeFirstChildAffected = false; 1519 1520 for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { 1521 // Determine whether 'Tok' was affected. 1522 if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines)) 1523 SomeTokenAffected = true; 1524 1525 // Determine whether the first child of 'Tok' was affected. 1526 if (!Tok->Children.empty() && Tok->Children.front()->Affected) 1527 SomeFirstChildAffected = true; 1528 1529 IncludeLeadingNewlines = Tok->Children.empty(); 1530 } 1531 1532 // Was this line moved, i.e. has it previously been on the same line as an 1533 // affected line? 1534 bool LineMoved = PreviousLine && PreviousLine->Affected && 1535 Line->First->NewlinesBefore == 0; 1536 1537 bool IsContinuedComment = Line->First->is(tok::comment) && 1538 Line->First->Next == NULL && 1539 Line->First->NewlinesBefore < 2 && PreviousLine && 1540 PreviousLine->Affected && 1541 PreviousLine->Last->is(tok::comment); 1542 1543 if (SomeTokenAffected || SomeFirstChildAffected || LineMoved || 1544 IsContinuedComment) { 1545 Line->Affected = true; 1546 SomeLineAffected = true; 1547 } 1548 return SomeLineAffected; 1549 } 1550 1551 // Marks all lines between I and E as well as all their children as affected. 1552 void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I, 1553 SmallVectorImpl<AnnotatedLine *>::iterator E) { 1554 while (I != E) { 1555 (*I)->Affected = true; 1556 markAllAsAffected((*I)->Children.begin(), (*I)->Children.end()); 1557 ++I; 1558 } 1559 } 1560 1561 // Returns true if the range from 'First' to 'Last' intersects with one of the 1562 // input ranges. 1563 bool affectsTokenRange(const FormatToken &First, const FormatToken &Last, 1564 bool IncludeLeadingNewlines) { 1565 SourceLocation Start = First.WhitespaceRange.getBegin(); 1566 if (!IncludeLeadingNewlines) 1567 Start = Start.getLocWithOffset(First.LastNewlineOffset); 1568 SourceLocation End = Last.getStartOfNonWhitespace(); 1569 if (Last.TokenText.size() > 0) 1570 End = End.getLocWithOffset(Last.TokenText.size() - 1); 1571 CharSourceRange Range = CharSourceRange::getCharRange(Start, End); 1572 return affectsCharSourceRange(Range); 1573 } 1574 1575 // Returns true if one of the input ranges intersect the leading empty lines 1576 // before 'Tok'. 1577 bool affectsLeadingEmptyLines(const FormatToken &Tok) { 1578 CharSourceRange EmptyLineRange = CharSourceRange::getCharRange( 1579 Tok.WhitespaceRange.getBegin(), 1580 Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset)); 1581 return affectsCharSourceRange(EmptyLineRange); 1582 } 1583 1584 // Returns true if 'Range' intersects with one of the input ranges. 1585 bool affectsCharSourceRange(const CharSourceRange &Range) { 1586 for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(), 1587 E = Ranges.end(); 1588 I != E; ++I) { 1589 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) && 1590 !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin())) 1591 return true; 1592 } 1593 return false; 1594 } 1595 1596 static bool inputUsesCRLF(StringRef Text) { 1597 return Text.count('\r') * 2 > Text.count('\n'); 1598 } 1599 1600 void 1601 deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { 1602 unsigned CountBoundToVariable = 0; 1603 unsigned CountBoundToType = 0; 1604 bool HasCpp03IncompatibleFormat = false; 1605 bool HasBinPackedFunction = false; 1606 bool HasOnePerLineFunction = false; 1607 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1608 if (!AnnotatedLines[i]->First->Next) 1609 continue; 1610 FormatToken *Tok = AnnotatedLines[i]->First->Next; 1611 while (Tok->Next) { 1612 if (Tok->Type == TT_PointerOrReference) { 1613 bool SpacesBefore = 1614 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); 1615 bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() != 1616 Tok->Next->WhitespaceRange.getEnd(); 1617 if (SpacesBefore && !SpacesAfter) 1618 ++CountBoundToVariable; 1619 else if (!SpacesBefore && SpacesAfter) 1620 ++CountBoundToType; 1621 } 1622 1623 if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) { 1624 if (Tok->is(tok::coloncolon) && 1625 Tok->Previous->Type == TT_TemplateOpener) 1626 HasCpp03IncompatibleFormat = true; 1627 if (Tok->Type == TT_TemplateCloser && 1628 Tok->Previous->Type == TT_TemplateCloser) 1629 HasCpp03IncompatibleFormat = true; 1630 } 1631 1632 if (Tok->PackingKind == PPK_BinPacked) 1633 HasBinPackedFunction = true; 1634 if (Tok->PackingKind == PPK_OnePerLine) 1635 HasOnePerLineFunction = true; 1636 1637 Tok = Tok->Next; 1638 } 1639 } 1640 if (Style.DerivePointerBinding) { 1641 if (CountBoundToType > CountBoundToVariable) 1642 Style.PointerBindsToType = true; 1643 else if (CountBoundToType < CountBoundToVariable) 1644 Style.PointerBindsToType = false; 1645 } 1646 if (Style.Standard == FormatStyle::LS_Auto) { 1647 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11 1648 : FormatStyle::LS_Cpp03; 1649 } 1650 BinPackInconclusiveFunctions = 1651 HasBinPackedFunction || !HasOnePerLineFunction; 1652 } 1653 1654 void consumeUnwrappedLine(const UnwrappedLine &TheLine) override { 1655 assert(!UnwrappedLines.empty()); 1656 UnwrappedLines.back().push_back(TheLine); 1657 } 1658 1659 void finishRun() override { 1660 UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>()); 1661 } 1662 1663 FormatStyle Style; 1664 Lexer &Lex; 1665 SourceManager &SourceMgr; 1666 WhitespaceManager Whitespaces; 1667 SmallVector<CharSourceRange, 8> Ranges; 1668 SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines; 1669 1670 encoding::Encoding Encoding; 1671 bool BinPackInconclusiveFunctions; 1672 }; 1673 1674 } // end anonymous namespace 1675 1676 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, 1677 SourceManager &SourceMgr, 1678 std::vector<CharSourceRange> Ranges) { 1679 Formatter formatter(Style, Lex, SourceMgr, Ranges); 1680 return formatter.format(); 1681 } 1682 1683 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, 1684 std::vector<tooling::Range> Ranges, 1685 StringRef FileName) { 1686 FileManager Files((FileSystemOptions())); 1687 DiagnosticsEngine Diagnostics( 1688 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 1689 new DiagnosticOptions); 1690 SourceManager SourceMgr(Diagnostics, Files); 1691 llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName); 1692 const clang::FileEntry *Entry = 1693 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); 1694 SourceMgr.overrideFileContents(Entry, Buf); 1695 FileID ID = 1696 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); 1697 Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr, 1698 getFormattingLangOpts(Style.Standard)); 1699 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); 1700 std::vector<CharSourceRange> CharRanges; 1701 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1702 SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset()); 1703 SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength()); 1704 CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); 1705 } 1706 return reformat(Style, Lex, SourceMgr, CharRanges); 1707 } 1708 1709 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) { 1710 LangOptions LangOpts; 1711 LangOpts.CPlusPlus = 1; 1712 LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 1713 LangOpts.LineComment = 1; 1714 LangOpts.Bool = 1; 1715 LangOpts.ObjC1 = 1; 1716 LangOpts.ObjC2 = 1; 1717 return LangOpts; 1718 } 1719 1720 const char *StyleOptionHelpDescription = 1721 "Coding style, currently supports:\n" 1722 " LLVM, Google, Chromium, Mozilla, WebKit.\n" 1723 "Use -style=file to load style configuration from\n" 1724 ".clang-format file located in one of the parent\n" 1725 "directories of the source file (or current\n" 1726 "directory for stdin).\n" 1727 "Use -style=\"{key: value, ...}\" to set specific\n" 1728 "parameters, e.g.:\n" 1729 " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; 1730 1731 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { 1732 if (FileName.endswith_lower(".js")) { 1733 return FormatStyle::LK_JavaScript; 1734 } else if (FileName.endswith_lower(".proto") || 1735 FileName.endswith_lower(".protodevel")) { 1736 return FormatStyle::LK_Proto; 1737 } 1738 return FormatStyle::LK_Cpp; 1739 } 1740 1741 FormatStyle getStyle(StringRef StyleName, StringRef FileName, 1742 StringRef FallbackStyle) { 1743 FormatStyle Style = getLLVMStyle(); 1744 Style.Language = getLanguageByFileName(FileName); 1745 if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) { 1746 llvm::errs() << "Invalid fallback style \"" << FallbackStyle 1747 << "\" using LLVM style\n"; 1748 return Style; 1749 } 1750 1751 if (StyleName.startswith("{")) { 1752 // Parse YAML/JSON style from the command line. 1753 if (llvm::error_code ec = parseConfiguration(StyleName, &Style)) { 1754 llvm::errs() << "Error parsing -style: " << ec.message() << ", using " 1755 << FallbackStyle << " style\n"; 1756 } 1757 return Style; 1758 } 1759 1760 if (!StyleName.equals_lower("file")) { 1761 if (!getPredefinedStyle(StyleName, Style.Language, &Style)) 1762 llvm::errs() << "Invalid value for -style, using " << FallbackStyle 1763 << " style\n"; 1764 return Style; 1765 } 1766 1767 // Look for .clang-format/_clang-format file in the file's parent directories. 1768 SmallString<128> UnsuitableConfigFiles; 1769 SmallString<128> Path(FileName); 1770 llvm::sys::fs::make_absolute(Path); 1771 for (StringRef Directory = Path; !Directory.empty(); 1772 Directory = llvm::sys::path::parent_path(Directory)) { 1773 if (!llvm::sys::fs::is_directory(Directory)) 1774 continue; 1775 SmallString<128> ConfigFile(Directory); 1776 1777 llvm::sys::path::append(ConfigFile, ".clang-format"); 1778 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); 1779 bool IsFile = false; 1780 // Ignore errors from is_regular_file: we only need to know if we can read 1781 // the file or not. 1782 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); 1783 1784 if (!IsFile) { 1785 // Try _clang-format too, since dotfiles are not commonly used on Windows. 1786 ConfigFile = Directory; 1787 llvm::sys::path::append(ConfigFile, "_clang-format"); 1788 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); 1789 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile); 1790 } 1791 1792 if (IsFile) { 1793 std::unique_ptr<llvm::MemoryBuffer> Text; 1794 if (llvm::error_code ec = 1795 llvm::MemoryBuffer::getFile(ConfigFile.c_str(), Text)) { 1796 llvm::errs() << ec.message() << "\n"; 1797 break; 1798 } 1799 if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) { 1800 if (ec == llvm::errc::not_supported) { 1801 if (!UnsuitableConfigFiles.empty()) 1802 UnsuitableConfigFiles.append(", "); 1803 UnsuitableConfigFiles.append(ConfigFile); 1804 continue; 1805 } 1806 llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message() 1807 << "\n"; 1808 break; 1809 } 1810 DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n"); 1811 return Style; 1812 } 1813 } 1814 llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle 1815 << " style\n"; 1816 if (!UnsuitableConfigFiles.empty()) { 1817 llvm::errs() << "Configuration file(s) do(es) not support " 1818 << getLanguageName(Style.Language) << ": " 1819 << UnsuitableConfigFiles << "\n"; 1820 } 1821 return Style; 1822 } 1823 1824 } // namespace format 1825 } // namespace clang 1826