1 //===--- Format.cpp - Format C++ code -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements functions declared in Format.h. This will be 12 /// split into separate files as we go. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "format-formatter" 17 18 #include "BreakableToken.h" 19 #include "TokenAnnotator.h" 20 #include "UnwrappedLineParser.h" 21 #include "WhitespaceManager.h" 22 #include "clang/Basic/Diagnostic.h" 23 #include "clang/Basic/OperatorPrecedence.h" 24 #include "clang/Basic/SourceManager.h" 25 #include "clang/Format/Format.h" 26 #include "clang/Lex/Lexer.h" 27 #include "llvm/ADT/STLExtras.h" 28 #include "llvm/Support/Allocator.h" 29 #include "llvm/Support/Debug.h" 30 #include "llvm/Support/YAMLTraits.h" 31 #include <queue> 32 #include <string> 33 34 namespace llvm { 35 namespace yaml { 36 template <> 37 struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> { 38 static void enumeration(IO &IO, 39 clang::format::FormatStyle::LanguageStandard &Value) { 40 IO.enumCase(Value, "C++03", clang::format::FormatStyle::LS_Cpp03); 41 IO.enumCase(Value, "C++11", clang::format::FormatStyle::LS_Cpp11); 42 IO.enumCase(Value, "Auto", clang::format::FormatStyle::LS_Auto); 43 } 44 }; 45 46 template <> 47 struct ScalarEnumerationTraits<clang::format::FormatStyle::BraceBreakingStyle> { 48 static void 49 enumeration(IO &IO, clang::format::FormatStyle::BraceBreakingStyle &Value) { 50 IO.enumCase(Value, "Attach", clang::format::FormatStyle::BS_Attach); 51 IO.enumCase(Value, "Linux", clang::format::FormatStyle::BS_Linux); 52 IO.enumCase(Value, "Stroustrup", clang::format::FormatStyle::BS_Stroustrup); 53 IO.enumCase(Value, "Allman", clang::format::FormatStyle::BS_Allman); 54 } 55 }; 56 57 template <> 58 struct ScalarEnumerationTraits< 59 clang::format::FormatStyle::NamespaceIndentationKind> { 60 static void 61 enumeration(IO &IO, 62 clang::format::FormatStyle::NamespaceIndentationKind &Value) { 63 IO.enumCase(Value, "None", clang::format::FormatStyle::NI_None); 64 IO.enumCase(Value, "Inner", clang::format::FormatStyle::NI_Inner); 65 IO.enumCase(Value, "All", clang::format::FormatStyle::NI_All); 66 } 67 }; 68 69 template <> struct MappingTraits<clang::format::FormatStyle> { 70 static void mapping(llvm::yaml::IO &IO, clang::format::FormatStyle &Style) { 71 if (IO.outputting()) { 72 StringRef StylesArray[] = { "LLVM", "Google", "Chromium", "Mozilla" }; 73 ArrayRef<StringRef> Styles(StylesArray); 74 for (size_t i = 0, e = Styles.size(); i < e; ++i) { 75 StringRef StyleName(Styles[i]); 76 clang::format::FormatStyle PredefinedStyle; 77 if (clang::format::getPredefinedStyle(StyleName, &PredefinedStyle) && 78 Style == PredefinedStyle) { 79 IO.mapOptional("# BasedOnStyle", StyleName); 80 break; 81 } 82 } 83 } else { 84 StringRef BasedOnStyle; 85 IO.mapOptional("BasedOnStyle", BasedOnStyle); 86 if (!BasedOnStyle.empty()) 87 if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) { 88 IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle)); 89 return; 90 } 91 } 92 93 IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); 94 IO.mapOptional("ConstructorInitializerIndentWidth", 95 Style.ConstructorInitializerIndentWidth); 96 IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); 97 IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); 98 IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", 99 Style.AllowAllParametersOfDeclarationOnNextLine); 100 IO.mapOptional("AllowShortIfStatementsOnASingleLine", 101 Style.AllowShortIfStatementsOnASingleLine); 102 IO.mapOptional("AllowShortLoopsOnASingleLine", 103 Style.AllowShortLoopsOnASingleLine); 104 IO.mapOptional("AlwaysBreakTemplateDeclarations", 105 Style.AlwaysBreakTemplateDeclarations); 106 IO.mapOptional("AlwaysBreakBeforeMultilineStrings", 107 Style.AlwaysBreakBeforeMultilineStrings); 108 IO.mapOptional("BreakBeforeBinaryOperators", 109 Style.BreakBeforeBinaryOperators); 110 IO.mapOptional("BreakConstructorInitializersBeforeComma", 111 Style.BreakConstructorInitializersBeforeComma); 112 IO.mapOptional("BinPackParameters", Style.BinPackParameters); 113 IO.mapOptional("ColumnLimit", Style.ColumnLimit); 114 IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", 115 Style.ConstructorInitializerAllOnOneLineOrOnePerLine); 116 IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding); 117 IO.mapOptional("ExperimentalAutoDetectBinPacking", 118 Style.ExperimentalAutoDetectBinPacking); 119 IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); 120 IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); 121 IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); 122 IO.mapOptional("ObjCSpaceBeforeProtocolList", 123 Style.ObjCSpaceBeforeProtocolList); 124 IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); 125 IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString); 126 IO.mapOptional("PenaltyBreakFirstLessLess", 127 Style.PenaltyBreakFirstLessLess); 128 IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter); 129 IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", 130 Style.PenaltyReturnTypeOnItsOwnLine); 131 IO.mapOptional("PointerBindsToType", Style.PointerBindsToType); 132 IO.mapOptional("SpacesBeforeTrailingComments", 133 Style.SpacesBeforeTrailingComments); 134 IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle); 135 IO.mapOptional("Standard", Style.Standard); 136 IO.mapOptional("IndentWidth", Style.IndentWidth); 137 IO.mapOptional("UseTab", Style.UseTab); 138 IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); 139 IO.mapOptional("IndentFunctionDeclarationAfterType", 140 Style.IndentFunctionDeclarationAfterType); 141 } 142 }; 143 } 144 } 145 146 namespace clang { 147 namespace format { 148 149 void setDefaultPenalties(FormatStyle &Style) { 150 Style.PenaltyBreakComment = 45; 151 Style.PenaltyBreakFirstLessLess = 120; 152 Style.PenaltyBreakString = 1000; 153 Style.PenaltyExcessCharacter = 1000000; 154 } 155 156 FormatStyle getLLVMStyle() { 157 FormatStyle LLVMStyle; 158 LLVMStyle.AccessModifierOffset = -2; 159 LLVMStyle.AlignEscapedNewlinesLeft = false; 160 LLVMStyle.AlignTrailingComments = true; 161 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 162 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 163 LLVMStyle.AllowShortLoopsOnASingleLine = false; 164 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; 165 LLVMStyle.AlwaysBreakTemplateDeclarations = false; 166 LLVMStyle.BinPackParameters = true; 167 LLVMStyle.BreakBeforeBinaryOperators = false; 168 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 169 LLVMStyle.BreakConstructorInitializersBeforeComma = false; 170 LLVMStyle.ColumnLimit = 80; 171 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 172 LLVMStyle.ConstructorInitializerIndentWidth = 4; 173 LLVMStyle.Cpp11BracedListStyle = false; 174 LLVMStyle.DerivePointerBinding = false; 175 LLVMStyle.ExperimentalAutoDetectBinPacking = false; 176 LLVMStyle.IndentCaseLabels = false; 177 LLVMStyle.IndentFunctionDeclarationAfterType = false; 178 LLVMStyle.IndentWidth = 2; 179 LLVMStyle.MaxEmptyLinesToKeep = 1; 180 LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; 181 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 182 LLVMStyle.PointerBindsToType = false; 183 LLVMStyle.SpacesBeforeTrailingComments = 1; 184 LLVMStyle.Standard = FormatStyle::LS_Cpp03; 185 LLVMStyle.UseTab = false; 186 187 setDefaultPenalties(LLVMStyle); 188 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; 189 190 return LLVMStyle; 191 } 192 193 FormatStyle getGoogleStyle() { 194 FormatStyle GoogleStyle; 195 GoogleStyle.AccessModifierOffset = -1; 196 GoogleStyle.AlignEscapedNewlinesLeft = true; 197 GoogleStyle.AlignTrailingComments = true; 198 GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true; 199 GoogleStyle.AllowShortIfStatementsOnASingleLine = true; 200 GoogleStyle.AllowShortLoopsOnASingleLine = true; 201 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; 202 GoogleStyle.AlwaysBreakTemplateDeclarations = true; 203 GoogleStyle.BinPackParameters = true; 204 GoogleStyle.BreakBeforeBinaryOperators = false; 205 GoogleStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 206 GoogleStyle.BreakConstructorInitializersBeforeComma = false; 207 GoogleStyle.ColumnLimit = 80; 208 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 209 GoogleStyle.ConstructorInitializerIndentWidth = 4; 210 GoogleStyle.Cpp11BracedListStyle = true; 211 GoogleStyle.DerivePointerBinding = true; 212 GoogleStyle.ExperimentalAutoDetectBinPacking = false; 213 GoogleStyle.IndentCaseLabels = true; 214 GoogleStyle.IndentFunctionDeclarationAfterType = true; 215 GoogleStyle.IndentWidth = 2; 216 GoogleStyle.MaxEmptyLinesToKeep = 1; 217 GoogleStyle.NamespaceIndentation = FormatStyle::NI_None; 218 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 219 GoogleStyle.PointerBindsToType = true; 220 GoogleStyle.SpacesBeforeTrailingComments = 2; 221 GoogleStyle.Standard = FormatStyle::LS_Auto; 222 GoogleStyle.UseTab = false; 223 224 setDefaultPenalties(GoogleStyle); 225 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; 226 227 return GoogleStyle; 228 } 229 230 FormatStyle getChromiumStyle() { 231 FormatStyle ChromiumStyle = getGoogleStyle(); 232 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; 233 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; 234 ChromiumStyle.AllowShortLoopsOnASingleLine = false; 235 ChromiumStyle.BinPackParameters = false; 236 ChromiumStyle.DerivePointerBinding = false; 237 ChromiumStyle.Standard = FormatStyle::LS_Cpp03; 238 return ChromiumStyle; 239 } 240 241 FormatStyle getMozillaStyle() { 242 FormatStyle MozillaStyle = getLLVMStyle(); 243 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; 244 MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 245 MozillaStyle.DerivePointerBinding = true; 246 MozillaStyle.IndentCaseLabels = true; 247 MozillaStyle.ObjCSpaceBeforeProtocolList = false; 248 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; 249 MozillaStyle.PointerBindsToType = true; 250 return MozillaStyle; 251 } 252 253 FormatStyle getWebKitStyle() { 254 FormatStyle Style = getLLVMStyle(); 255 Style.AccessModifierOffset = -4; 256 Style.AlignTrailingComments = false; 257 Style.BreakBeforeBinaryOperators = true; 258 Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup; 259 Style.BreakConstructorInitializersBeforeComma = true; 260 Style.ColumnLimit = 0; 261 Style.IndentWidth = 4; 262 Style.NamespaceIndentation = FormatStyle::NI_Inner; 263 Style.PointerBindsToType = true; 264 return Style; 265 } 266 267 bool getPredefinedStyle(StringRef Name, FormatStyle *Style) { 268 if (Name.equals_lower("llvm")) 269 *Style = getLLVMStyle(); 270 else if (Name.equals_lower("chromium")) 271 *Style = getChromiumStyle(); 272 else if (Name.equals_lower("mozilla")) 273 *Style = getMozillaStyle(); 274 else if (Name.equals_lower("google")) 275 *Style = getGoogleStyle(); 276 else if (Name.equals_lower("webkit")) 277 *Style = getWebKitStyle(); 278 else 279 return false; 280 281 return true; 282 } 283 284 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { 285 if (Text.trim().empty()) 286 return llvm::make_error_code(llvm::errc::invalid_argument); 287 llvm::yaml::Input Input(Text); 288 Input >> *Style; 289 return Input.error(); 290 } 291 292 std::string configurationAsText(const FormatStyle &Style) { 293 std::string Text; 294 llvm::raw_string_ostream Stream(Text); 295 llvm::yaml::Output Output(Stream); 296 // We use the same mapping method for input and output, so we need a non-const 297 // reference here. 298 FormatStyle NonConstStyle = Style; 299 Output << NonConstStyle; 300 return Stream.str(); 301 } 302 303 // Returns the length of everything up to the first possible line break after 304 // the ), ], } or > matching \c Tok. 305 static unsigned getLengthToMatchingParen(const FormatToken &Tok) { 306 if (Tok.MatchingParen == NULL) 307 return 0; 308 FormatToken *End = Tok.MatchingParen; 309 while (End->Next && !End->Next->CanBreakBefore) { 310 End = End->Next; 311 } 312 return End->TotalLength - Tok.TotalLength + 1; 313 } 314 315 namespace { 316 317 class UnwrappedLineFormatter { 318 public: 319 UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, 320 const AnnotatedLine &Line, unsigned FirstIndent, 321 const FormatToken *RootToken, 322 WhitespaceManager &Whitespaces, 323 encoding::Encoding Encoding, 324 bool BinPackInconclusiveFunctions) 325 : Style(Style), SourceMgr(SourceMgr), Line(Line), 326 FirstIndent(FirstIndent), RootToken(RootToken), 327 Whitespaces(Whitespaces), Count(0), Encoding(Encoding), 328 BinPackInconclusiveFunctions(BinPackInconclusiveFunctions) {} 329 330 /// \brief Formats an \c UnwrappedLine. 331 void format(const AnnotatedLine *NextLine) { 332 // Initialize state dependent on indent. 333 LineState State; 334 State.Column = FirstIndent; 335 State.NextToken = RootToken; 336 State.Stack.push_back(ParenState(FirstIndent, FirstIndent, 337 /*AvoidBinPacking=*/false, 338 /*NoLineBreak=*/false)); 339 State.LineContainsContinuedForLoopSection = false; 340 State.ParenLevel = 0; 341 State.StartOfStringLiteral = 0; 342 State.StartOfLineLevel = State.ParenLevel; 343 State.LowestLevelOnLine = State.ParenLevel; 344 State.IgnoreStackForComparison = false; 345 346 // The first token has already been indented and thus consumed. 347 moveStateToNextToken(State, /*DryRun=*/false, /*Newline=*/false); 348 349 if (Style.ColumnLimit == 0) { 350 formatWithoutColumnLimit(State); 351 return; 352 } 353 354 // If everything fits on a single line, just put it there. 355 unsigned ColumnLimit = Style.ColumnLimit; 356 if (NextLine && NextLine->InPPDirective && 357 !NextLine->First->HasUnescapedNewline) 358 ColumnLimit = getColumnLimit(); 359 if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) { 360 while (State.NextToken != NULL) { 361 addTokenToState(false, false, State); 362 } 363 } 364 365 // If the ObjC method declaration does not fit on a line, we should format 366 // it with one arg per line. 367 if (Line.Type == LT_ObjCMethodDecl) 368 State.Stack.back().BreakBeforeParameter = true; 369 370 // Find best solution in solution space. 371 analyzeSolutionSpace(State); 372 } 373 374 private: 375 void DebugTokenState(const FormatToken &FormatTok) { 376 const Token &Tok = FormatTok.Tok; 377 llvm::dbgs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 378 Tok.getLength()); 379 llvm::dbgs(); 380 } 381 382 struct ParenState { 383 ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, 384 bool NoLineBreak) 385 : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0), 386 BreakBeforeClosingBrace(false), QuestionColumn(0), 387 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 388 NoLineBreak(NoLineBreak), ColonPos(0), StartOfFunctionCall(0), 389 StartOfArraySubscripts(0), NestedNameSpecifierContinuation(0), 390 CallContinuation(0), VariablePos(0), ContainsLineBreak(false) {} 391 392 /// \brief The position to which a specific parenthesis level needs to be 393 /// indented. 394 unsigned Indent; 395 396 /// \brief The position of the last space on each level. 397 /// 398 /// Used e.g. to break like: 399 /// functionCall(Parameter, otherCall( 400 /// OtherParameter)); 401 unsigned LastSpace; 402 403 /// \brief The position the first "<<" operator encountered on each level. 404 /// 405 /// Used to align "<<" operators. 0 if no such operator has been encountered 406 /// on a level. 407 unsigned FirstLessLess; 408 409 /// \brief Whether a newline needs to be inserted before the block's closing 410 /// brace. 411 /// 412 /// We only want to insert a newline before the closing brace if there also 413 /// was a newline after the beginning left brace. 414 bool BreakBeforeClosingBrace; 415 416 /// \brief The column of a \c ? in a conditional expression; 417 unsigned QuestionColumn; 418 419 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple 420 /// lines, in this context. 421 bool AvoidBinPacking; 422 423 /// \brief Break after the next comma (or all the commas in this context if 424 /// \c AvoidBinPacking is \c true). 425 bool BreakBeforeParameter; 426 427 /// \brief Line breaking in this context would break a formatting rule. 428 bool NoLineBreak; 429 430 /// \brief The position of the colon in an ObjC method declaration/call. 431 unsigned ColonPos; 432 433 /// \brief The start of the most recent function in a builder-type call. 434 unsigned StartOfFunctionCall; 435 436 /// \brief Contains the start of array subscript expressions, so that they 437 /// can be aligned. 438 unsigned StartOfArraySubscripts; 439 440 /// \brief If a nested name specifier was broken over multiple lines, this 441 /// contains the start column of the second line. Otherwise 0. 442 unsigned NestedNameSpecifierContinuation; 443 444 /// \brief If a call expression was broken over multiple lines, this 445 /// contains the start column of the second line. Otherwise 0. 446 unsigned CallContinuation; 447 448 /// \brief The column of the first variable name in a variable declaration. 449 /// 450 /// Used to align further variables if necessary. 451 unsigned VariablePos; 452 453 /// \brief \c true if this \c ParenState already contains a line-break. 454 /// 455 /// The first line break in a certain \c ParenState causes extra penalty so 456 /// that clang-format prefers similar breaks, i.e. breaks in the same 457 /// parenthesis. 458 bool ContainsLineBreak; 459 460 bool operator<(const ParenState &Other) const { 461 if (Indent != Other.Indent) 462 return Indent < Other.Indent; 463 if (LastSpace != Other.LastSpace) 464 return LastSpace < Other.LastSpace; 465 if (FirstLessLess != Other.FirstLessLess) 466 return FirstLessLess < Other.FirstLessLess; 467 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 468 return BreakBeforeClosingBrace; 469 if (QuestionColumn != Other.QuestionColumn) 470 return QuestionColumn < Other.QuestionColumn; 471 if (AvoidBinPacking != Other.AvoidBinPacking) 472 return AvoidBinPacking; 473 if (BreakBeforeParameter != Other.BreakBeforeParameter) 474 return BreakBeforeParameter; 475 if (NoLineBreak != Other.NoLineBreak) 476 return NoLineBreak; 477 if (ColonPos != Other.ColonPos) 478 return ColonPos < Other.ColonPos; 479 if (StartOfFunctionCall != Other.StartOfFunctionCall) 480 return StartOfFunctionCall < Other.StartOfFunctionCall; 481 if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 482 return StartOfArraySubscripts < Other.StartOfArraySubscripts; 483 if (CallContinuation != Other.CallContinuation) 484 return CallContinuation < Other.CallContinuation; 485 if (VariablePos != Other.VariablePos) 486 return VariablePos < Other.VariablePos; 487 if (ContainsLineBreak != Other.ContainsLineBreak) 488 return ContainsLineBreak < Other.ContainsLineBreak; 489 return false; 490 } 491 }; 492 493 /// \brief The current state when indenting a unwrapped line. 494 /// 495 /// As the indenting tries different combinations this is copied by value. 496 struct LineState { 497 /// \brief The number of used columns in the current line. 498 unsigned Column; 499 500 /// \brief The token that needs to be next formatted. 501 const FormatToken *NextToken; 502 503 /// \brief \c true if this line contains a continued for-loop section. 504 bool LineContainsContinuedForLoopSection; 505 506 /// \brief The level of nesting inside (), [], <> and {}. 507 unsigned ParenLevel; 508 509 /// \brief The \c ParenLevel at the start of this line. 510 unsigned StartOfLineLevel; 511 512 /// \brief The lowest \c ParenLevel on the current line. 513 unsigned LowestLevelOnLine; 514 515 /// \brief The start column of the string literal, if we're in a string 516 /// literal sequence, 0 otherwise. 517 unsigned StartOfStringLiteral; 518 519 /// \brief A stack keeping track of properties applying to parenthesis 520 /// levels. 521 std::vector<ParenState> Stack; 522 523 /// \brief Ignore the stack of \c ParenStates for state comparison. 524 /// 525 /// In long and deeply nested unwrapped lines, the current algorithm can 526 /// be insufficient for finding the best formatting with a reasonable amount 527 /// of time and memory. Setting this flag will effectively lead to the 528 /// algorithm not analyzing some combinations. However, these combinations 529 /// rarely contain the optimal solution: In short, accepting a higher 530 /// penalty early would need to lead to different values in the \c 531 /// ParenState stack (in an otherwise identical state) and these different 532 /// values would need to lead to a significant amount of avoided penalty 533 /// later. 534 /// 535 /// FIXME: Come up with a better algorithm instead. 536 bool IgnoreStackForComparison; 537 538 /// \brief Comparison operator to be able to used \c LineState in \c map. 539 bool operator<(const LineState &Other) const { 540 if (NextToken != Other.NextToken) 541 return NextToken < Other.NextToken; 542 if (Column != Other.Column) 543 return Column < Other.Column; 544 if (LineContainsContinuedForLoopSection != 545 Other.LineContainsContinuedForLoopSection) 546 return LineContainsContinuedForLoopSection; 547 if (ParenLevel != Other.ParenLevel) 548 return ParenLevel < Other.ParenLevel; 549 if (StartOfLineLevel != Other.StartOfLineLevel) 550 return StartOfLineLevel < Other.StartOfLineLevel; 551 if (LowestLevelOnLine != Other.LowestLevelOnLine) 552 return LowestLevelOnLine < Other.LowestLevelOnLine; 553 if (StartOfStringLiteral != Other.StartOfStringLiteral) 554 return StartOfStringLiteral < Other.StartOfStringLiteral; 555 if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 556 return false; 557 return Stack < Other.Stack; 558 } 559 }; 560 561 /// \brief Formats the line starting at \p State, simply keeping all of the 562 /// input's line breaking decisions. 563 void formatWithoutColumnLimit(LineState &State) { 564 while (State.NextToken != NULL) { 565 bool Newline = mustBreak(State) || 566 (canBreak(State) && State.NextToken->NewlinesBefore > 0); 567 addTokenToState(Newline, /*DryRun=*/false, State); 568 } 569 } 570 571 /// \brief Appends the next token to \p State and updates information 572 /// necessary for indentation. 573 /// 574 /// Puts the token on the current line if \p Newline is \c false and adds a 575 /// line break and necessary indentation otherwise. 576 /// 577 /// If \p DryRun is \c false, also creates and stores the required 578 /// \c Replacement. 579 unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) { 580 const FormatToken &Current = *State.NextToken; 581 const FormatToken &Previous = *State.NextToken->Previous; 582 583 // Extra penalty that needs to be added because of the way certain line 584 // breaks are chosen. 585 unsigned ExtraPenalty = 0; 586 587 if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) { 588 // FIXME: Is this correct? 589 int WhitespaceLength = SourceMgr.getSpellingColumnNumber( 590 State.NextToken->WhitespaceRange.getEnd()) - 591 SourceMgr.getSpellingColumnNumber( 592 State.NextToken->WhitespaceRange.getBegin()); 593 State.Column += WhitespaceLength + State.NextToken->CodePointCount; 594 State.NextToken = State.NextToken->Next; 595 return 0; 596 } 597 598 // If we are continuing an expression, we want to indent an extra 4 spaces. 599 unsigned ContinuationIndent = 600 std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4; 601 if (Newline) { 602 // Breaking before the first "<<" is generally not desirable if the LHS is 603 // short. 604 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0 && 605 State.Column <= Style.ColumnLimit / 2) 606 ExtraPenalty += Style.PenaltyBreakFirstLessLess; 607 608 State.Stack.back().ContainsLineBreak = true; 609 if (Current.is(tok::r_brace)) { 610 if (Current.BlockKind == BK_BracedInit) 611 State.Column = State.Stack[State.Stack.size() - 2].LastSpace; 612 else 613 State.Column = FirstIndent; 614 } else if (Current.is(tok::string_literal) && 615 State.StartOfStringLiteral != 0) { 616 State.Column = State.StartOfStringLiteral; 617 State.Stack.back().BreakBeforeParameter = true; 618 } else if (Current.is(tok::lessless) && 619 State.Stack.back().FirstLessLess != 0) { 620 State.Column = State.Stack.back().FirstLessLess; 621 } else if (Current.isOneOf(tok::period, tok::arrow) && 622 Current.Type != TT_DesignatedInitializerPeriod) { 623 if (State.Stack.back().CallContinuation == 0) { 624 State.Column = ContinuationIndent; 625 State.Stack.back().CallContinuation = State.Column; 626 } else { 627 State.Column = State.Stack.back().CallContinuation; 628 } 629 } else if (Current.Type == TT_ConditionalExpr) { 630 State.Column = State.Stack.back().QuestionColumn; 631 } else if (Previous.is(tok::comma) && 632 State.Stack.back().VariablePos != 0) { 633 State.Column = State.Stack.back().VariablePos; 634 } else if (Previous.ClosesTemplateDeclaration || 635 ((Current.Type == TT_StartOfName || 636 Current.is(tok::kw_operator)) && 637 State.ParenLevel == 0 && 638 (!Style.IndentFunctionDeclarationAfterType || 639 Line.StartsDefinition))) { 640 State.Column = State.Stack.back().Indent; 641 } else if (Current.Type == TT_ObjCSelectorName) { 642 if (State.Stack.back().ColonPos > Current.CodePointCount) { 643 State.Column = State.Stack.back().ColonPos - Current.CodePointCount; 644 } else { 645 State.Column = State.Stack.back().Indent; 646 State.Stack.back().ColonPos = State.Column + Current.CodePointCount; 647 } 648 } else if (Current.is(tok::l_square) && 649 Current.Type != TT_ObjCMethodExpr) { 650 if (State.Stack.back().StartOfArraySubscripts != 0) 651 State.Column = State.Stack.back().StartOfArraySubscripts; 652 else 653 State.Column = ContinuationIndent; 654 } else if (Current.Type == TT_StartOfName || 655 Previous.isOneOf(tok::coloncolon, tok::equal) || 656 Previous.Type == TT_ObjCMethodExpr) { 657 State.Column = ContinuationIndent; 658 } else if (Current.Type == TT_CtorInitializerColon) { 659 State.Column = FirstIndent + Style.ConstructorInitializerIndentWidth; 660 } else if (Current.Type == TT_CtorInitializerComma) { 661 State.Column = State.Stack.back().Indent; 662 } else { 663 State.Column = State.Stack.back().Indent; 664 // Ensure that we fall back to indenting 4 spaces instead of just 665 // flushing continuations left. 666 if (State.Column == FirstIndent) 667 State.Column += 4; 668 } 669 670 if (Current.is(tok::question)) 671 State.Stack.back().BreakBeforeParameter = true; 672 if ((Previous.isOneOf(tok::comma, tok::semi) && 673 !State.Stack.back().AvoidBinPacking) || 674 Previous.Type == TT_BinaryOperator) 675 State.Stack.back().BreakBeforeParameter = false; 676 if (Previous.Type == TT_TemplateCloser && State.ParenLevel == 0) 677 State.Stack.back().BreakBeforeParameter = false; 678 679 if (!DryRun) { 680 unsigned NewLines = 1; 681 if (Current.is(tok::comment)) 682 NewLines = std::max( 683 NewLines, 684 std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1)); 685 Whitespaces.replaceWhitespace(Current, NewLines, State.Column, 686 State.Column, Line.InPPDirective); 687 } 688 689 if (!Current.isTrailingComment()) 690 State.Stack.back().LastSpace = State.Column; 691 if (Current.isOneOf(tok::arrow, tok::period) && 692 Current.Type != TT_DesignatedInitializerPeriod) 693 State.Stack.back().LastSpace += Current.CodePointCount; 694 State.StartOfLineLevel = State.ParenLevel; 695 State.LowestLevelOnLine = State.ParenLevel; 696 697 // Any break on this level means that the parent level has been broken 698 // and we need to avoid bin packing there. 699 for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { 700 State.Stack[i].BreakBeforeParameter = true; 701 } 702 const FormatToken *TokenBefore = Current.getPreviousNonComment(); 703 if (TokenBefore && !TokenBefore->isOneOf(tok::comma, tok::semi) && 704 TokenBefore->Type != TT_TemplateCloser && 705 TokenBefore->Type != TT_BinaryOperator && !TokenBefore->opensScope()) 706 State.Stack.back().BreakBeforeParameter = true; 707 708 // If we break after {, we should also break before the corresponding }. 709 if (Previous.is(tok::l_brace)) 710 State.Stack.back().BreakBeforeClosingBrace = true; 711 712 if (State.Stack.back().AvoidBinPacking) { 713 // If we are breaking after '(', '{', '<', this is not bin packing 714 // unless AllowAllParametersOfDeclarationOnNextLine is false. 715 if (!(Previous.isOneOf(tok::l_paren, tok::l_brace) || 716 Previous.Type == TT_BinaryOperator) || 717 (!Style.AllowAllParametersOfDeclarationOnNextLine && 718 Line.MustBeDeclaration)) 719 State.Stack.back().BreakBeforeParameter = true; 720 } 721 722 } else { 723 if (Current.is(tok::equal) && 724 (RootToken->is(tok::kw_for) || State.ParenLevel == 0) && 725 State.Stack.back().VariablePos == 0) { 726 State.Stack.back().VariablePos = State.Column; 727 // Move over * and & if they are bound to the variable name. 728 const FormatToken *Tok = &Previous; 729 while (Tok && State.Stack.back().VariablePos >= Tok->CodePointCount) { 730 State.Stack.back().VariablePos -= Tok->CodePointCount; 731 if (Tok->SpacesRequiredBefore != 0) 732 break; 733 Tok = Tok->Previous; 734 } 735 if (Previous.PartOfMultiVariableDeclStmt) 736 State.Stack.back().LastSpace = State.Stack.back().VariablePos; 737 } 738 739 unsigned Spaces = State.NextToken->SpacesRequiredBefore; 740 741 if (!DryRun) 742 Whitespaces.replaceWhitespace(Current, 0, Spaces, 743 State.Column + Spaces); 744 745 if (Current.Type == TT_ObjCSelectorName && 746 State.Stack.back().ColonPos == 0) { 747 if (State.Stack.back().Indent + Current.LongestObjCSelectorName > 748 State.Column + Spaces + Current.CodePointCount) 749 State.Stack.back().ColonPos = 750 State.Stack.back().Indent + Current.LongestObjCSelectorName; 751 else 752 State.Stack.back().ColonPos = 753 State.Column + Spaces + Current.CodePointCount; 754 } 755 756 if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr && 757 Current.Type != TT_LineComment) 758 State.Stack.back().Indent = State.Column + Spaces; 759 if (Previous.is(tok::comma) && !Current.isTrailingComment() && 760 State.Stack.back().AvoidBinPacking) 761 State.Stack.back().NoLineBreak = true; 762 763 State.Column += Spaces; 764 if (Current.is(tok::l_paren) && Previous.isOneOf(tok::kw_if, tok::kw_for)) 765 // Treat the condition inside an if as if it was a second function 766 // parameter, i.e. let nested calls have an indent of 4. 767 State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(". 768 else if (Previous.is(tok::comma)) 769 State.Stack.back().LastSpace = State.Column; 770 else if ((Previous.Type == TT_BinaryOperator || 771 Previous.Type == TT_ConditionalExpr || 772 Previous.Type == TT_CtorInitializerColon) && 773 !(Previous.getPrecedence() == prec::Assignment && 774 Current.FakeLParens.empty())) 775 // Always indent relative to the RHS of the expression unless this is a 776 // simple assignment without binary expression on the RHS. 777 State.Stack.back().LastSpace = State.Column; 778 else if (Previous.Type == TT_InheritanceColon) 779 State.Stack.back().Indent = State.Column; 780 else if (Previous.opensScope()) { 781 // If a function has multiple parameters (including a single parameter 782 // that is a binary expression) or a trailing call, indent all 783 // parameters from the opening parenthesis. This avoids confusing 784 // indents like: 785 // OuterFunction(InnerFunctionCall( 786 // ParameterToInnerFunction), 787 // SecondParameterToOuterFunction); 788 bool HasMultipleParameters = !Current.FakeLParens.empty(); 789 bool HasTrailingCall = false; 790 if (Previous.MatchingParen) { 791 const FormatToken *Next = Previous.MatchingParen->getNextNonComment(); 792 if (Next && Next->isOneOf(tok::period, tok::arrow)) 793 HasTrailingCall = true; 794 } 795 if (HasMultipleParameters || HasTrailingCall) 796 State.Stack.back().LastSpace = State.Column; 797 } 798 } 799 800 return moveStateToNextToken(State, DryRun, Newline) + ExtraPenalty; 801 } 802 803 /// \brief Mark the next token as consumed in \p State and modify its stacks 804 /// accordingly. 805 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline) { 806 const FormatToken &Current = *State.NextToken; 807 assert(State.Stack.size()); 808 809 if (Current.Type == TT_InheritanceColon) 810 State.Stack.back().AvoidBinPacking = true; 811 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) 812 State.Stack.back().FirstLessLess = State.Column; 813 if (Current.is(tok::l_square) && 814 State.Stack.back().StartOfArraySubscripts == 0) 815 State.Stack.back().StartOfArraySubscripts = State.Column; 816 if (Current.is(tok::question)) 817 State.Stack.back().QuestionColumn = State.Column; 818 if (!Current.opensScope() && !Current.closesScope()) 819 State.LowestLevelOnLine = 820 std::min(State.LowestLevelOnLine, State.ParenLevel); 821 if (Current.isOneOf(tok::period, tok::arrow) && 822 Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0) 823 State.Stack.back().StartOfFunctionCall = 824 Current.LastInChainOfCalls ? 0 825 : State.Column + Current.CodePointCount; 826 if (Current.Type == TT_CtorInitializerColon) { 827 // Indent 2 from the column, so: 828 // SomeClass::SomeClass() 829 // : First(...), ... 830 // Next(...) 831 // ^ line up here. 832 State.Stack.back().Indent = 833 State.Column + 834 (Style.BreakConstructorInitializersBeforeComma ? 0 : 2); 835 if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) 836 State.Stack.back().AvoidBinPacking = true; 837 State.Stack.back().BreakBeforeParameter = false; 838 } 839 840 // If return returns a binary expression, align after it. 841 if (Current.is(tok::kw_return) && !Current.FakeLParens.empty()) 842 State.Stack.back().LastSpace = State.Column + 7; 843 844 // In ObjC method declaration we align on the ":" of parameters, but we need 845 // to ensure that we indent parameters on subsequent lines by at least 4. 846 if (Current.Type == TT_ObjCMethodSpecifier) 847 State.Stack.back().Indent += 4; 848 849 // Insert scopes created by fake parenthesis. 850 const FormatToken *Previous = Current.getPreviousNonComment(); 851 // Don't add extra indentation for the first fake parenthesis after 852 // 'return', assignements or opening <({[. The indentation for these cases 853 // is special cased. 854 bool SkipFirstExtraIndent = 855 Current.is(tok::kw_return) || 856 (Previous && (Previous->opensScope() || 857 Previous->getPrecedence() == prec::Assignment)); 858 for (SmallVectorImpl<prec::Level>::const_reverse_iterator 859 I = Current.FakeLParens.rbegin(), 860 E = Current.FakeLParens.rend(); 861 I != E; ++I) { 862 ParenState NewParenState = State.Stack.back(); 863 NewParenState.ContainsLineBreak = false; 864 NewParenState.Indent = 865 std::max(std::max(State.Column, NewParenState.Indent), 866 State.Stack.back().LastSpace); 867 868 // Always indent conditional expressions. Never indent expression where 869 // the 'operator' is ',', ';' or an assignment (i.e. *I <= 870 // prec::Assignment) as those have different indentation rules. Indent 871 // other expression, unless the indentation needs to be skipped. 872 if (*I == prec::Conditional || 873 (!SkipFirstExtraIndent && *I > prec::Assignment && 874 !Style.BreakBeforeBinaryOperators)) 875 NewParenState.Indent += 4; 876 if (Previous && !Previous->opensScope()) 877 NewParenState.BreakBeforeParameter = false; 878 State.Stack.push_back(NewParenState); 879 SkipFirstExtraIndent = false; 880 } 881 882 // If we encounter an opening (, [, { or <, we add a level to our stacks to 883 // prepare for the following tokens. 884 if (Current.opensScope()) { 885 unsigned NewIndent; 886 unsigned LastSpace = State.Stack.back().LastSpace; 887 bool AvoidBinPacking; 888 if (Current.is(tok::l_brace)) { 889 NewIndent = 890 LastSpace + (Style.Cpp11BracedListStyle ? 4 : Style.IndentWidth); 891 const FormatToken *NextNoComment = Current.getNextNonComment(); 892 AvoidBinPacking = NextNoComment && 893 NextNoComment->Type == TT_DesignatedInitializerPeriod; 894 } else { 895 NewIndent = 896 4 + std::max(LastSpace, State.Stack.back().StartOfFunctionCall); 897 AvoidBinPacking = !Style.BinPackParameters || 898 (Style.ExperimentalAutoDetectBinPacking && 899 (Current.PackingKind == PPK_OnePerLine || 900 (!BinPackInconclusiveFunctions && 901 Current.PackingKind == PPK_Inconclusive))); 902 } 903 904 State.Stack.push_back(ParenState(NewIndent, LastSpace, AvoidBinPacking, 905 State.Stack.back().NoLineBreak)); 906 ++State.ParenLevel; 907 } 908 909 // If this '[' opens an ObjC call, determine whether all parameters fit into 910 // one line and put one per line if they don't. 911 if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr && 912 Current.MatchingParen != NULL) { 913 if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit()) 914 State.Stack.back().BreakBeforeParameter = true; 915 } 916 917 // If we encounter a closing ), ], } or >, we can remove a level from our 918 // stacks. 919 if (Current.isOneOf(tok::r_paren, tok::r_square) || 920 (Current.is(tok::r_brace) && State.NextToken != RootToken) || 921 State.NextToken->Type == TT_TemplateCloser) { 922 State.Stack.pop_back(); 923 --State.ParenLevel; 924 } 925 if (Current.is(tok::r_square)) { 926 // If this ends the array subscript expr, reset the corresponding value. 927 const FormatToken *NextNonComment = Current.getNextNonComment(); 928 if (NextNonComment && NextNonComment->isNot(tok::l_square)) 929 State.Stack.back().StartOfArraySubscripts = 0; 930 } 931 932 // Remove scopes created by fake parenthesis. 933 for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) { 934 unsigned VariablePos = State.Stack.back().VariablePos; 935 State.Stack.pop_back(); 936 State.Stack.back().VariablePos = VariablePos; 937 } 938 939 if (Current.is(tok::string_literal) && State.StartOfStringLiteral == 0) { 940 State.StartOfStringLiteral = State.Column; 941 } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash, 942 tok::string_literal)) { 943 State.StartOfStringLiteral = 0; 944 } 945 946 State.Column += Current.CodePointCount; 947 948 State.NextToken = State.NextToken->Next; 949 950 if (!Newline && Style.AlwaysBreakBeforeMultilineStrings && 951 Current.is(tok::string_literal) && Current.CanBreakBefore) 952 return 0; 953 954 return breakProtrudingToken(Current, State, DryRun); 955 } 956 957 /// \brief If the current token sticks out over the end of the line, break 958 /// it if possible. 959 /// 960 /// \returns An extra penalty if a token was broken, otherwise 0. 961 /// 962 /// The returned penalty will cover the cost of the additional line breaks and 963 /// column limit violation in all lines except for the last one. The penalty 964 /// for the column limit violation in the last line (and in single line 965 /// tokens) is handled in \c addNextStateToQueue. 966 unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, 967 bool DryRun) { 968 llvm::OwningPtr<BreakableToken> Token; 969 unsigned StartColumn = State.Column - Current.CodePointCount; 970 unsigned OriginalStartColumn = 971 SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) - 972 1; 973 974 if (Current.is(tok::string_literal) && 975 Current.Type != TT_ImplicitStringLiteral) { 976 // Only break up default narrow strings. 977 if (!Current.TokenText.startswith("\"")) 978 return 0; 979 // Don't break string literals with escaped newlines. As clang-format must 980 // not change the string's content, it is unlikely that we'll end up with 981 // a better format. 982 if (Current.TokenText.find("\\\n") != StringRef::npos) 983 return 0; 984 // Exempts unterminated string literals from line breaking. The user will 985 // likely want to terminate the string before any line breaking is done. 986 if (Current.IsUnterminatedLiteral) 987 return 0; 988 989 Token.reset(new BreakableStringLiteral(Current, StartColumn, 990 Line.InPPDirective, Encoding)); 991 } else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) { 992 Token.reset(new BreakableBlockComment( 993 Style, Current, StartColumn, OriginalStartColumn, !Current.Previous, 994 Line.InPPDirective, Encoding)); 995 } else if (Current.Type == TT_LineComment && 996 (Current.Previous == NULL || 997 Current.Previous->Type != TT_ImplicitStringLiteral)) { 998 // Don't break line comments with escaped newlines. These look like 999 // separate line comments, but in fact contain a single line comment with 1000 // multiple lines including leading whitespace and the '//' markers. 1001 // 1002 // FIXME: If we want to handle them correctly, we'll need to adjust 1003 // leading whitespace in consecutive lines when changing indentation of 1004 // the first line similar to what we do with block comments. 1005 StringRef::size_type EscapedNewlinePos = Current.TokenText.find("\\\n"); 1006 if (EscapedNewlinePos != StringRef::npos) { 1007 State.Column = 1008 StartColumn + 1009 encoding::getCodePointCount( 1010 Current.TokenText.substr(0, EscapedNewlinePos), Encoding) + 1011 1; 1012 return 0; 1013 } 1014 1015 Token.reset(new BreakableLineComment(Current, StartColumn, 1016 Line.InPPDirective, Encoding)); 1017 } else { 1018 return 0; 1019 } 1020 if (Current.UnbreakableTailLength >= getColumnLimit()) 1021 return 0; 1022 1023 unsigned RemainingSpace = getColumnLimit() - Current.UnbreakableTailLength; 1024 bool BreakInserted = false; 1025 unsigned Penalty = 0; 1026 unsigned RemainingTokenColumns = 0; 1027 for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); 1028 LineIndex != EndIndex; ++LineIndex) { 1029 if (!DryRun) 1030 Token->replaceWhitespaceBefore(LineIndex, Whitespaces); 1031 unsigned TailOffset = 0; 1032 RemainingTokenColumns = Token->getLineLengthAfterSplit( 1033 LineIndex, TailOffset, StringRef::npos); 1034 while (RemainingTokenColumns > RemainingSpace) { 1035 BreakableToken::Split Split = 1036 Token->getSplit(LineIndex, TailOffset, getColumnLimit()); 1037 if (Split.first == StringRef::npos) { 1038 // The last line's penalty is handled in addNextStateToQueue(). 1039 if (LineIndex < EndIndex - 1) 1040 Penalty += Style.PenaltyExcessCharacter * 1041 (RemainingTokenColumns - RemainingSpace); 1042 break; 1043 } 1044 assert(Split.first != 0); 1045 unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( 1046 LineIndex, TailOffset + Split.first + Split.second, 1047 StringRef::npos); 1048 assert(NewRemainingTokenColumns < RemainingTokenColumns); 1049 if (!DryRun) 1050 Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); 1051 Penalty += Current.is(tok::string_literal) ? Style.PenaltyBreakString 1052 : Style.PenaltyBreakComment; 1053 unsigned ColumnsUsed = 1054 Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first); 1055 if (ColumnsUsed > getColumnLimit()) { 1056 Penalty += 1057 Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit()); 1058 } 1059 TailOffset += Split.first + Split.second; 1060 RemainingTokenColumns = NewRemainingTokenColumns; 1061 BreakInserted = true; 1062 } 1063 } 1064 1065 State.Column = RemainingTokenColumns; 1066 1067 if (BreakInserted) { 1068 // If we break the token inside a parameter list, we need to break before 1069 // the next parameter on all levels, so that the next parameter is clearly 1070 // visible. Line comments already introduce a break. 1071 if (Current.Type != TT_LineComment) { 1072 for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) 1073 State.Stack[i].BreakBeforeParameter = true; 1074 } 1075 1076 State.Stack.back().LastSpace = StartColumn; 1077 } 1078 return Penalty; 1079 } 1080 1081 unsigned getColumnLimit() { 1082 // In preprocessor directives reserve two chars for trailing " \" 1083 return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0); 1084 } 1085 1086 /// \brief An edge in the solution space from \c Previous->State to \c State, 1087 /// inserting a newline dependent on the \c NewLine. 1088 struct StateNode { 1089 StateNode(const LineState &State, bool NewLine, StateNode *Previous) 1090 : State(State), NewLine(NewLine), Previous(Previous) {} 1091 LineState State; 1092 bool NewLine; 1093 StateNode *Previous; 1094 }; 1095 1096 /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. 1097 /// 1098 /// In case of equal penalties, we want to prefer states that were inserted 1099 /// first. During state generation we make sure that we insert states first 1100 /// that break the line as late as possible. 1101 typedef std::pair<unsigned, unsigned> OrderedPenalty; 1102 1103 /// \brief An item in the prioritized BFS search queue. The \c StateNode's 1104 /// \c State has the given \c OrderedPenalty. 1105 typedef std::pair<OrderedPenalty, StateNode *> QueueItem; 1106 1107 /// \brief The BFS queue type. 1108 typedef std::priority_queue<QueueItem, std::vector<QueueItem>, 1109 std::greater<QueueItem> > QueueType; 1110 1111 /// \brief Analyze the entire solution space starting from \p InitialState. 1112 /// 1113 /// This implements a variant of Dijkstra's algorithm on the graph that spans 1114 /// the solution space (\c LineStates are the nodes). The algorithm tries to 1115 /// find the shortest path (the one with lowest penalty) from \p InitialState 1116 /// to a state where all tokens are placed. 1117 void analyzeSolutionSpace(LineState &InitialState) { 1118 std::set<LineState> Seen; 1119 1120 // Insert start element into queue. 1121 StateNode *Node = 1122 new (Allocator.Allocate()) StateNode(InitialState, false, NULL); 1123 Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); 1124 ++Count; 1125 1126 // While not empty, take first element and follow edges. 1127 while (!Queue.empty()) { 1128 unsigned Penalty = Queue.top().first.first; 1129 StateNode *Node = Queue.top().second; 1130 if (Node->State.NextToken == NULL) { 1131 DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); 1132 break; 1133 } 1134 Queue.pop(); 1135 1136 // Cut off the analysis of certain solutions if the analysis gets too 1137 // complex. See description of IgnoreStackForComparison. 1138 if (Count > 10000) 1139 Node->State.IgnoreStackForComparison = true; 1140 1141 if (!Seen.insert(Node->State).second) 1142 // State already examined with lower penalty. 1143 continue; 1144 1145 addNextStateToQueue(Penalty, Node, /*NewLine=*/false); 1146 addNextStateToQueue(Penalty, Node, /*NewLine=*/true); 1147 } 1148 1149 if (Queue.empty()) 1150 // We were unable to find a solution, do nothing. 1151 // FIXME: Add diagnostic? 1152 return; 1153 1154 // Reconstruct the solution. 1155 reconstructPath(InitialState, Queue.top().second); 1156 DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); 1157 DEBUG(llvm::dbgs() << "---\n"); 1158 } 1159 1160 void reconstructPath(LineState &State, StateNode *Current) { 1161 std::deque<StateNode *> Path; 1162 // We do not need a break before the initial token. 1163 while (Current->Previous) { 1164 Path.push_front(Current); 1165 Current = Current->Previous; 1166 } 1167 for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); 1168 I != E; ++I) { 1169 DEBUG({ 1170 if ((*I)->NewLine) { 1171 llvm::dbgs() << "Penalty for splitting before " 1172 << (*I)->Previous->State.NextToken->Tok.getName() << ": " 1173 << (*I)->Previous->State.NextToken->SplitPenalty << "\n"; 1174 } 1175 }); 1176 addTokenToState((*I)->NewLine, false, State); 1177 } 1178 } 1179 1180 /// \brief Add the following state to the analysis queue \c Queue. 1181 /// 1182 /// Assume the current state is \p PreviousNode and has been reached with a 1183 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. 1184 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, 1185 bool NewLine) { 1186 if (NewLine && !canBreak(PreviousNode->State)) 1187 return; 1188 if (!NewLine && mustBreak(PreviousNode->State)) 1189 return; 1190 if (NewLine) { 1191 if (!PreviousNode->State.Stack.back().ContainsLineBreak) 1192 Penalty += 15; 1193 Penalty += PreviousNode->State.NextToken->SplitPenalty; 1194 } 1195 1196 StateNode *Node = new (Allocator.Allocate()) 1197 StateNode(PreviousNode->State, NewLine, PreviousNode); 1198 Penalty += addTokenToState(NewLine, true, Node->State); 1199 if (Node->State.Column > getColumnLimit()) { 1200 unsigned ExcessCharacters = Node->State.Column - getColumnLimit(); 1201 Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; 1202 } 1203 1204 Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node)); 1205 ++Count; 1206 } 1207 1208 /// \brief Returns \c true, if a line break after \p State is allowed. 1209 bool canBreak(const LineState &State) { 1210 const FormatToken &Current = *State.NextToken; 1211 const FormatToken &Previous = *Current.Previous; 1212 assert(&Previous == Current.Previous); 1213 if (!Current.CanBreakBefore && 1214 !(Current.is(tok::r_brace) && 1215 State.Stack.back().BreakBeforeClosingBrace)) 1216 return false; 1217 // The opening "{" of a braced list has to be on the same line as the first 1218 // element if it is nested in another braced init list or function call. 1219 if (!Current.MustBreakBefore && Previous.is(tok::l_brace) && 1220 Previous.Previous && 1221 Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma)) 1222 return false; 1223 // This prevents breaks like: 1224 // ... 1225 // SomeParameter, OtherParameter).DoSomething( 1226 // ... 1227 // As they hide "DoSomething" and are generally bad for readability. 1228 if (Previous.opensScope() && 1229 State.LowestLevelOnLine < State.StartOfLineLevel) 1230 return false; 1231 return !State.Stack.back().NoLineBreak; 1232 } 1233 1234 /// \brief Returns \c true, if a line break after \p State is mandatory. 1235 bool mustBreak(const LineState &State) { 1236 const FormatToken &Current = *State.NextToken; 1237 const FormatToken &Previous = *Current.Previous; 1238 if (Current.MustBreakBefore || Current.Type == TT_InlineASMColon) 1239 return true; 1240 if (!Style.Cpp11BracedListStyle && Current.is(tok::r_brace) && 1241 State.Stack.back().BreakBeforeClosingBrace) 1242 return true; 1243 if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection) 1244 return true; 1245 if (Style.BreakConstructorInitializersBeforeComma) { 1246 if (Previous.Type == TT_CtorInitializerComma) 1247 return false; 1248 if (Current.Type == TT_CtorInitializerComma) 1249 return true; 1250 } 1251 if ((Previous.isOneOf(tok::comma, tok::semi) || Current.is(tok::question) || 1252 (Current.Type == TT_ConditionalExpr && 1253 !(Current.is(tok::colon) && Previous.is(tok::question)))) && 1254 State.Stack.back().BreakBeforeParameter && 1255 !Current.isTrailingComment() && 1256 !Current.isOneOf(tok::r_paren, tok::r_brace)) 1257 return true; 1258 if (Style.AlwaysBreakBeforeMultilineStrings && 1259 State.Column > State.Stack.back().Indent && 1260 Current.is(tok::string_literal) && Previous.isNot(tok::lessless) && 1261 Previous.Type != TT_InlineASMColon && 1262 ((Current.getNextNonComment() && 1263 Current.getNextNonComment()->is(tok::string_literal)) || 1264 (Current.TokenText.find("\\\n") != StringRef::npos))) 1265 return true; 1266 1267 if (!Style.BreakBeforeBinaryOperators) { 1268 // If we need to break somewhere inside the LHS of a binary expression, we 1269 // should also break after the operator. Otherwise, the formatting would 1270 // hide the operator precedence, e.g. in: 1271 // if (aaaaaaaaaaaaaa == 1272 // bbbbbbbbbbbbbb && c) {.. 1273 // For comparisons, we only apply this rule, if the LHS is a binary 1274 // expression itself as otherwise, the line breaks seem superfluous. 1275 // We need special cases for ">>" which we have split into two ">" while 1276 // lexing in order to make template parsing easier. 1277 // 1278 // FIXME: We'll need something similar for styles that break before binary 1279 // operators. 1280 bool IsComparison = (Previous.getPrecedence() == prec::Relational || 1281 Previous.getPrecedence() == prec::Equality) && 1282 Previous.Previous && Previous.Previous->Type != 1283 TT_BinaryOperator; // For >>. 1284 bool LHSIsBinaryExpr = 1285 Previous.Previous && Previous.Previous->FakeRParens > 0; 1286 if (Previous.Type == TT_BinaryOperator && 1287 (!IsComparison || LHSIsBinaryExpr) && 1288 Current.Type != TT_BinaryOperator && // For >>. 1289 !Current.isTrailingComment() && 1290 !Previous.isOneOf(tok::lessless, tok::question) && 1291 Previous.getPrecedence() != prec::Assignment && 1292 State.Stack.back().BreakBeforeParameter) 1293 return true; 1294 } 1295 1296 // Same as above, but for the first "<<" operator. 1297 if (Current.is(tok::lessless) && State.Stack.back().BreakBeforeParameter && 1298 State.Stack.back().FirstLessLess == 0) 1299 return true; 1300 1301 // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding 1302 // out whether it is the first parameter. Clean this up. 1303 if (Current.Type == TT_ObjCSelectorName && 1304 Current.LongestObjCSelectorName == 0 && 1305 State.Stack.back().BreakBeforeParameter) 1306 return true; 1307 if ((Current.Type == TT_CtorInitializerColon || 1308 (Previous.ClosesTemplateDeclaration && State.ParenLevel == 0))) 1309 return true; 1310 1311 if ((Current.Type == TT_StartOfName || Current.is(tok::kw_operator)) && 1312 Line.MightBeFunctionDecl && State.Stack.back().BreakBeforeParameter && 1313 State.ParenLevel == 0) 1314 return true; 1315 return false; 1316 } 1317 1318 // Returns the total number of columns required for the remaining tokens. 1319 unsigned getRemainingLength(const LineState &State) { 1320 if (State.NextToken && State.NextToken->Previous) 1321 return Line.Last->TotalLength - State.NextToken->Previous->TotalLength; 1322 return 0; 1323 } 1324 1325 FormatStyle Style; 1326 SourceManager &SourceMgr; 1327 const AnnotatedLine &Line; 1328 const unsigned FirstIndent; 1329 const FormatToken *RootToken; 1330 WhitespaceManager &Whitespaces; 1331 1332 llvm::SpecificBumpPtrAllocator<StateNode> Allocator; 1333 QueueType Queue; 1334 // Increasing count of \c StateNode items we have created. This is used 1335 // to create a deterministic order independent of the container. 1336 unsigned Count; 1337 encoding::Encoding Encoding; 1338 bool BinPackInconclusiveFunctions; 1339 }; 1340 1341 class FormatTokenLexer { 1342 public: 1343 FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, 1344 encoding::Encoding Encoding) 1345 : FormatTok(NULL), GreaterStashed(false), TrailingWhitespace(0), Lex(Lex), 1346 SourceMgr(SourceMgr), IdentTable(getFormattingLangOpts()), 1347 Encoding(Encoding) { 1348 Lex.SetKeepWhitespaceMode(true); 1349 } 1350 1351 ArrayRef<FormatToken *> lex() { 1352 assert(Tokens.empty()); 1353 do { 1354 Tokens.push_back(getNextToken()); 1355 } while (Tokens.back()->Tok.isNot(tok::eof)); 1356 return Tokens; 1357 } 1358 1359 IdentifierTable &getIdentTable() { return IdentTable; } 1360 1361 private: 1362 FormatToken *getNextToken() { 1363 if (GreaterStashed) { 1364 // Create a synthesized second '>' token. 1365 Token Greater = FormatTok->Tok; 1366 FormatTok = new (Allocator.Allocate()) FormatToken; 1367 FormatTok->Tok = Greater; 1368 SourceLocation GreaterLocation = 1369 FormatTok->Tok.getLocation().getLocWithOffset(1); 1370 FormatTok->WhitespaceRange = 1371 SourceRange(GreaterLocation, GreaterLocation); 1372 FormatTok->TokenText = ">"; 1373 FormatTok->CodePointCount = 1; 1374 GreaterStashed = false; 1375 return FormatTok; 1376 } 1377 1378 FormatTok = new (Allocator.Allocate()) FormatToken; 1379 readRawToken(*FormatTok); 1380 SourceLocation WhitespaceStart = 1381 FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); 1382 if (SourceMgr.getFileOffset(WhitespaceStart) == 0) 1383 FormatTok->IsFirst = true; 1384 1385 // Consume and record whitespace until we find a significant token. 1386 unsigned WhitespaceLength = TrailingWhitespace; 1387 while (FormatTok->Tok.is(tok::unknown)) { 1388 unsigned Newlines = FormatTok->TokenText.count('\n'); 1389 if (Newlines > 0) 1390 FormatTok->LastNewlineOffset = 1391 WhitespaceLength + FormatTok->TokenText.rfind('\n') + 1; 1392 FormatTok->NewlinesBefore += Newlines; 1393 unsigned EscapedNewlines = FormatTok->TokenText.count("\\\n"); 1394 FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines; 1395 WhitespaceLength += FormatTok->Tok.getLength(); 1396 1397 readRawToken(*FormatTok); 1398 } 1399 1400 // In case the token starts with escaped newlines, we want to 1401 // take them into account as whitespace - this pattern is quite frequent 1402 // in macro definitions. 1403 // FIXME: What do we want to do with other escaped spaces, and escaped 1404 // spaces or newlines in the middle of tokens? 1405 // FIXME: Add a more explicit test. 1406 while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' && 1407 FormatTok->TokenText[1] == '\n') { 1408 // FIXME: ++FormatTok->NewlinesBefore is missing... 1409 WhitespaceLength += 2; 1410 FormatTok->TokenText = FormatTok->TokenText.substr(2); 1411 } 1412 1413 TrailingWhitespace = 0; 1414 if (FormatTok->Tok.is(tok::comment)) { 1415 StringRef UntrimmedText = FormatTok->TokenText; 1416 FormatTok->TokenText = FormatTok->TokenText.rtrim(); 1417 TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size(); 1418 } else if (FormatTok->Tok.is(tok::raw_identifier)) { 1419 IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText); 1420 FormatTok->Tok.setIdentifierInfo(&Info); 1421 FormatTok->Tok.setKind(Info.getTokenID()); 1422 } else if (FormatTok->Tok.is(tok::greatergreater)) { 1423 FormatTok->Tok.setKind(tok::greater); 1424 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); 1425 GreaterStashed = true; 1426 } 1427 1428 // Now FormatTok is the next non-whitespace token. 1429 FormatTok->CodePointCount = 1430 encoding::getCodePointCount(FormatTok->TokenText, Encoding); 1431 1432 FormatTok->WhitespaceRange = SourceRange( 1433 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); 1434 return FormatTok; 1435 } 1436 1437 FormatToken *FormatTok; 1438 bool GreaterStashed; 1439 unsigned TrailingWhitespace; 1440 Lexer &Lex; 1441 SourceManager &SourceMgr; 1442 IdentifierTable IdentTable; 1443 encoding::Encoding Encoding; 1444 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; 1445 SmallVector<FormatToken *, 16> Tokens; 1446 1447 void readRawToken(FormatToken &Tok) { 1448 Lex.LexFromRawLexer(Tok.Tok); 1449 Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), 1450 Tok.Tok.getLength()); 1451 1452 // For formatting, treat unterminated string literals like normal string 1453 // literals. 1454 if (Tok.is(tok::unknown) && !Tok.TokenText.empty() && 1455 Tok.TokenText[0] == '"') { 1456 Tok.Tok.setKind(tok::string_literal); 1457 Tok.IsUnterminatedLiteral = true; 1458 } 1459 } 1460 }; 1461 1462 class Formatter : public UnwrappedLineConsumer { 1463 public: 1464 Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, 1465 const std::vector<CharSourceRange> &Ranges) 1466 : Style(Style), Lex(Lex), SourceMgr(SourceMgr), 1467 Whitespaces(SourceMgr, Style), Ranges(Ranges), 1468 Encoding(encoding::detectEncoding(Lex.getBuffer())) { 1469 DEBUG(llvm::dbgs() << "File encoding: " 1470 << (Encoding == encoding::Encoding_UTF8 ? "UTF8" 1471 : "unknown") 1472 << "\n"); 1473 } 1474 1475 virtual ~Formatter() {} 1476 1477 tooling::Replacements format() { 1478 FormatTokenLexer Tokens(Lex, SourceMgr, Encoding); 1479 1480 UnwrappedLineParser Parser(Style, Tokens.lex(), *this); 1481 bool StructuralError = Parser.parse(); 1482 TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in")); 1483 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1484 Annotator.annotate(AnnotatedLines[i]); 1485 } 1486 deriveLocalStyle(); 1487 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1488 Annotator.calculateFormattingInformation(AnnotatedLines[i]); 1489 } 1490 1491 // Adapt level to the next line if this is a comment. 1492 // FIXME: Can/should this be done in the UnwrappedLineParser? 1493 const AnnotatedLine *NextNonCommentLine = NULL; 1494 for (unsigned i = AnnotatedLines.size() - 1; i > 0; --i) { 1495 if (NextNonCommentLine && AnnotatedLines[i].First->is(tok::comment) && 1496 !AnnotatedLines[i].First->Next) 1497 AnnotatedLines[i].Level = NextNonCommentLine->Level; 1498 else 1499 NextNonCommentLine = AnnotatedLines[i].First->isNot(tok::r_brace) 1500 ? &AnnotatedLines[i] 1501 : NULL; 1502 } 1503 1504 std::vector<int> IndentForLevel; 1505 bool PreviousLineWasTouched = false; 1506 const FormatToken *PreviousLineLastToken = 0; 1507 bool FormatPPDirective = false; 1508 for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(), 1509 E = AnnotatedLines.end(); 1510 I != E; ++I) { 1511 const AnnotatedLine &TheLine = *I; 1512 const FormatToken *FirstTok = TheLine.First; 1513 int Offset = getIndentOffset(*TheLine.First); 1514 1515 // Check whether this line is part of a formatted preprocessor directive. 1516 if (FirstTok->HasUnescapedNewline) 1517 FormatPPDirective = false; 1518 if (!FormatPPDirective && TheLine.InPPDirective && 1519 (touchesLine(TheLine) || touchesPPDirective(I + 1, E))) 1520 FormatPPDirective = true; 1521 1522 // Determine indent and try to merge multiple unwrapped lines. 1523 while (IndentForLevel.size() <= TheLine.Level) 1524 IndentForLevel.push_back(-1); 1525 IndentForLevel.resize(TheLine.Level + 1); 1526 unsigned Indent = getIndent(IndentForLevel, TheLine.Level); 1527 if (static_cast<int>(Indent) + Offset >= 0) 1528 Indent += Offset; 1529 tryFitMultipleLinesInOne(Indent, I, E); 1530 1531 bool WasMoved = PreviousLineWasTouched && FirstTok->NewlinesBefore == 0; 1532 if (TheLine.First->is(tok::eof)) { 1533 if (PreviousLineWasTouched) { 1534 unsigned NewLines = std::min(FirstTok->NewlinesBefore, 1u); 1535 Whitespaces.replaceWhitespace(*TheLine.First, NewLines, /*Indent*/ 0, 1536 /*TargetColumn*/ 0); 1537 } 1538 } else if (TheLine.Type != LT_Invalid && 1539 (WasMoved || FormatPPDirective || touchesLine(TheLine))) { 1540 unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level); 1541 if (FirstTok->WhitespaceRange.isValid() && 1542 // Insert a break even if there is a structural error in case where 1543 // we break apart a line consisting of multiple unwrapped lines. 1544 (FirstTok->NewlinesBefore == 0 || !StructuralError)) { 1545 formatFirstToken(*TheLine.First, PreviousLineLastToken, Indent, 1546 TheLine.InPPDirective); 1547 } else { 1548 Indent = LevelIndent = 1549 SourceMgr.getSpellingColumnNumber(FirstTok->Tok.getLocation()) - 1550 1; 1551 } 1552 UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent, 1553 TheLine.First, Whitespaces, Encoding, 1554 BinPackInconclusiveFunctions); 1555 Formatter.format(I + 1 != E ? &*(I + 1) : NULL); 1556 IndentForLevel[TheLine.Level] = LevelIndent; 1557 PreviousLineWasTouched = true; 1558 } else { 1559 // Format the first token if necessary, and notify the WhitespaceManager 1560 // about the unchanged whitespace. 1561 for (const FormatToken *Tok = TheLine.First; Tok != NULL; 1562 Tok = Tok->Next) { 1563 if (Tok == TheLine.First && 1564 (Tok->NewlinesBefore > 0 || Tok->IsFirst)) { 1565 unsigned LevelIndent = 1566 SourceMgr.getSpellingColumnNumber(Tok->Tok.getLocation()) - 1; 1567 // Remove trailing whitespace of the previous line if it was 1568 // touched. 1569 if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) { 1570 formatFirstToken(*Tok, PreviousLineLastToken, LevelIndent, 1571 TheLine.InPPDirective); 1572 } else { 1573 Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective); 1574 } 1575 1576 if (static_cast<int>(LevelIndent) - Offset >= 0) 1577 LevelIndent -= Offset; 1578 if (Tok->isNot(tok::comment)) 1579 IndentForLevel[TheLine.Level] = LevelIndent; 1580 } else { 1581 Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective); 1582 } 1583 } 1584 // If we did not reformat this unwrapped line, the column at the end of 1585 // the last token is unchanged - thus, we can calculate the end of the 1586 // last token. 1587 PreviousLineWasTouched = false; 1588 } 1589 PreviousLineLastToken = I->Last; 1590 } 1591 return Whitespaces.generateReplacements(); 1592 } 1593 1594 private: 1595 void deriveLocalStyle() { 1596 unsigned CountBoundToVariable = 0; 1597 unsigned CountBoundToType = 0; 1598 bool HasCpp03IncompatibleFormat = false; 1599 bool HasBinPackedFunction = false; 1600 bool HasOnePerLineFunction = false; 1601 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1602 if (!AnnotatedLines[i].First->Next) 1603 continue; 1604 FormatToken *Tok = AnnotatedLines[i].First->Next; 1605 while (Tok->Next) { 1606 if (Tok->Type == TT_PointerOrReference) { 1607 bool SpacesBefore = 1608 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); 1609 bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() != 1610 Tok->Next->WhitespaceRange.getEnd(); 1611 if (SpacesBefore && !SpacesAfter) 1612 ++CountBoundToVariable; 1613 else if (!SpacesBefore && SpacesAfter) 1614 ++CountBoundToType; 1615 } 1616 1617 if (Tok->Type == TT_TemplateCloser && 1618 Tok->Previous->Type == TT_TemplateCloser && 1619 Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) 1620 HasCpp03IncompatibleFormat = true; 1621 1622 if (Tok->PackingKind == PPK_BinPacked) 1623 HasBinPackedFunction = true; 1624 if (Tok->PackingKind == PPK_OnePerLine) 1625 HasOnePerLineFunction = true; 1626 1627 Tok = Tok->Next; 1628 } 1629 } 1630 if (Style.DerivePointerBinding) { 1631 if (CountBoundToType > CountBoundToVariable) 1632 Style.PointerBindsToType = true; 1633 else if (CountBoundToType < CountBoundToVariable) 1634 Style.PointerBindsToType = false; 1635 } 1636 if (Style.Standard == FormatStyle::LS_Auto) { 1637 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11 1638 : FormatStyle::LS_Cpp03; 1639 } 1640 BinPackInconclusiveFunctions = 1641 HasBinPackedFunction || !HasOnePerLineFunction; 1642 } 1643 1644 /// \brief Get the indent of \p Level from \p IndentForLevel. 1645 /// 1646 /// \p IndentForLevel must contain the indent for the level \c l 1647 /// at \p IndentForLevel[l], or a value < 0 if the indent for 1648 /// that level is unknown. 1649 unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) { 1650 if (IndentForLevel[Level] != -1) 1651 return IndentForLevel[Level]; 1652 if (Level == 0) 1653 return 0; 1654 return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; 1655 } 1656 1657 /// \brief Get the offset of the line relatively to the level. 1658 /// 1659 /// For example, 'public:' labels in classes are offset by 1 or 2 1660 /// characters to the left from their level. 1661 int getIndentOffset(const FormatToken &RootToken) { 1662 if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier()) 1663 return Style.AccessModifierOffset; 1664 return 0; 1665 } 1666 1667 /// \brief Tries to merge lines into one. 1668 /// 1669 /// This will change \c Line and \c AnnotatedLine to contain the merged line, 1670 /// if possible; note that \c I will be incremented when lines are merged. 1671 void tryFitMultipleLinesInOne(unsigned Indent, 1672 std::vector<AnnotatedLine>::iterator &I, 1673 std::vector<AnnotatedLine>::iterator E) { 1674 // We can never merge stuff if there are trailing line comments. 1675 if (I->Last->Type == TT_LineComment) 1676 return; 1677 1678 if (Indent > Style.ColumnLimit) 1679 return; 1680 1681 unsigned Limit = Style.ColumnLimit - Indent; 1682 // If we already exceed the column limit, we set 'Limit' to 0. The different 1683 // tryMerge..() functions can then decide whether to still do merging. 1684 Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength; 1685 1686 if (I + 1 == E || (I + 1)->Type == LT_Invalid) 1687 return; 1688 1689 if (I->Last->is(tok::l_brace)) { 1690 tryMergeSimpleBlock(I, E, Limit); 1691 } else if (Style.AllowShortIfStatementsOnASingleLine && 1692 I->First->is(tok::kw_if)) { 1693 tryMergeSimpleControlStatement(I, E, Limit); 1694 } else if (Style.AllowShortLoopsOnASingleLine && 1695 I->First->isOneOf(tok::kw_for, tok::kw_while)) { 1696 tryMergeSimpleControlStatement(I, E, Limit); 1697 } else if (I->InPPDirective && 1698 (I->First->HasUnescapedNewline || I->First->IsFirst)) { 1699 tryMergeSimplePPDirective(I, E, Limit); 1700 } 1701 } 1702 1703 void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I, 1704 std::vector<AnnotatedLine>::iterator E, 1705 unsigned Limit) { 1706 if (Limit == 0) 1707 return; 1708 AnnotatedLine &Line = *I; 1709 if (!(I + 1)->InPPDirective || (I + 1)->First->HasUnescapedNewline) 1710 return; 1711 if (I + 2 != E && (I + 2)->InPPDirective && 1712 !(I + 2)->First->HasUnescapedNewline) 1713 return; 1714 if (1 + (I + 1)->Last->TotalLength > Limit) 1715 return; 1716 join(Line, *(++I)); 1717 } 1718 1719 void tryMergeSimpleControlStatement(std::vector<AnnotatedLine>::iterator &I, 1720 std::vector<AnnotatedLine>::iterator E, 1721 unsigned Limit) { 1722 if (Limit == 0) 1723 return; 1724 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman && 1725 (I + 1)->First->is(tok::l_brace)) 1726 return; 1727 if ((I + 1)->InPPDirective != I->InPPDirective || 1728 ((I + 1)->InPPDirective && (I + 1)->First->HasUnescapedNewline)) 1729 return; 1730 AnnotatedLine &Line = *I; 1731 if (Line.Last->isNot(tok::r_paren)) 1732 return; 1733 if (1 + (I + 1)->Last->TotalLength > Limit) 1734 return; 1735 if ((I + 1)->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, 1736 tok::kw_while) || 1737 (I + 1)->First->Type == TT_LineComment) 1738 return; 1739 // Only inline simple if's (no nested if or else). 1740 if (I + 2 != E && Line.First->is(tok::kw_if) && 1741 (I + 2)->First->is(tok::kw_else)) 1742 return; 1743 join(Line, *(++I)); 1744 } 1745 1746 void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I, 1747 std::vector<AnnotatedLine>::iterator E, 1748 unsigned Limit) { 1749 // No merging if the brace already is on the next line. 1750 if (Style.BreakBeforeBraces != FormatStyle::BS_Attach) 1751 return; 1752 1753 // First, check that the current line allows merging. This is the case if 1754 // we're not in a control flow statement and the last token is an opening 1755 // brace. 1756 AnnotatedLine &Line = *I; 1757 if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace, 1758 tok::kw_else, tok::kw_try, tok::kw_catch, 1759 tok::kw_for, 1760 // This gets rid of all ObjC @ keywords and methods. 1761 tok::at, tok::minus, tok::plus)) 1762 return; 1763 1764 FormatToken *Tok = (I + 1)->First; 1765 if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore && 1766 (Tok->getNextNonComment() == NULL || 1767 Tok->getNextNonComment()->is(tok::semi))) { 1768 // We merge empty blocks even if the line exceeds the column limit. 1769 Tok->SpacesRequiredBefore = 0; 1770 Tok->CanBreakBefore = true; 1771 join(Line, *(I + 1)); 1772 I += 1; 1773 } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) { 1774 // Check that we still have three lines and they fit into the limit. 1775 if (I + 2 == E || (I + 2)->Type == LT_Invalid || 1776 !nextTwoLinesFitInto(I, Limit)) 1777 return; 1778 1779 // Second, check that the next line does not contain any braces - if it 1780 // does, readability declines when putting it into a single line. 1781 if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore) 1782 return; 1783 do { 1784 if (Tok->isOneOf(tok::l_brace, tok::r_brace)) 1785 return; 1786 Tok = Tok->Next; 1787 } while (Tok != NULL); 1788 1789 // Last, check that the third line contains a single closing brace. 1790 Tok = (I + 2)->First; 1791 if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) || 1792 Tok->MustBreakBefore) 1793 return; 1794 1795 join(Line, *(I + 1)); 1796 join(Line, *(I + 2)); 1797 I += 2; 1798 } 1799 } 1800 1801 bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I, 1802 unsigned Limit) { 1803 return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <= 1804 Limit; 1805 } 1806 1807 void join(AnnotatedLine &A, const AnnotatedLine &B) { 1808 assert(!A.Last->Next); 1809 assert(!B.First->Previous); 1810 A.Last->Next = B.First; 1811 B.First->Previous = A.Last; 1812 unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; 1813 for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { 1814 Tok->TotalLength += LengthA; 1815 A.Last = Tok; 1816 } 1817 } 1818 1819 bool touchesRanges(const CharSourceRange &Range) { 1820 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1821 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), 1822 Ranges[i].getBegin()) && 1823 !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(), 1824 Range.getBegin())) 1825 return true; 1826 } 1827 return false; 1828 } 1829 1830 bool touchesLine(const AnnotatedLine &TheLine) { 1831 const FormatToken *First = TheLine.First; 1832 const FormatToken *Last = TheLine.Last; 1833 CharSourceRange LineRange = CharSourceRange::getCharRange( 1834 First->WhitespaceRange.getBegin().getLocWithOffset( 1835 First->LastNewlineOffset), 1836 Last->Tok.getLocation().getLocWithOffset(Last->TokenText.size() - 1)); 1837 return touchesRanges(LineRange); 1838 } 1839 1840 bool touchesPPDirective(std::vector<AnnotatedLine>::iterator I, 1841 std::vector<AnnotatedLine>::iterator E) { 1842 for (; I != E; ++I) { 1843 if (I->First->HasUnescapedNewline) 1844 return false; 1845 if (touchesLine(*I)) 1846 return true; 1847 } 1848 return false; 1849 } 1850 1851 bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) { 1852 const FormatToken *First = TheLine.First; 1853 CharSourceRange LineRange = CharSourceRange::getCharRange( 1854 First->WhitespaceRange.getBegin(), 1855 First->WhitespaceRange.getBegin().getLocWithOffset( 1856 First->LastNewlineOffset)); 1857 return touchesRanges(LineRange); 1858 } 1859 1860 virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { 1861 AnnotatedLines.push_back(AnnotatedLine(TheLine)); 1862 } 1863 1864 /// \brief Add a new line and the required indent before the first Token 1865 /// of the \c UnwrappedLine if there was no structural parsing error. 1866 /// Returns the indent level of the \c UnwrappedLine. 1867 void formatFirstToken(const FormatToken &RootToken, 1868 const FormatToken *PreviousToken, unsigned Indent, 1869 bool InPPDirective) { 1870 unsigned Newlines = 1871 std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); 1872 // Remove empty lines before "}" where applicable. 1873 if (RootToken.is(tok::r_brace) && 1874 (!RootToken.Next || 1875 (RootToken.Next->is(tok::semi) && !RootToken.Next->Next))) 1876 Newlines = std::min(Newlines, 1u); 1877 if (Newlines == 0 && !RootToken.IsFirst) 1878 Newlines = 1; 1879 1880 // Insert extra new line before access specifiers. 1881 if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) && 1882 RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1) 1883 ++Newlines; 1884 1885 Whitespaces.replaceWhitespace( 1886 RootToken, Newlines, Indent, Indent, 1887 InPPDirective && !RootToken.HasUnescapedNewline); 1888 } 1889 1890 FormatStyle Style; 1891 Lexer &Lex; 1892 SourceManager &SourceMgr; 1893 WhitespaceManager Whitespaces; 1894 std::vector<CharSourceRange> Ranges; 1895 std::vector<AnnotatedLine> AnnotatedLines; 1896 1897 encoding::Encoding Encoding; 1898 bool BinPackInconclusiveFunctions; 1899 }; 1900 1901 } // end anonymous namespace 1902 1903 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, 1904 SourceManager &SourceMgr, 1905 std::vector<CharSourceRange> Ranges) { 1906 Formatter formatter(Style, Lex, SourceMgr, Ranges); 1907 return formatter.format(); 1908 } 1909 1910 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, 1911 std::vector<tooling::Range> Ranges, 1912 StringRef FileName) { 1913 FileManager Files((FileSystemOptions())); 1914 DiagnosticsEngine Diagnostics( 1915 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 1916 new DiagnosticOptions); 1917 SourceManager SourceMgr(Diagnostics, Files); 1918 llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName); 1919 const clang::FileEntry *Entry = 1920 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); 1921 SourceMgr.overrideFileContents(Entry, Buf); 1922 FileID ID = 1923 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); 1924 Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr, 1925 getFormattingLangOpts(Style.Standard)); 1926 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); 1927 std::vector<CharSourceRange> CharRanges; 1928 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1929 SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset()); 1930 SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength()); 1931 CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); 1932 } 1933 return reformat(Style, Lex, SourceMgr, CharRanges); 1934 } 1935 1936 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) { 1937 LangOptions LangOpts; 1938 LangOpts.CPlusPlus = 1; 1939 LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 1940 LangOpts.LineComment = 1; 1941 LangOpts.Bool = 1; 1942 LangOpts.ObjC1 = 1; 1943 LangOpts.ObjC2 = 1; 1944 return LangOpts; 1945 } 1946 1947 } // namespace format 1948 } // namespace clang 1949