1 //===--- Format.cpp - Format C++ code -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements functions declared in Format.h. This will be 12 /// split into separate files as we go. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "format-formatter" 17 18 #include "BreakableToken.h" 19 #include "TokenAnnotator.h" 20 #include "UnwrappedLineParser.h" 21 #include "WhitespaceManager.h" 22 #include "clang/Basic/Diagnostic.h" 23 #include "clang/Basic/OperatorPrecedence.h" 24 #include "clang/Basic/SourceManager.h" 25 #include "clang/Format/Format.h" 26 #include "clang/Lex/Lexer.h" 27 #include "llvm/ADT/STLExtras.h" 28 #include "llvm/Support/Allocator.h" 29 #include "llvm/Support/Debug.h" 30 #include "llvm/Support/YAMLTraits.h" 31 #include <queue> 32 #include <string> 33 34 namespace llvm { 35 namespace yaml { 36 template <> 37 struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> { 38 static void enumeration(IO &IO, 39 clang::format::FormatStyle::LanguageStandard &Value) { 40 IO.enumCase(Value, "C++03", clang::format::FormatStyle::LS_Cpp03); 41 IO.enumCase(Value, "C++11", clang::format::FormatStyle::LS_Cpp11); 42 IO.enumCase(Value, "Auto", clang::format::FormatStyle::LS_Auto); 43 } 44 }; 45 46 template <> 47 struct ScalarEnumerationTraits<clang::format::FormatStyle::BraceBreakingStyle> { 48 static void 49 enumeration(IO &IO, clang::format::FormatStyle::BraceBreakingStyle &Value) { 50 IO.enumCase(Value, "Attach", clang::format::FormatStyle::BS_Attach); 51 IO.enumCase(Value, "Linux", clang::format::FormatStyle::BS_Linux); 52 IO.enumCase(Value, "Stroustrup", clang::format::FormatStyle::BS_Stroustrup); 53 IO.enumCase(Value, "Allman", clang::format::FormatStyle::BS_Allman); 54 } 55 }; 56 57 template <> 58 struct ScalarEnumerationTraits< 59 clang::format::FormatStyle::NamespaceIndentationKind> { 60 static void 61 enumeration(IO &IO, 62 clang::format::FormatStyle::NamespaceIndentationKind &Value) { 63 IO.enumCase(Value, "None", clang::format::FormatStyle::NI_None); 64 IO.enumCase(Value, "Inner", clang::format::FormatStyle::NI_Inner); 65 IO.enumCase(Value, "All", clang::format::FormatStyle::NI_All); 66 } 67 }; 68 69 template <> struct MappingTraits<clang::format::FormatStyle> { 70 static void mapping(llvm::yaml::IO &IO, clang::format::FormatStyle &Style) { 71 if (IO.outputting()) { 72 StringRef StylesArray[] = { "LLVM", "Google", "Chromium", "Mozilla" }; 73 ArrayRef<StringRef> Styles(StylesArray); 74 for (size_t i = 0, e = Styles.size(); i < e; ++i) { 75 StringRef StyleName(Styles[i]); 76 clang::format::FormatStyle PredefinedStyle; 77 if (clang::format::getPredefinedStyle(StyleName, &PredefinedStyle) && 78 Style == PredefinedStyle) { 79 IO.mapOptional("# BasedOnStyle", StyleName); 80 break; 81 } 82 } 83 } else { 84 StringRef BasedOnStyle; 85 IO.mapOptional("BasedOnStyle", BasedOnStyle); 86 if (!BasedOnStyle.empty()) 87 if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) { 88 IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle)); 89 return; 90 } 91 } 92 93 IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); 94 IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); 95 IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); 96 IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", 97 Style.AllowAllParametersOfDeclarationOnNextLine); 98 IO.mapOptional("AllowShortIfStatementsOnASingleLine", 99 Style.AllowShortIfStatementsOnASingleLine); 100 IO.mapOptional("AllowShortLoopsOnASingleLine", 101 Style.AllowShortLoopsOnASingleLine); 102 IO.mapOptional("AlwaysBreakTemplateDeclarations", 103 Style.AlwaysBreakTemplateDeclarations); 104 IO.mapOptional("AlwaysBreakBeforeMultilineStrings", 105 Style.AlwaysBreakBeforeMultilineStrings); 106 IO.mapOptional("BreakBeforeBinaryOperators", 107 Style.BreakBeforeBinaryOperators); 108 IO.mapOptional("BreakConstructorInitializersBeforeComma", 109 Style.BreakConstructorInitializersBeforeComma); 110 IO.mapOptional("BinPackParameters", Style.BinPackParameters); 111 IO.mapOptional("ColumnLimit", Style.ColumnLimit); 112 IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", 113 Style.ConstructorInitializerAllOnOneLineOrOnePerLine); 114 IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding); 115 IO.mapOptional("ExperimentalAutoDetectBinPacking", 116 Style.ExperimentalAutoDetectBinPacking); 117 IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); 118 IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); 119 IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); 120 IO.mapOptional("ObjCSpaceBeforeProtocolList", 121 Style.ObjCSpaceBeforeProtocolList); 122 IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); 123 IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString); 124 IO.mapOptional("PenaltyBreakFirstLessLess", 125 Style.PenaltyBreakFirstLessLess); 126 IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter); 127 IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", 128 Style.PenaltyReturnTypeOnItsOwnLine); 129 IO.mapOptional("PointerBindsToType", Style.PointerBindsToType); 130 IO.mapOptional("SpacesBeforeTrailingComments", 131 Style.SpacesBeforeTrailingComments); 132 IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle); 133 IO.mapOptional("Standard", Style.Standard); 134 IO.mapOptional("IndentWidth", Style.IndentWidth); 135 IO.mapOptional("UseTab", Style.UseTab); 136 IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); 137 IO.mapOptional("IndentFunctionDeclarationAfterType", 138 Style.IndentFunctionDeclarationAfterType); 139 } 140 }; 141 } 142 } 143 144 namespace clang { 145 namespace format { 146 147 void setDefaultPenalties(FormatStyle &Style) { 148 Style.PenaltyBreakComment = 45; 149 Style.PenaltyBreakFirstLessLess = 120; 150 Style.PenaltyBreakString = 1000; 151 Style.PenaltyExcessCharacter = 1000000; 152 } 153 154 FormatStyle getLLVMStyle() { 155 FormatStyle LLVMStyle; 156 LLVMStyle.AccessModifierOffset = -2; 157 LLVMStyle.AlignEscapedNewlinesLeft = false; 158 LLVMStyle.AlignTrailingComments = true; 159 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 160 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 161 LLVMStyle.AllowShortLoopsOnASingleLine = false; 162 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; 163 LLVMStyle.AlwaysBreakTemplateDeclarations = false; 164 LLVMStyle.BinPackParameters = true; 165 LLVMStyle.BreakBeforeBinaryOperators = false; 166 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 167 LLVMStyle.BreakConstructorInitializersBeforeComma = false; 168 LLVMStyle.ColumnLimit = 80; 169 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 170 LLVMStyle.Cpp11BracedListStyle = false; 171 LLVMStyle.DerivePointerBinding = false; 172 LLVMStyle.ExperimentalAutoDetectBinPacking = false; 173 LLVMStyle.IndentCaseLabels = false; 174 LLVMStyle.IndentFunctionDeclarationAfterType = false; 175 LLVMStyle.IndentWidth = 2; 176 LLVMStyle.MaxEmptyLinesToKeep = 1; 177 LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; 178 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 179 LLVMStyle.PointerBindsToType = false; 180 LLVMStyle.SpacesBeforeTrailingComments = 1; 181 LLVMStyle.Standard = FormatStyle::LS_Cpp03; 182 LLVMStyle.UseTab = false; 183 184 setDefaultPenalties(LLVMStyle); 185 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; 186 187 return LLVMStyle; 188 } 189 190 FormatStyle getGoogleStyle() { 191 FormatStyle GoogleStyle; 192 GoogleStyle.AccessModifierOffset = -1; 193 GoogleStyle.AlignEscapedNewlinesLeft = true; 194 GoogleStyle.AlignTrailingComments = true; 195 GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true; 196 GoogleStyle.AllowShortIfStatementsOnASingleLine = true; 197 GoogleStyle.AllowShortLoopsOnASingleLine = true; 198 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; 199 GoogleStyle.AlwaysBreakTemplateDeclarations = true; 200 GoogleStyle.BinPackParameters = true; 201 GoogleStyle.BreakBeforeBinaryOperators = false; 202 GoogleStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 203 GoogleStyle.BreakConstructorInitializersBeforeComma = false; 204 GoogleStyle.ColumnLimit = 80; 205 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 206 GoogleStyle.Cpp11BracedListStyle = true; 207 GoogleStyle.DerivePointerBinding = true; 208 GoogleStyle.ExperimentalAutoDetectBinPacking = false; 209 GoogleStyle.IndentCaseLabels = true; 210 GoogleStyle.IndentFunctionDeclarationAfterType = true; 211 GoogleStyle.IndentWidth = 2; 212 GoogleStyle.MaxEmptyLinesToKeep = 1; 213 GoogleStyle.NamespaceIndentation = FormatStyle::NI_None; 214 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 215 GoogleStyle.PointerBindsToType = true; 216 GoogleStyle.SpacesBeforeTrailingComments = 2; 217 GoogleStyle.Standard = FormatStyle::LS_Auto; 218 GoogleStyle.UseTab = false; 219 220 setDefaultPenalties(GoogleStyle); 221 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; 222 223 return GoogleStyle; 224 } 225 226 FormatStyle getChromiumStyle() { 227 FormatStyle ChromiumStyle = getGoogleStyle(); 228 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; 229 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; 230 ChromiumStyle.AllowShortLoopsOnASingleLine = false; 231 ChromiumStyle.BinPackParameters = false; 232 ChromiumStyle.DerivePointerBinding = false; 233 ChromiumStyle.Standard = FormatStyle::LS_Cpp03; 234 return ChromiumStyle; 235 } 236 237 FormatStyle getMozillaStyle() { 238 FormatStyle MozillaStyle = getLLVMStyle(); 239 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; 240 MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 241 MozillaStyle.DerivePointerBinding = true; 242 MozillaStyle.IndentCaseLabels = true; 243 MozillaStyle.ObjCSpaceBeforeProtocolList = false; 244 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; 245 MozillaStyle.PointerBindsToType = true; 246 return MozillaStyle; 247 } 248 249 FormatStyle getWebKitStyle() { 250 FormatStyle Style = getLLVMStyle(); 251 Style.AccessModifierOffset = -4; 252 Style.AlignTrailingComments = false; 253 Style.BreakBeforeBinaryOperators = true; 254 Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup; 255 Style.BreakConstructorInitializersBeforeComma = true; 256 Style.ColumnLimit = 0; 257 Style.IndentWidth = 4; 258 Style.NamespaceIndentation = FormatStyle::NI_Inner; 259 Style.PointerBindsToType = true; 260 return Style; 261 } 262 263 bool getPredefinedStyle(StringRef Name, FormatStyle *Style) { 264 if (Name.equals_lower("llvm")) 265 *Style = getLLVMStyle(); 266 else if (Name.equals_lower("chromium")) 267 *Style = getChromiumStyle(); 268 else if (Name.equals_lower("mozilla")) 269 *Style = getMozillaStyle(); 270 else if (Name.equals_lower("google")) 271 *Style = getGoogleStyle(); 272 else if (Name.equals_lower("webkit")) 273 *Style = getWebKitStyle(); 274 else 275 return false; 276 277 return true; 278 } 279 280 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { 281 if (Text.trim().empty()) 282 return llvm::make_error_code(llvm::errc::invalid_argument); 283 llvm::yaml::Input Input(Text); 284 Input >> *Style; 285 return Input.error(); 286 } 287 288 std::string configurationAsText(const FormatStyle &Style) { 289 std::string Text; 290 llvm::raw_string_ostream Stream(Text); 291 llvm::yaml::Output Output(Stream); 292 // We use the same mapping method for input and output, so we need a non-const 293 // reference here. 294 FormatStyle NonConstStyle = Style; 295 Output << NonConstStyle; 296 return Stream.str(); 297 } 298 299 // Returns the length of everything up to the first possible line break after 300 // the ), ], } or > matching \c Tok. 301 static unsigned getLengthToMatchingParen(const FormatToken &Tok) { 302 if (Tok.MatchingParen == NULL) 303 return 0; 304 FormatToken *End = Tok.MatchingParen; 305 while (End->Next && !End->Next->CanBreakBefore) { 306 End = End->Next; 307 } 308 return End->TotalLength - Tok.TotalLength + 1; 309 } 310 311 namespace { 312 313 class UnwrappedLineFormatter { 314 public: 315 UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, 316 const AnnotatedLine &Line, unsigned FirstIndent, 317 const FormatToken *RootToken, 318 WhitespaceManager &Whitespaces, 319 encoding::Encoding Encoding, 320 bool BinPackInconclusiveFunctions) 321 : Style(Style), SourceMgr(SourceMgr), Line(Line), 322 FirstIndent(FirstIndent), RootToken(RootToken), 323 Whitespaces(Whitespaces), Count(0), Encoding(Encoding), 324 BinPackInconclusiveFunctions(BinPackInconclusiveFunctions) {} 325 326 /// \brief Formats an \c UnwrappedLine. 327 void format(const AnnotatedLine *NextLine) { 328 // Initialize state dependent on indent. 329 LineState State; 330 State.Column = FirstIndent; 331 State.NextToken = RootToken; 332 State.Stack.push_back(ParenState(FirstIndent, FirstIndent, 333 /*AvoidBinPacking=*/false, 334 /*NoLineBreak=*/false)); 335 State.LineContainsContinuedForLoopSection = false; 336 State.ParenLevel = 0; 337 State.StartOfStringLiteral = 0; 338 State.StartOfLineLevel = State.ParenLevel; 339 State.LowestLevelOnLine = State.ParenLevel; 340 State.IgnoreStackForComparison = false; 341 342 // The first token has already been indented and thus consumed. 343 moveStateToNextToken(State, /*DryRun=*/false, /*Newline=*/false); 344 345 if (Style.ColumnLimit == 0) { 346 formatWithoutColumnLimit(State); 347 return; 348 } 349 350 // If everything fits on a single line, just put it there. 351 unsigned ColumnLimit = Style.ColumnLimit; 352 if (NextLine && NextLine->InPPDirective && 353 !NextLine->First->HasUnescapedNewline) 354 ColumnLimit = getColumnLimit(); 355 if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) { 356 while (State.NextToken != NULL) { 357 addTokenToState(false, false, State); 358 } 359 } 360 361 // If the ObjC method declaration does not fit on a line, we should format 362 // it with one arg per line. 363 if (Line.Type == LT_ObjCMethodDecl) 364 State.Stack.back().BreakBeforeParameter = true; 365 366 // Find best solution in solution space. 367 analyzeSolutionSpace(State); 368 } 369 370 private: 371 void DebugTokenState(const FormatToken &FormatTok) { 372 const Token &Tok = FormatTok.Tok; 373 llvm::dbgs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 374 Tok.getLength()); 375 llvm::dbgs(); 376 } 377 378 struct ParenState { 379 ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, 380 bool NoLineBreak) 381 : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0), 382 BreakBeforeClosingBrace(false), QuestionColumn(0), 383 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 384 NoLineBreak(NoLineBreak), ColonPos(0), StartOfFunctionCall(0), 385 StartOfArraySubscripts(0), NestedNameSpecifierContinuation(0), 386 CallContinuation(0), VariablePos(0), ContainsLineBreak(false) {} 387 388 /// \brief The position to which a specific parenthesis level needs to be 389 /// indented. 390 unsigned Indent; 391 392 /// \brief The position of the last space on each level. 393 /// 394 /// Used e.g. to break like: 395 /// functionCall(Parameter, otherCall( 396 /// OtherParameter)); 397 unsigned LastSpace; 398 399 /// \brief The position the first "<<" operator encountered on each level. 400 /// 401 /// Used to align "<<" operators. 0 if no such operator has been encountered 402 /// on a level. 403 unsigned FirstLessLess; 404 405 /// \brief Whether a newline needs to be inserted before the block's closing 406 /// brace. 407 /// 408 /// We only want to insert a newline before the closing brace if there also 409 /// was a newline after the beginning left brace. 410 bool BreakBeforeClosingBrace; 411 412 /// \brief The column of a \c ? in a conditional expression; 413 unsigned QuestionColumn; 414 415 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple 416 /// lines, in this context. 417 bool AvoidBinPacking; 418 419 /// \brief Break after the next comma (or all the commas in this context if 420 /// \c AvoidBinPacking is \c true). 421 bool BreakBeforeParameter; 422 423 /// \brief Line breaking in this context would break a formatting rule. 424 bool NoLineBreak; 425 426 /// \brief The position of the colon in an ObjC method declaration/call. 427 unsigned ColonPos; 428 429 /// \brief The start of the most recent function in a builder-type call. 430 unsigned StartOfFunctionCall; 431 432 /// \brief Contains the start of array subscript expressions, so that they 433 /// can be aligned. 434 unsigned StartOfArraySubscripts; 435 436 /// \brief If a nested name specifier was broken over multiple lines, this 437 /// contains the start column of the second line. Otherwise 0. 438 unsigned NestedNameSpecifierContinuation; 439 440 /// \brief If a call expression was broken over multiple lines, this 441 /// contains the start column of the second line. Otherwise 0. 442 unsigned CallContinuation; 443 444 /// \brief The column of the first variable name in a variable declaration. 445 /// 446 /// Used to align further variables if necessary. 447 unsigned VariablePos; 448 449 /// \brief \c true if this \c ParenState already contains a line-break. 450 /// 451 /// The first line break in a certain \c ParenState causes extra penalty so 452 /// that clang-format prefers similar breaks, i.e. breaks in the same 453 /// parenthesis. 454 bool ContainsLineBreak; 455 456 bool operator<(const ParenState &Other) const { 457 if (Indent != Other.Indent) 458 return Indent < Other.Indent; 459 if (LastSpace != Other.LastSpace) 460 return LastSpace < Other.LastSpace; 461 if (FirstLessLess != Other.FirstLessLess) 462 return FirstLessLess < Other.FirstLessLess; 463 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 464 return BreakBeforeClosingBrace; 465 if (QuestionColumn != Other.QuestionColumn) 466 return QuestionColumn < Other.QuestionColumn; 467 if (AvoidBinPacking != Other.AvoidBinPacking) 468 return AvoidBinPacking; 469 if (BreakBeforeParameter != Other.BreakBeforeParameter) 470 return BreakBeforeParameter; 471 if (NoLineBreak != Other.NoLineBreak) 472 return NoLineBreak; 473 if (ColonPos != Other.ColonPos) 474 return ColonPos < Other.ColonPos; 475 if (StartOfFunctionCall != Other.StartOfFunctionCall) 476 return StartOfFunctionCall < Other.StartOfFunctionCall; 477 if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 478 return StartOfArraySubscripts < Other.StartOfArraySubscripts; 479 if (CallContinuation != Other.CallContinuation) 480 return CallContinuation < Other.CallContinuation; 481 if (VariablePos != Other.VariablePos) 482 return VariablePos < Other.VariablePos; 483 if (ContainsLineBreak != Other.ContainsLineBreak) 484 return ContainsLineBreak < Other.ContainsLineBreak; 485 return false; 486 } 487 }; 488 489 /// \brief The current state when indenting a unwrapped line. 490 /// 491 /// As the indenting tries different combinations this is copied by value. 492 struct LineState { 493 /// \brief The number of used columns in the current line. 494 unsigned Column; 495 496 /// \brief The token that needs to be next formatted. 497 const FormatToken *NextToken; 498 499 /// \brief \c true if this line contains a continued for-loop section. 500 bool LineContainsContinuedForLoopSection; 501 502 /// \brief The level of nesting inside (), [], <> and {}. 503 unsigned ParenLevel; 504 505 /// \brief The \c ParenLevel at the start of this line. 506 unsigned StartOfLineLevel; 507 508 /// \brief The lowest \c ParenLevel on the current line. 509 unsigned LowestLevelOnLine; 510 511 /// \brief The start column of the string literal, if we're in a string 512 /// literal sequence, 0 otherwise. 513 unsigned StartOfStringLiteral; 514 515 /// \brief A stack keeping track of properties applying to parenthesis 516 /// levels. 517 std::vector<ParenState> Stack; 518 519 /// \brief Ignore the stack of \c ParenStates for state comparison. 520 /// 521 /// In long and deeply nested unwrapped lines, the current algorithm can 522 /// be insufficient for finding the best formatting with a reasonable amount 523 /// of time and memory. Setting this flag will effectively lead to the 524 /// algorithm not analyzing some combinations. However, these combinations 525 /// rarely contain the optimal solution: In short, accepting a higher 526 /// penalty early would need to lead to different values in the \c 527 /// ParenState stack (in an otherwise identical state) and these different 528 /// values would need to lead to a significant amount of avoided penalty 529 /// later. 530 /// 531 /// FIXME: Come up with a better algorithm instead. 532 bool IgnoreStackForComparison; 533 534 /// \brief Comparison operator to be able to used \c LineState in \c map. 535 bool operator<(const LineState &Other) const { 536 if (NextToken != Other.NextToken) 537 return NextToken < Other.NextToken; 538 if (Column != Other.Column) 539 return Column < Other.Column; 540 if (LineContainsContinuedForLoopSection != 541 Other.LineContainsContinuedForLoopSection) 542 return LineContainsContinuedForLoopSection; 543 if (ParenLevel != Other.ParenLevel) 544 return ParenLevel < Other.ParenLevel; 545 if (StartOfLineLevel != Other.StartOfLineLevel) 546 return StartOfLineLevel < Other.StartOfLineLevel; 547 if (LowestLevelOnLine != Other.LowestLevelOnLine) 548 return LowestLevelOnLine < Other.LowestLevelOnLine; 549 if (StartOfStringLiteral != Other.StartOfStringLiteral) 550 return StartOfStringLiteral < Other.StartOfStringLiteral; 551 if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 552 return false; 553 return Stack < Other.Stack; 554 } 555 }; 556 557 /// \brief Formats the line starting at \p State, simply keeping all of the 558 /// input's line breaking decisions. 559 void formatWithoutColumnLimit(LineState &State) { 560 while (State.NextToken != NULL) { 561 bool Newline = mustBreak(State) || 562 (canBreak(State) && State.NextToken->NewlinesBefore > 0); 563 addTokenToState(Newline, /*DryRun=*/false, State); 564 } 565 } 566 567 /// \brief Appends the next token to \p State and updates information 568 /// necessary for indentation. 569 /// 570 /// Puts the token on the current line if \p Newline is \c false and adds a 571 /// line break and necessary indentation otherwise. 572 /// 573 /// If \p DryRun is \c false, also creates and stores the required 574 /// \c Replacement. 575 unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) { 576 const FormatToken &Current = *State.NextToken; 577 const FormatToken &Previous = *State.NextToken->Previous; 578 579 // Extra penalty that needs to be added because of the way certain line 580 // breaks are chosen. 581 unsigned ExtraPenalty = 0; 582 583 if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) { 584 // FIXME: Is this correct? 585 int WhitespaceLength = SourceMgr.getSpellingColumnNumber( 586 State.NextToken->WhitespaceRange.getEnd()) - 587 SourceMgr.getSpellingColumnNumber( 588 State.NextToken->WhitespaceRange.getBegin()); 589 State.Column += WhitespaceLength + State.NextToken->CodePointCount; 590 State.NextToken = State.NextToken->Next; 591 return 0; 592 } 593 594 // If we are continuing an expression, we want to indent an extra 4 spaces. 595 unsigned ContinuationIndent = 596 std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4; 597 if (Newline) { 598 // Breaking before the first "<<" is generally not desirable if the LHS is 599 // short. 600 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0 && 601 State.Column <= Style.ColumnLimit / 2) 602 ExtraPenalty += Style.PenaltyBreakFirstLessLess; 603 604 State.Stack.back().ContainsLineBreak = true; 605 if (Current.is(tok::r_brace)) { 606 if (Current.BlockKind == BK_BracedInit) 607 State.Column = State.Stack[State.Stack.size() - 2].LastSpace; 608 else 609 State.Column = FirstIndent; 610 } else if (Current.is(tok::string_literal) && 611 State.StartOfStringLiteral != 0) { 612 State.Column = State.StartOfStringLiteral; 613 State.Stack.back().BreakBeforeParameter = true; 614 } else if (Current.is(tok::lessless) && 615 State.Stack.back().FirstLessLess != 0) { 616 State.Column = State.Stack.back().FirstLessLess; 617 } else if (Current.isOneOf(tok::period, tok::arrow) && 618 Current.Type != TT_DesignatedInitializerPeriod) { 619 if (State.Stack.back().CallContinuation == 0) { 620 State.Column = ContinuationIndent; 621 State.Stack.back().CallContinuation = State.Column; 622 } else { 623 State.Column = State.Stack.back().CallContinuation; 624 } 625 } else if (Current.Type == TT_ConditionalExpr) { 626 State.Column = State.Stack.back().QuestionColumn; 627 } else if (Previous.is(tok::comma) && 628 State.Stack.back().VariablePos != 0) { 629 State.Column = State.Stack.back().VariablePos; 630 } else if (Previous.ClosesTemplateDeclaration || 631 ((Current.Type == TT_StartOfName || 632 Current.is(tok::kw_operator)) && 633 State.ParenLevel == 0 && 634 (!Style.IndentFunctionDeclarationAfterType || 635 Line.StartsDefinition))) { 636 State.Column = State.Stack.back().Indent; 637 } else if (Current.Type == TT_ObjCSelectorName) { 638 if (State.Stack.back().ColonPos > Current.CodePointCount) { 639 State.Column = State.Stack.back().ColonPos - Current.CodePointCount; 640 } else { 641 State.Column = State.Stack.back().Indent; 642 State.Stack.back().ColonPos = State.Column + Current.CodePointCount; 643 } 644 } else if (Current.is(tok::l_square) && 645 Current.Type != TT_ObjCMethodExpr) { 646 if (State.Stack.back().StartOfArraySubscripts != 0) 647 State.Column = State.Stack.back().StartOfArraySubscripts; 648 else 649 State.Column = ContinuationIndent; 650 } else if (Current.Type == TT_StartOfName || 651 Previous.isOneOf(tok::coloncolon, tok::equal) || 652 Previous.Type == TT_ObjCMethodExpr) { 653 State.Column = ContinuationIndent; 654 } else { 655 State.Column = State.Stack.back().Indent; 656 // Ensure that we fall back to indenting 4 spaces instead of just 657 // flushing continuations left. 658 if (State.Column == FirstIndent) 659 State.Column += 4; 660 } 661 662 if (Current.is(tok::question)) 663 State.Stack.back().BreakBeforeParameter = true; 664 if ((Previous.isOneOf(tok::comma, tok::semi) && 665 !State.Stack.back().AvoidBinPacking) || 666 Previous.Type == TT_BinaryOperator) 667 State.Stack.back().BreakBeforeParameter = false; 668 if (Previous.Type == TT_TemplateCloser && State.ParenLevel == 0) 669 State.Stack.back().BreakBeforeParameter = false; 670 671 if (!DryRun) { 672 unsigned NewLines = 1; 673 if (Current.is(tok::comment)) 674 NewLines = std::max( 675 NewLines, 676 std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1)); 677 Whitespaces.replaceWhitespace(Current, NewLines, State.Column, 678 State.Column, Line.InPPDirective); 679 } 680 681 if (!Current.isTrailingComment()) 682 State.Stack.back().LastSpace = State.Column; 683 if (Current.isOneOf(tok::arrow, tok::period) && 684 Current.Type != TT_DesignatedInitializerPeriod) 685 State.Stack.back().LastSpace += Current.CodePointCount; 686 State.StartOfLineLevel = State.ParenLevel; 687 State.LowestLevelOnLine = State.ParenLevel; 688 689 // Any break on this level means that the parent level has been broken 690 // and we need to avoid bin packing there. 691 for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { 692 State.Stack[i].BreakBeforeParameter = true; 693 } 694 const FormatToken *TokenBefore = Current.getPreviousNonComment(); 695 if (TokenBefore && !TokenBefore->isOneOf(tok::comma, tok::semi) && 696 TokenBefore->Type != TT_TemplateCloser && 697 TokenBefore->Type != TT_BinaryOperator && !TokenBefore->opensScope()) 698 State.Stack.back().BreakBeforeParameter = true; 699 700 // If we break after {, we should also break before the corresponding }. 701 if (Previous.is(tok::l_brace)) 702 State.Stack.back().BreakBeforeClosingBrace = true; 703 704 if (State.Stack.back().AvoidBinPacking) { 705 // If we are breaking after '(', '{', '<', this is not bin packing 706 // unless AllowAllParametersOfDeclarationOnNextLine is false. 707 if (!(Previous.isOneOf(tok::l_paren, tok::l_brace) || 708 Previous.Type == TT_BinaryOperator) || 709 (!Style.AllowAllParametersOfDeclarationOnNextLine && 710 Line.MustBeDeclaration)) 711 State.Stack.back().BreakBeforeParameter = true; 712 } 713 714 } else { 715 if (Current.is(tok::equal) && 716 (RootToken->is(tok::kw_for) || State.ParenLevel == 0) && 717 State.Stack.back().VariablePos == 0) { 718 State.Stack.back().VariablePos = State.Column; 719 // Move over * and & if they are bound to the variable name. 720 const FormatToken *Tok = &Previous; 721 while (Tok && State.Stack.back().VariablePos >= Tok->CodePointCount) { 722 State.Stack.back().VariablePos -= Tok->CodePointCount; 723 if (Tok->SpacesRequiredBefore != 0) 724 break; 725 Tok = Tok->Previous; 726 } 727 if (Previous.PartOfMultiVariableDeclStmt) 728 State.Stack.back().LastSpace = State.Stack.back().VariablePos; 729 } 730 731 unsigned Spaces = State.NextToken->SpacesRequiredBefore; 732 733 if (!DryRun) 734 Whitespaces.replaceWhitespace(Current, 0, Spaces, 735 State.Column + Spaces); 736 737 if (Current.Type == TT_ObjCSelectorName && 738 State.Stack.back().ColonPos == 0) { 739 if (State.Stack.back().Indent + Current.LongestObjCSelectorName > 740 State.Column + Spaces + Current.CodePointCount) 741 State.Stack.back().ColonPos = 742 State.Stack.back().Indent + Current.LongestObjCSelectorName; 743 else 744 State.Stack.back().ColonPos = 745 State.Column + Spaces + Current.CodePointCount; 746 } 747 748 if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr && 749 Current.Type != TT_LineComment) 750 State.Stack.back().Indent = State.Column + Spaces; 751 if (Previous.is(tok::comma) && !Current.isTrailingComment() && 752 State.Stack.back().AvoidBinPacking) 753 State.Stack.back().NoLineBreak = true; 754 755 State.Column += Spaces; 756 if (Current.is(tok::l_paren) && Previous.isOneOf(tok::kw_if, tok::kw_for)) 757 // Treat the condition inside an if as if it was a second function 758 // parameter, i.e. let nested calls have an indent of 4. 759 State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(". 760 else if (Previous.is(tok::comma)) 761 State.Stack.back().LastSpace = State.Column; 762 else if ((Previous.Type == TT_BinaryOperator || 763 Previous.Type == TT_ConditionalExpr || 764 Previous.Type == TT_CtorInitializerColon) && 765 !(Previous.getPrecedence() == prec::Assignment && 766 Current.FakeLParens.empty())) 767 // Always indent relative to the RHS of the expression unless this is a 768 // simple assignment without binary expression on the RHS. 769 State.Stack.back().LastSpace = State.Column; 770 else if (Previous.Type == TT_InheritanceColon) 771 State.Stack.back().Indent = State.Column; 772 else if (Previous.opensScope()) { 773 // If a function has multiple parameters (including a single parameter 774 // that is a binary expression) or a trailing call, indent all 775 // parameters from the opening parenthesis. This avoids confusing 776 // indents like: 777 // OuterFunction(InnerFunctionCall( 778 // ParameterToInnerFunction), 779 // SecondParameterToOuterFunction); 780 bool HasMultipleParameters = !Current.FakeLParens.empty(); 781 bool HasTrailingCall = false; 782 if (Previous.MatchingParen) { 783 const FormatToken *Next = Previous.MatchingParen->getNextNonComment(); 784 if (Next && Next->isOneOf(tok::period, tok::arrow)) 785 HasTrailingCall = true; 786 } 787 if (HasMultipleParameters || HasTrailingCall) 788 State.Stack.back().LastSpace = State.Column; 789 } 790 } 791 792 return moveStateToNextToken(State, DryRun, Newline) + ExtraPenalty; 793 } 794 795 /// \brief Mark the next token as consumed in \p State and modify its stacks 796 /// accordingly. 797 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline) { 798 const FormatToken &Current = *State.NextToken; 799 assert(State.Stack.size()); 800 801 if (Current.Type == TT_InheritanceColon) 802 State.Stack.back().AvoidBinPacking = true; 803 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) 804 State.Stack.back().FirstLessLess = State.Column; 805 if (Current.is(tok::l_square) && 806 State.Stack.back().StartOfArraySubscripts == 0) 807 State.Stack.back().StartOfArraySubscripts = State.Column; 808 if (Current.is(tok::question)) 809 State.Stack.back().QuestionColumn = State.Column; 810 if (!Current.opensScope() && !Current.closesScope()) 811 State.LowestLevelOnLine = 812 std::min(State.LowestLevelOnLine, State.ParenLevel); 813 if (Current.isOneOf(tok::period, tok::arrow) && 814 Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0) 815 State.Stack.back().StartOfFunctionCall = 816 Current.LastInChainOfCalls ? 0 817 : State.Column + Current.CodePointCount; 818 if (Current.Type == TT_CtorInitializerColon) { 819 // Indent 2 from the column, so: 820 // SomeClass::SomeClass() 821 // : First(...), ... 822 // Next(...) 823 // ^ line up here. 824 if (!Style.BreakConstructorInitializersBeforeComma) 825 State.Stack.back().Indent = State.Column + 2; 826 if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) 827 State.Stack.back().AvoidBinPacking = true; 828 State.Stack.back().BreakBeforeParameter = false; 829 } 830 831 // If return returns a binary expression, align after it. 832 if (Current.is(tok::kw_return) && !Current.FakeLParens.empty()) 833 State.Stack.back().LastSpace = State.Column + 7; 834 835 // In ObjC method declaration we align on the ":" of parameters, but we need 836 // to ensure that we indent parameters on subsequent lines by at least 4. 837 if (Current.Type == TT_ObjCMethodSpecifier) 838 State.Stack.back().Indent += 4; 839 840 // Insert scopes created by fake parenthesis. 841 const FormatToken *Previous = Current.getPreviousNonComment(); 842 // Don't add extra indentation for the first fake parenthesis after 843 // 'return', assignements or opening <({[. The indentation for these cases 844 // is special cased. 845 bool SkipFirstExtraIndent = 846 Current.is(tok::kw_return) || 847 (Previous && (Previous->opensScope() || 848 Previous->getPrecedence() == prec::Assignment)); 849 for (SmallVectorImpl<prec::Level>::const_reverse_iterator 850 I = Current.FakeLParens.rbegin(), 851 E = Current.FakeLParens.rend(); 852 I != E; ++I) { 853 ParenState NewParenState = State.Stack.back(); 854 NewParenState.ContainsLineBreak = false; 855 NewParenState.Indent = 856 std::max(std::max(State.Column, NewParenState.Indent), 857 State.Stack.back().LastSpace); 858 859 // Always indent conditional expressions. Never indent expression where 860 // the 'operator' is ',', ';' or an assignment (i.e. *I <= 861 // prec::Assignment) as those have different indentation rules. Indent 862 // other expression, unless the indentation needs to be skipped. 863 if (*I == prec::Conditional || 864 (!SkipFirstExtraIndent && *I > prec::Assignment && 865 !Style.BreakBeforeBinaryOperators)) 866 NewParenState.Indent += 4; 867 if (Previous && !Previous->opensScope()) 868 NewParenState.BreakBeforeParameter = false; 869 State.Stack.push_back(NewParenState); 870 SkipFirstExtraIndent = false; 871 } 872 873 // If we encounter an opening (, [, { or <, we add a level to our stacks to 874 // prepare for the following tokens. 875 if (Current.opensScope()) { 876 unsigned NewIndent; 877 unsigned LastSpace = State.Stack.back().LastSpace; 878 bool AvoidBinPacking; 879 if (Current.is(tok::l_brace)) { 880 NewIndent = 881 LastSpace + (Style.Cpp11BracedListStyle ? 4 : Style.IndentWidth); 882 const FormatToken *NextNoComment = Current.getNextNonComment(); 883 AvoidBinPacking = NextNoComment && 884 NextNoComment->Type == TT_DesignatedInitializerPeriod; 885 } else { 886 NewIndent = 887 4 + std::max(LastSpace, State.Stack.back().StartOfFunctionCall); 888 AvoidBinPacking = !Style.BinPackParameters || 889 (Style.ExperimentalAutoDetectBinPacking && 890 (Current.PackingKind == PPK_OnePerLine || 891 (!BinPackInconclusiveFunctions && 892 Current.PackingKind == PPK_Inconclusive))); 893 } 894 895 State.Stack.push_back(ParenState(NewIndent, LastSpace, AvoidBinPacking, 896 State.Stack.back().NoLineBreak)); 897 ++State.ParenLevel; 898 } 899 900 // If this '[' opens an ObjC call, determine whether all parameters fit into 901 // one line and put one per line if they don't. 902 if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr && 903 Current.MatchingParen != NULL) { 904 if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit()) 905 State.Stack.back().BreakBeforeParameter = true; 906 } 907 908 // If we encounter a closing ), ], } or >, we can remove a level from our 909 // stacks. 910 if (Current.isOneOf(tok::r_paren, tok::r_square) || 911 (Current.is(tok::r_brace) && State.NextToken != RootToken) || 912 State.NextToken->Type == TT_TemplateCloser) { 913 State.Stack.pop_back(); 914 --State.ParenLevel; 915 } 916 if (Current.is(tok::r_square)) { 917 // If this ends the array subscript expr, reset the corresponding value. 918 const FormatToken *NextNonComment = Current.getNextNonComment(); 919 if (NextNonComment && NextNonComment->isNot(tok::l_square)) 920 State.Stack.back().StartOfArraySubscripts = 0; 921 } 922 923 // Remove scopes created by fake parenthesis. 924 for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) { 925 unsigned VariablePos = State.Stack.back().VariablePos; 926 State.Stack.pop_back(); 927 State.Stack.back().VariablePos = VariablePos; 928 } 929 930 if (Current.is(tok::string_literal) && State.StartOfStringLiteral == 0) { 931 State.StartOfStringLiteral = State.Column; 932 } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash, 933 tok::string_literal)) { 934 State.StartOfStringLiteral = 0; 935 } 936 937 State.Column += Current.CodePointCount; 938 939 State.NextToken = State.NextToken->Next; 940 941 if (!Newline && Style.AlwaysBreakBeforeMultilineStrings && 942 Current.is(tok::string_literal) && Current.CanBreakBefore) 943 return 0; 944 945 return breakProtrudingToken(Current, State, DryRun); 946 } 947 948 /// \brief If the current token sticks out over the end of the line, break 949 /// it if possible. 950 /// 951 /// \returns An extra penalty if a token was broken, otherwise 0. 952 /// 953 /// The returned penalty will cover the cost of the additional line breaks and 954 /// column limit violation in all lines except for the last one. The penalty 955 /// for the column limit violation in the last line (and in single line 956 /// tokens) is handled in \c addNextStateToQueue. 957 unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, 958 bool DryRun) { 959 llvm::OwningPtr<BreakableToken> Token; 960 unsigned StartColumn = State.Column - Current.CodePointCount; 961 unsigned OriginalStartColumn = 962 SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) - 963 1; 964 965 if (Current.is(tok::string_literal) && 966 Current.Type != TT_ImplicitStringLiteral) { 967 // Only break up default narrow strings. 968 if (!Current.TokenText.startswith("\"")) 969 return 0; 970 // Don't break string literals with escaped newlines. As clang-format must 971 // not change the string's content, it is unlikely that we'll end up with 972 // a better format. 973 if (Current.TokenText.find("\\\n") != StringRef::npos) 974 return 0; 975 // Exempts unterminated string literals from line breaking. The user will 976 // likely want to terminate the string before any line breaking is done. 977 if (Current.IsUnterminatedLiteral) 978 return 0; 979 980 Token.reset(new BreakableStringLiteral(Current, StartColumn, 981 Line.InPPDirective, Encoding)); 982 } else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) { 983 Token.reset(new BreakableBlockComment( 984 Style, Current, StartColumn, OriginalStartColumn, !Current.Previous, 985 Line.InPPDirective, Encoding)); 986 } else if (Current.Type == TT_LineComment && 987 (Current.Previous == NULL || 988 Current.Previous->Type != TT_ImplicitStringLiteral)) { 989 // Don't break line comments with escaped newlines. These look like 990 // separate line comments, but in fact contain a single line comment with 991 // multiple lines including leading whitespace and the '//' markers. 992 // 993 // FIXME: If we want to handle them correctly, we'll need to adjust 994 // leading whitespace in consecutive lines when changing indentation of 995 // the first line similar to what we do with block comments. 996 StringRef::size_type EscapedNewlinePos = Current.TokenText.find("\\\n"); 997 if (EscapedNewlinePos != StringRef::npos) { 998 State.Column = 999 StartColumn + 1000 encoding::getCodePointCount( 1001 Current.TokenText.substr(0, EscapedNewlinePos), Encoding) + 1002 1; 1003 return 0; 1004 } 1005 1006 Token.reset(new BreakableLineComment(Current, StartColumn, 1007 Line.InPPDirective, Encoding)); 1008 } else { 1009 return 0; 1010 } 1011 if (Current.UnbreakableTailLength >= getColumnLimit()) 1012 return 0; 1013 1014 unsigned RemainingSpace = getColumnLimit() - Current.UnbreakableTailLength; 1015 bool BreakInserted = false; 1016 unsigned Penalty = 0; 1017 unsigned RemainingTokenColumns = 0; 1018 for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); 1019 LineIndex != EndIndex; ++LineIndex) { 1020 if (!DryRun) 1021 Token->replaceWhitespaceBefore(LineIndex, Whitespaces); 1022 unsigned TailOffset = 0; 1023 RemainingTokenColumns = Token->getLineLengthAfterSplit( 1024 LineIndex, TailOffset, StringRef::npos); 1025 while (RemainingTokenColumns > RemainingSpace) { 1026 BreakableToken::Split Split = 1027 Token->getSplit(LineIndex, TailOffset, getColumnLimit()); 1028 if (Split.first == StringRef::npos) { 1029 // The last line's penalty is handled in addNextStateToQueue(). 1030 if (LineIndex < EndIndex - 1) 1031 Penalty += Style.PenaltyExcessCharacter * 1032 (RemainingTokenColumns - RemainingSpace); 1033 break; 1034 } 1035 assert(Split.first != 0); 1036 unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( 1037 LineIndex, TailOffset + Split.first + Split.second, 1038 StringRef::npos); 1039 assert(NewRemainingTokenColumns < RemainingTokenColumns); 1040 if (!DryRun) 1041 Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); 1042 Penalty += Current.is(tok::string_literal) ? Style.PenaltyBreakString 1043 : Style.PenaltyBreakComment; 1044 unsigned ColumnsUsed = 1045 Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first); 1046 if (ColumnsUsed > getColumnLimit()) { 1047 Penalty += 1048 Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit()); 1049 } 1050 TailOffset += Split.first + Split.second; 1051 RemainingTokenColumns = NewRemainingTokenColumns; 1052 BreakInserted = true; 1053 } 1054 } 1055 1056 State.Column = RemainingTokenColumns; 1057 1058 if (BreakInserted) { 1059 // If we break the token inside a parameter list, we need to break before 1060 // the next parameter on all levels, so that the next parameter is clearly 1061 // visible. Line comments already introduce a break. 1062 if (Current.Type != TT_LineComment) { 1063 for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) 1064 State.Stack[i].BreakBeforeParameter = true; 1065 } 1066 1067 State.Stack.back().LastSpace = StartColumn; 1068 } 1069 return Penalty; 1070 } 1071 1072 unsigned getColumnLimit() { 1073 // In preprocessor directives reserve two chars for trailing " \" 1074 return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0); 1075 } 1076 1077 /// \brief An edge in the solution space from \c Previous->State to \c State, 1078 /// inserting a newline dependent on the \c NewLine. 1079 struct StateNode { 1080 StateNode(const LineState &State, bool NewLine, StateNode *Previous) 1081 : State(State), NewLine(NewLine), Previous(Previous) {} 1082 LineState State; 1083 bool NewLine; 1084 StateNode *Previous; 1085 }; 1086 1087 /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. 1088 /// 1089 /// In case of equal penalties, we want to prefer states that were inserted 1090 /// first. During state generation we make sure that we insert states first 1091 /// that break the line as late as possible. 1092 typedef std::pair<unsigned, unsigned> OrderedPenalty; 1093 1094 /// \brief An item in the prioritized BFS search queue. The \c StateNode's 1095 /// \c State has the given \c OrderedPenalty. 1096 typedef std::pair<OrderedPenalty, StateNode *> QueueItem; 1097 1098 /// \brief The BFS queue type. 1099 typedef std::priority_queue<QueueItem, std::vector<QueueItem>, 1100 std::greater<QueueItem> > QueueType; 1101 1102 /// \brief Analyze the entire solution space starting from \p InitialState. 1103 /// 1104 /// This implements a variant of Dijkstra's algorithm on the graph that spans 1105 /// the solution space (\c LineStates are the nodes). The algorithm tries to 1106 /// find the shortest path (the one with lowest penalty) from \p InitialState 1107 /// to a state where all tokens are placed. 1108 void analyzeSolutionSpace(LineState &InitialState) { 1109 std::set<LineState> Seen; 1110 1111 // Insert start element into queue. 1112 StateNode *Node = 1113 new (Allocator.Allocate()) StateNode(InitialState, false, NULL); 1114 Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); 1115 ++Count; 1116 1117 // While not empty, take first element and follow edges. 1118 while (!Queue.empty()) { 1119 unsigned Penalty = Queue.top().first.first; 1120 StateNode *Node = Queue.top().second; 1121 if (Node->State.NextToken == NULL) { 1122 DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); 1123 break; 1124 } 1125 Queue.pop(); 1126 1127 // Cut off the analysis of certain solutions if the analysis gets too 1128 // complex. See description of IgnoreStackForComparison. 1129 if (Count > 10000) 1130 Node->State.IgnoreStackForComparison = true; 1131 1132 if (!Seen.insert(Node->State).second) 1133 // State already examined with lower penalty. 1134 continue; 1135 1136 addNextStateToQueue(Penalty, Node, /*NewLine=*/false); 1137 addNextStateToQueue(Penalty, Node, /*NewLine=*/true); 1138 } 1139 1140 if (Queue.empty()) 1141 // We were unable to find a solution, do nothing. 1142 // FIXME: Add diagnostic? 1143 return; 1144 1145 // Reconstruct the solution. 1146 reconstructPath(InitialState, Queue.top().second); 1147 DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); 1148 DEBUG(llvm::dbgs() << "---\n"); 1149 } 1150 1151 void reconstructPath(LineState &State, StateNode *Current) { 1152 std::deque<StateNode *> Path; 1153 // We do not need a break before the initial token. 1154 while (Current->Previous) { 1155 Path.push_front(Current); 1156 Current = Current->Previous; 1157 } 1158 for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); 1159 I != E; ++I) { 1160 DEBUG({ 1161 if ((*I)->NewLine) { 1162 llvm::dbgs() << "Penalty for splitting before " 1163 << (*I)->Previous->State.NextToken->Tok.getName() << ": " 1164 << (*I)->Previous->State.NextToken->SplitPenalty << "\n"; 1165 } 1166 }); 1167 addTokenToState((*I)->NewLine, false, State); 1168 } 1169 } 1170 1171 /// \brief Add the following state to the analysis queue \c Queue. 1172 /// 1173 /// Assume the current state is \p PreviousNode and has been reached with a 1174 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. 1175 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, 1176 bool NewLine) { 1177 if (NewLine && !canBreak(PreviousNode->State)) 1178 return; 1179 if (!NewLine && mustBreak(PreviousNode->State)) 1180 return; 1181 if (NewLine) { 1182 if (!PreviousNode->State.Stack.back().ContainsLineBreak) 1183 Penalty += 15; 1184 Penalty += PreviousNode->State.NextToken->SplitPenalty; 1185 } 1186 1187 StateNode *Node = new (Allocator.Allocate()) 1188 StateNode(PreviousNode->State, NewLine, PreviousNode); 1189 Penalty += addTokenToState(NewLine, true, Node->State); 1190 if (Node->State.Column > getColumnLimit()) { 1191 unsigned ExcessCharacters = Node->State.Column - getColumnLimit(); 1192 Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; 1193 } 1194 1195 Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node)); 1196 ++Count; 1197 } 1198 1199 /// \brief Returns \c true, if a line break after \p State is allowed. 1200 bool canBreak(const LineState &State) { 1201 const FormatToken &Current = *State.NextToken; 1202 const FormatToken &Previous = *Current.Previous; 1203 assert(&Previous == Current.Previous); 1204 if (!Current.CanBreakBefore && 1205 !(Current.is(tok::r_brace) && 1206 State.Stack.back().BreakBeforeClosingBrace)) 1207 return false; 1208 // The opening "{" of a braced list has to be on the same line as the first 1209 // element if it is nested in another braced init list or function call. 1210 if (!Current.MustBreakBefore && Previous.is(tok::l_brace) && 1211 Previous.Previous && 1212 Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma)) 1213 return false; 1214 // This prevents breaks like: 1215 // ... 1216 // SomeParameter, OtherParameter).DoSomething( 1217 // ... 1218 // As they hide "DoSomething" and are generally bad for readability. 1219 if (Previous.opensScope() && 1220 State.LowestLevelOnLine < State.StartOfLineLevel) 1221 return false; 1222 return !State.Stack.back().NoLineBreak; 1223 } 1224 1225 /// \brief Returns \c true, if a line break after \p State is mandatory. 1226 bool mustBreak(const LineState &State) { 1227 const FormatToken &Current = *State.NextToken; 1228 const FormatToken &Previous = *Current.Previous; 1229 if (Current.MustBreakBefore || Current.Type == TT_InlineASMColon) 1230 return true; 1231 if (!Style.Cpp11BracedListStyle && Current.is(tok::r_brace) && 1232 State.Stack.back().BreakBeforeClosingBrace) 1233 return true; 1234 if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection) 1235 return true; 1236 if (Style.BreakConstructorInitializersBeforeComma) { 1237 if (Previous.Type == TT_CtorInitializerComma) 1238 return false; 1239 if (Current.Type == TT_CtorInitializerComma) 1240 return true; 1241 } 1242 if ((Previous.isOneOf(tok::comma, tok::semi) || Current.is(tok::question) || 1243 (Current.Type == TT_ConditionalExpr && 1244 !(Current.is(tok::colon) && Previous.is(tok::question)))) && 1245 State.Stack.back().BreakBeforeParameter && 1246 !Current.isTrailingComment() && 1247 !Current.isOneOf(tok::r_paren, tok::r_brace)) 1248 return true; 1249 if (Style.AlwaysBreakBeforeMultilineStrings && 1250 State.Column > State.Stack.back().Indent && 1251 Current.is(tok::string_literal) && Previous.isNot(tok::lessless) && 1252 Previous.Type != TT_InlineASMColon && 1253 ((Current.getNextNonComment() && 1254 Current.getNextNonComment()->is(tok::string_literal)) || 1255 (Current.TokenText.find("\\\n") != StringRef::npos))) 1256 return true; 1257 1258 if (!Style.BreakBeforeBinaryOperators) { 1259 // If we need to break somewhere inside the LHS of a binary expression, we 1260 // should also break after the operator. Otherwise, the formatting would 1261 // hide the operator precedence, e.g. in: 1262 // if (aaaaaaaaaaaaaa == 1263 // bbbbbbbbbbbbbb && c) {.. 1264 // For comparisons, we only apply this rule, if the LHS is a binary 1265 // expression itself as otherwise, the line breaks seem superfluous. 1266 // We need special cases for ">>" which we have split into two ">" while 1267 // lexing in order to make template parsing easier. 1268 // 1269 // FIXME: We'll need something similar for styles that break before binary 1270 // operators. 1271 bool IsComparison = (Previous.getPrecedence() == prec::Relational || 1272 Previous.getPrecedence() == prec::Equality) && 1273 Previous.Previous && Previous.Previous->Type != 1274 TT_BinaryOperator; // For >>. 1275 bool LHSIsBinaryExpr = 1276 Previous.Previous && Previous.Previous->FakeRParens > 0; 1277 if (Previous.Type == TT_BinaryOperator && 1278 (!IsComparison || LHSIsBinaryExpr) && 1279 Current.Type != TT_BinaryOperator && // For >>. 1280 !Current.isTrailingComment() && 1281 !Previous.isOneOf(tok::lessless, tok::question) && 1282 Previous.getPrecedence() != prec::Assignment && 1283 State.Stack.back().BreakBeforeParameter) 1284 return true; 1285 } 1286 1287 // Same as above, but for the first "<<" operator. 1288 if (Current.is(tok::lessless) && State.Stack.back().BreakBeforeParameter && 1289 State.Stack.back().FirstLessLess == 0) 1290 return true; 1291 1292 // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding 1293 // out whether it is the first parameter. Clean this up. 1294 if (Current.Type == TT_ObjCSelectorName && 1295 Current.LongestObjCSelectorName == 0 && 1296 State.Stack.back().BreakBeforeParameter) 1297 return true; 1298 if ((Current.Type == TT_CtorInitializerColon || 1299 (Previous.ClosesTemplateDeclaration && State.ParenLevel == 0))) 1300 return true; 1301 1302 if ((Current.Type == TT_StartOfName || Current.is(tok::kw_operator)) && 1303 Line.MightBeFunctionDecl && State.Stack.back().BreakBeforeParameter && 1304 State.ParenLevel == 0) 1305 return true; 1306 return false; 1307 } 1308 1309 // Returns the total number of columns required for the remaining tokens. 1310 unsigned getRemainingLength(const LineState &State) { 1311 if (State.NextToken && State.NextToken->Previous) 1312 return Line.Last->TotalLength - State.NextToken->Previous->TotalLength; 1313 return 0; 1314 } 1315 1316 FormatStyle Style; 1317 SourceManager &SourceMgr; 1318 const AnnotatedLine &Line; 1319 const unsigned FirstIndent; 1320 const FormatToken *RootToken; 1321 WhitespaceManager &Whitespaces; 1322 1323 llvm::SpecificBumpPtrAllocator<StateNode> Allocator; 1324 QueueType Queue; 1325 // Increasing count of \c StateNode items we have created. This is used 1326 // to create a deterministic order independent of the container. 1327 unsigned Count; 1328 encoding::Encoding Encoding; 1329 bool BinPackInconclusiveFunctions; 1330 }; 1331 1332 class FormatTokenLexer { 1333 public: 1334 FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, 1335 encoding::Encoding Encoding) 1336 : FormatTok(NULL), GreaterStashed(false), TrailingWhitespace(0), Lex(Lex), 1337 SourceMgr(SourceMgr), IdentTable(getFormattingLangOpts()), 1338 Encoding(Encoding) { 1339 Lex.SetKeepWhitespaceMode(true); 1340 } 1341 1342 ArrayRef<FormatToken *> lex() { 1343 assert(Tokens.empty()); 1344 do { 1345 Tokens.push_back(getNextToken()); 1346 } while (Tokens.back()->Tok.isNot(tok::eof)); 1347 return Tokens; 1348 } 1349 1350 IdentifierTable &getIdentTable() { return IdentTable; } 1351 1352 private: 1353 FormatToken *getNextToken() { 1354 if (GreaterStashed) { 1355 // Create a synthesized second '>' token. 1356 Token Greater = FormatTok->Tok; 1357 FormatTok = new (Allocator.Allocate()) FormatToken; 1358 FormatTok->Tok = Greater; 1359 SourceLocation GreaterLocation = 1360 FormatTok->Tok.getLocation().getLocWithOffset(1); 1361 FormatTok->WhitespaceRange = 1362 SourceRange(GreaterLocation, GreaterLocation); 1363 FormatTok->TokenText = ">"; 1364 FormatTok->CodePointCount = 1; 1365 GreaterStashed = false; 1366 return FormatTok; 1367 } 1368 1369 FormatTok = new (Allocator.Allocate()) FormatToken; 1370 readRawToken(*FormatTok); 1371 SourceLocation WhitespaceStart = 1372 FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); 1373 if (SourceMgr.getFileOffset(WhitespaceStart) == 0) 1374 FormatTok->IsFirst = true; 1375 1376 // Consume and record whitespace until we find a significant token. 1377 unsigned WhitespaceLength = TrailingWhitespace; 1378 while (FormatTok->Tok.is(tok::unknown)) { 1379 unsigned Newlines = FormatTok->TokenText.count('\n'); 1380 if (Newlines > 0) 1381 FormatTok->LastNewlineOffset = 1382 WhitespaceLength + FormatTok->TokenText.rfind('\n') + 1; 1383 FormatTok->NewlinesBefore += Newlines; 1384 unsigned EscapedNewlines = FormatTok->TokenText.count("\\\n"); 1385 FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines; 1386 WhitespaceLength += FormatTok->Tok.getLength(); 1387 1388 readRawToken(*FormatTok); 1389 } 1390 1391 // In case the token starts with escaped newlines, we want to 1392 // take them into account as whitespace - this pattern is quite frequent 1393 // in macro definitions. 1394 // FIXME: What do we want to do with other escaped spaces, and escaped 1395 // spaces or newlines in the middle of tokens? 1396 // FIXME: Add a more explicit test. 1397 while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' && 1398 FormatTok->TokenText[1] == '\n') { 1399 // FIXME: ++FormatTok->NewlinesBefore is missing... 1400 WhitespaceLength += 2; 1401 FormatTok->TokenText = FormatTok->TokenText.substr(2); 1402 } 1403 1404 TrailingWhitespace = 0; 1405 if (FormatTok->Tok.is(tok::comment)) { 1406 StringRef UntrimmedText = FormatTok->TokenText; 1407 FormatTok->TokenText = FormatTok->TokenText.rtrim(); 1408 TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size(); 1409 } else if (FormatTok->Tok.is(tok::raw_identifier)) { 1410 IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText); 1411 FormatTok->Tok.setIdentifierInfo(&Info); 1412 FormatTok->Tok.setKind(Info.getTokenID()); 1413 } else if (FormatTok->Tok.is(tok::greatergreater)) { 1414 FormatTok->Tok.setKind(tok::greater); 1415 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); 1416 GreaterStashed = true; 1417 } 1418 1419 // Now FormatTok is the next non-whitespace token. 1420 FormatTok->CodePointCount = 1421 encoding::getCodePointCount(FormatTok->TokenText, Encoding); 1422 1423 FormatTok->WhitespaceRange = SourceRange( 1424 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); 1425 return FormatTok; 1426 } 1427 1428 FormatToken *FormatTok; 1429 bool GreaterStashed; 1430 unsigned TrailingWhitespace; 1431 Lexer &Lex; 1432 SourceManager &SourceMgr; 1433 IdentifierTable IdentTable; 1434 encoding::Encoding Encoding; 1435 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; 1436 SmallVector<FormatToken *, 16> Tokens; 1437 1438 void readRawToken(FormatToken &Tok) { 1439 Lex.LexFromRawLexer(Tok.Tok); 1440 Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), 1441 Tok.Tok.getLength()); 1442 1443 // For formatting, treat unterminated string literals like normal string 1444 // literals. 1445 if (Tok.is(tok::unknown) && !Tok.TokenText.empty() && 1446 Tok.TokenText[0] == '"') { 1447 Tok.Tok.setKind(tok::string_literal); 1448 Tok.IsUnterminatedLiteral = true; 1449 } 1450 } 1451 }; 1452 1453 class Formatter : public UnwrappedLineConsumer { 1454 public: 1455 Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, 1456 const std::vector<CharSourceRange> &Ranges) 1457 : Style(Style), Lex(Lex), SourceMgr(SourceMgr), 1458 Whitespaces(SourceMgr, Style), Ranges(Ranges), 1459 Encoding(encoding::detectEncoding(Lex.getBuffer())) { 1460 DEBUG(llvm::dbgs() << "File encoding: " 1461 << (Encoding == encoding::Encoding_UTF8 ? "UTF8" 1462 : "unknown") 1463 << "\n"); 1464 } 1465 1466 virtual ~Formatter() {} 1467 1468 tooling::Replacements format() { 1469 FormatTokenLexer Tokens(Lex, SourceMgr, Encoding); 1470 1471 UnwrappedLineParser Parser(Style, Tokens.lex(), *this); 1472 bool StructuralError = Parser.parse(); 1473 TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in")); 1474 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1475 Annotator.annotate(AnnotatedLines[i]); 1476 } 1477 deriveLocalStyle(); 1478 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1479 Annotator.calculateFormattingInformation(AnnotatedLines[i]); 1480 } 1481 1482 // Adapt level to the next line if this is a comment. 1483 // FIXME: Can/should this be done in the UnwrappedLineParser? 1484 const AnnotatedLine *NextNonCommentLine = NULL; 1485 for (unsigned i = AnnotatedLines.size() - 1; i > 0; --i) { 1486 if (NextNonCommentLine && AnnotatedLines[i].First->is(tok::comment) && 1487 !AnnotatedLines[i].First->Next) 1488 AnnotatedLines[i].Level = NextNonCommentLine->Level; 1489 else 1490 NextNonCommentLine = AnnotatedLines[i].First->isNot(tok::r_brace) 1491 ? &AnnotatedLines[i] 1492 : NULL; 1493 } 1494 1495 std::vector<int> IndentForLevel; 1496 bool PreviousLineWasTouched = false; 1497 const FormatToken *PreviousLineLastToken = 0; 1498 bool FormatPPDirective = false; 1499 for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(), 1500 E = AnnotatedLines.end(); 1501 I != E; ++I) { 1502 const AnnotatedLine &TheLine = *I; 1503 const FormatToken *FirstTok = TheLine.First; 1504 int Offset = getIndentOffset(*TheLine.First); 1505 1506 // Check whether this line is part of a formatted preprocessor directive. 1507 if (FirstTok->HasUnescapedNewline) 1508 FormatPPDirective = false; 1509 if (!FormatPPDirective && TheLine.InPPDirective && 1510 (touchesLine(TheLine) || touchesPPDirective(I + 1, E))) 1511 FormatPPDirective = true; 1512 1513 // Determine indent and try to merge multiple unwrapped lines. 1514 while (IndentForLevel.size() <= TheLine.Level) 1515 IndentForLevel.push_back(-1); 1516 IndentForLevel.resize(TheLine.Level + 1); 1517 unsigned Indent = getIndent(IndentForLevel, TheLine.Level); 1518 if (static_cast<int>(Indent) + Offset >= 0) 1519 Indent += Offset; 1520 tryFitMultipleLinesInOne(Indent, I, E); 1521 1522 bool WasMoved = PreviousLineWasTouched && FirstTok->NewlinesBefore == 0; 1523 if (TheLine.First->is(tok::eof)) { 1524 if (PreviousLineWasTouched) { 1525 unsigned NewLines = std::min(FirstTok->NewlinesBefore, 1u); 1526 Whitespaces.replaceWhitespace(*TheLine.First, NewLines, /*Indent*/ 0, 1527 /*TargetColumn*/ 0); 1528 } 1529 } else if (TheLine.Type != LT_Invalid && 1530 (WasMoved || FormatPPDirective || touchesLine(TheLine))) { 1531 unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level); 1532 if (FirstTok->WhitespaceRange.isValid() && 1533 // Insert a break even if there is a structural error in case where 1534 // we break apart a line consisting of multiple unwrapped lines. 1535 (FirstTok->NewlinesBefore == 0 || !StructuralError)) { 1536 formatFirstToken(*TheLine.First, PreviousLineLastToken, Indent, 1537 TheLine.InPPDirective); 1538 } else { 1539 Indent = LevelIndent = 1540 SourceMgr.getSpellingColumnNumber(FirstTok->Tok.getLocation()) - 1541 1; 1542 } 1543 UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent, 1544 TheLine.First, Whitespaces, Encoding, 1545 BinPackInconclusiveFunctions); 1546 Formatter.format(I + 1 != E ? &*(I + 1) : NULL); 1547 IndentForLevel[TheLine.Level] = LevelIndent; 1548 PreviousLineWasTouched = true; 1549 } else { 1550 // Format the first token if necessary, and notify the WhitespaceManager 1551 // about the unchanged whitespace. 1552 for (const FormatToken *Tok = TheLine.First; Tok != NULL; 1553 Tok = Tok->Next) { 1554 if (Tok == TheLine.First && 1555 (Tok->NewlinesBefore > 0 || Tok->IsFirst)) { 1556 unsigned LevelIndent = 1557 SourceMgr.getSpellingColumnNumber(Tok->Tok.getLocation()) - 1; 1558 // Remove trailing whitespace of the previous line if it was 1559 // touched. 1560 if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) { 1561 formatFirstToken(*Tok, PreviousLineLastToken, LevelIndent, 1562 TheLine.InPPDirective); 1563 } else { 1564 Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective); 1565 } 1566 1567 if (static_cast<int>(LevelIndent) - Offset >= 0) 1568 LevelIndent -= Offset; 1569 if (Tok->isNot(tok::comment)) 1570 IndentForLevel[TheLine.Level] = LevelIndent; 1571 } else { 1572 Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective); 1573 } 1574 } 1575 // If we did not reformat this unwrapped line, the column at the end of 1576 // the last token is unchanged - thus, we can calculate the end of the 1577 // last token. 1578 PreviousLineWasTouched = false; 1579 } 1580 PreviousLineLastToken = I->Last; 1581 } 1582 return Whitespaces.generateReplacements(); 1583 } 1584 1585 private: 1586 void deriveLocalStyle() { 1587 unsigned CountBoundToVariable = 0; 1588 unsigned CountBoundToType = 0; 1589 bool HasCpp03IncompatibleFormat = false; 1590 bool HasBinPackedFunction = false; 1591 bool HasOnePerLineFunction = false; 1592 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1593 if (!AnnotatedLines[i].First->Next) 1594 continue; 1595 FormatToken *Tok = AnnotatedLines[i].First->Next; 1596 while (Tok->Next) { 1597 if (Tok->Type == TT_PointerOrReference) { 1598 bool SpacesBefore = 1599 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); 1600 bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() != 1601 Tok->Next->WhitespaceRange.getEnd(); 1602 if (SpacesBefore && !SpacesAfter) 1603 ++CountBoundToVariable; 1604 else if (!SpacesBefore && SpacesAfter) 1605 ++CountBoundToType; 1606 } 1607 1608 if (Tok->Type == TT_TemplateCloser && 1609 Tok->Previous->Type == TT_TemplateCloser && 1610 Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) 1611 HasCpp03IncompatibleFormat = true; 1612 1613 if (Tok->PackingKind == PPK_BinPacked) 1614 HasBinPackedFunction = true; 1615 if (Tok->PackingKind == PPK_OnePerLine) 1616 HasOnePerLineFunction = true; 1617 1618 Tok = Tok->Next; 1619 } 1620 } 1621 if (Style.DerivePointerBinding) { 1622 if (CountBoundToType > CountBoundToVariable) 1623 Style.PointerBindsToType = true; 1624 else if (CountBoundToType < CountBoundToVariable) 1625 Style.PointerBindsToType = false; 1626 } 1627 if (Style.Standard == FormatStyle::LS_Auto) { 1628 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11 1629 : FormatStyle::LS_Cpp03; 1630 } 1631 BinPackInconclusiveFunctions = 1632 HasBinPackedFunction || !HasOnePerLineFunction; 1633 } 1634 1635 /// \brief Get the indent of \p Level from \p IndentForLevel. 1636 /// 1637 /// \p IndentForLevel must contain the indent for the level \c l 1638 /// at \p IndentForLevel[l], or a value < 0 if the indent for 1639 /// that level is unknown. 1640 unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) { 1641 if (IndentForLevel[Level] != -1) 1642 return IndentForLevel[Level]; 1643 if (Level == 0) 1644 return 0; 1645 return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; 1646 } 1647 1648 /// \brief Get the offset of the line relatively to the level. 1649 /// 1650 /// For example, 'public:' labels in classes are offset by 1 or 2 1651 /// characters to the left from their level. 1652 int getIndentOffset(const FormatToken &RootToken) { 1653 if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier()) 1654 return Style.AccessModifierOffset; 1655 return 0; 1656 } 1657 1658 /// \brief Tries to merge lines into one. 1659 /// 1660 /// This will change \c Line and \c AnnotatedLine to contain the merged line, 1661 /// if possible; note that \c I will be incremented when lines are merged. 1662 void tryFitMultipleLinesInOne(unsigned Indent, 1663 std::vector<AnnotatedLine>::iterator &I, 1664 std::vector<AnnotatedLine>::iterator E) { 1665 // We can never merge stuff if there are trailing line comments. 1666 if (I->Last->Type == TT_LineComment) 1667 return; 1668 1669 if (Indent > Style.ColumnLimit) 1670 return; 1671 1672 unsigned Limit = Style.ColumnLimit - Indent; 1673 // If we already exceed the column limit, we set 'Limit' to 0. The different 1674 // tryMerge..() functions can then decide whether to still do merging. 1675 Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength; 1676 1677 if (I + 1 == E || (I + 1)->Type == LT_Invalid) 1678 return; 1679 1680 if (I->Last->is(tok::l_brace)) { 1681 tryMergeSimpleBlock(I, E, Limit); 1682 } else if (Style.AllowShortIfStatementsOnASingleLine && 1683 I->First->is(tok::kw_if)) { 1684 tryMergeSimpleControlStatement(I, E, Limit); 1685 } else if (Style.AllowShortLoopsOnASingleLine && 1686 I->First->isOneOf(tok::kw_for, tok::kw_while)) { 1687 tryMergeSimpleControlStatement(I, E, Limit); 1688 } else if (I->InPPDirective && 1689 (I->First->HasUnescapedNewline || I->First->IsFirst)) { 1690 tryMergeSimplePPDirective(I, E, Limit); 1691 } 1692 } 1693 1694 void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I, 1695 std::vector<AnnotatedLine>::iterator E, 1696 unsigned Limit) { 1697 if (Limit == 0) 1698 return; 1699 AnnotatedLine &Line = *I; 1700 if (!(I + 1)->InPPDirective || (I + 1)->First->HasUnescapedNewline) 1701 return; 1702 if (I + 2 != E && (I + 2)->InPPDirective && 1703 !(I + 2)->First->HasUnescapedNewline) 1704 return; 1705 if (1 + (I + 1)->Last->TotalLength > Limit) 1706 return; 1707 join(Line, *(++I)); 1708 } 1709 1710 void tryMergeSimpleControlStatement(std::vector<AnnotatedLine>::iterator &I, 1711 std::vector<AnnotatedLine>::iterator E, 1712 unsigned Limit) { 1713 if (Limit == 0) 1714 return; 1715 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman && 1716 (I + 1)->First->is(tok::l_brace)) 1717 return; 1718 if ((I + 1)->InPPDirective != I->InPPDirective || 1719 ((I + 1)->InPPDirective && (I + 1)->First->HasUnescapedNewline)) 1720 return; 1721 AnnotatedLine &Line = *I; 1722 if (Line.Last->isNot(tok::r_paren)) 1723 return; 1724 if (1 + (I + 1)->Last->TotalLength > Limit) 1725 return; 1726 if ((I + 1)->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, 1727 tok::kw_while) || 1728 (I + 1)->First->Type == TT_LineComment) 1729 return; 1730 // Only inline simple if's (no nested if or else). 1731 if (I + 2 != E && Line.First->is(tok::kw_if) && 1732 (I + 2)->First->is(tok::kw_else)) 1733 return; 1734 join(Line, *(++I)); 1735 } 1736 1737 void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I, 1738 std::vector<AnnotatedLine>::iterator E, 1739 unsigned Limit) { 1740 // No merging if the brace already is on the next line. 1741 if (Style.BreakBeforeBraces != FormatStyle::BS_Attach) 1742 return; 1743 1744 // First, check that the current line allows merging. This is the case if 1745 // we're not in a control flow statement and the last token is an opening 1746 // brace. 1747 AnnotatedLine &Line = *I; 1748 if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace, 1749 tok::kw_else, tok::kw_try, tok::kw_catch, 1750 tok::kw_for, 1751 // This gets rid of all ObjC @ keywords and methods. 1752 tok::at, tok::minus, tok::plus)) 1753 return; 1754 1755 FormatToken *Tok = (I + 1)->First; 1756 if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore && 1757 (Tok->getNextNonComment() == NULL || 1758 Tok->getNextNonComment()->is(tok::semi))) { 1759 // We merge empty blocks even if the line exceeds the column limit. 1760 Tok->SpacesRequiredBefore = 0; 1761 Tok->CanBreakBefore = true; 1762 join(Line, *(I + 1)); 1763 I += 1; 1764 } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) { 1765 // Check that we still have three lines and they fit into the limit. 1766 if (I + 2 == E || (I + 2)->Type == LT_Invalid || 1767 !nextTwoLinesFitInto(I, Limit)) 1768 return; 1769 1770 // Second, check that the next line does not contain any braces - if it 1771 // does, readability declines when putting it into a single line. 1772 if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore) 1773 return; 1774 do { 1775 if (Tok->isOneOf(tok::l_brace, tok::r_brace)) 1776 return; 1777 Tok = Tok->Next; 1778 } while (Tok != NULL); 1779 1780 // Last, check that the third line contains a single closing brace. 1781 Tok = (I + 2)->First; 1782 if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) || 1783 Tok->MustBreakBefore) 1784 return; 1785 1786 join(Line, *(I + 1)); 1787 join(Line, *(I + 2)); 1788 I += 2; 1789 } 1790 } 1791 1792 bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I, 1793 unsigned Limit) { 1794 return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <= 1795 Limit; 1796 } 1797 1798 void join(AnnotatedLine &A, const AnnotatedLine &B) { 1799 assert(!A.Last->Next); 1800 assert(!B.First->Previous); 1801 A.Last->Next = B.First; 1802 B.First->Previous = A.Last; 1803 unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; 1804 for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { 1805 Tok->TotalLength += LengthA; 1806 A.Last = Tok; 1807 } 1808 } 1809 1810 bool touchesRanges(const CharSourceRange &Range) { 1811 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1812 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), 1813 Ranges[i].getBegin()) && 1814 !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(), 1815 Range.getBegin())) 1816 return true; 1817 } 1818 return false; 1819 } 1820 1821 bool touchesLine(const AnnotatedLine &TheLine) { 1822 const FormatToken *First = TheLine.First; 1823 const FormatToken *Last = TheLine.Last; 1824 CharSourceRange LineRange = CharSourceRange::getCharRange( 1825 First->WhitespaceRange.getBegin().getLocWithOffset( 1826 First->LastNewlineOffset), 1827 Last->Tok.getLocation().getLocWithOffset(Last->TokenText.size() - 1)); 1828 return touchesRanges(LineRange); 1829 } 1830 1831 bool touchesPPDirective(std::vector<AnnotatedLine>::iterator I, 1832 std::vector<AnnotatedLine>::iterator E) { 1833 for (; I != E; ++I) { 1834 if (I->First->HasUnescapedNewline) 1835 return false; 1836 if (touchesLine(*I)) 1837 return true; 1838 } 1839 return false; 1840 } 1841 1842 bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) { 1843 const FormatToken *First = TheLine.First; 1844 CharSourceRange LineRange = CharSourceRange::getCharRange( 1845 First->WhitespaceRange.getBegin(), 1846 First->WhitespaceRange.getBegin().getLocWithOffset( 1847 First->LastNewlineOffset)); 1848 return touchesRanges(LineRange); 1849 } 1850 1851 virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { 1852 AnnotatedLines.push_back(AnnotatedLine(TheLine)); 1853 } 1854 1855 /// \brief Add a new line and the required indent before the first Token 1856 /// of the \c UnwrappedLine if there was no structural parsing error. 1857 /// Returns the indent level of the \c UnwrappedLine. 1858 void formatFirstToken(const FormatToken &RootToken, 1859 const FormatToken *PreviousToken, unsigned Indent, 1860 bool InPPDirective) { 1861 unsigned Newlines = 1862 std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); 1863 // Remove empty lines before "}" where applicable. 1864 if (RootToken.is(tok::r_brace) && 1865 (!RootToken.Next || 1866 (RootToken.Next->is(tok::semi) && !RootToken.Next->Next))) 1867 Newlines = std::min(Newlines, 1u); 1868 if (Newlines == 0 && !RootToken.IsFirst) 1869 Newlines = 1; 1870 1871 // Insert extra new line before access specifiers. 1872 if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) && 1873 RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1) 1874 ++Newlines; 1875 1876 Whitespaces.replaceWhitespace( 1877 RootToken, Newlines, Indent, Indent, 1878 InPPDirective && !RootToken.HasUnescapedNewline); 1879 } 1880 1881 FormatStyle Style; 1882 Lexer &Lex; 1883 SourceManager &SourceMgr; 1884 WhitespaceManager Whitespaces; 1885 std::vector<CharSourceRange> Ranges; 1886 std::vector<AnnotatedLine> AnnotatedLines; 1887 1888 encoding::Encoding Encoding; 1889 bool BinPackInconclusiveFunctions; 1890 }; 1891 1892 } // end anonymous namespace 1893 1894 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, 1895 SourceManager &SourceMgr, 1896 std::vector<CharSourceRange> Ranges) { 1897 Formatter formatter(Style, Lex, SourceMgr, Ranges); 1898 return formatter.format(); 1899 } 1900 1901 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, 1902 std::vector<tooling::Range> Ranges, 1903 StringRef FileName) { 1904 FileManager Files((FileSystemOptions())); 1905 DiagnosticsEngine Diagnostics( 1906 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 1907 new DiagnosticOptions); 1908 SourceManager SourceMgr(Diagnostics, Files); 1909 llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName); 1910 const clang::FileEntry *Entry = 1911 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); 1912 SourceMgr.overrideFileContents(Entry, Buf); 1913 FileID ID = 1914 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); 1915 Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr, 1916 getFormattingLangOpts(Style.Standard)); 1917 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); 1918 std::vector<CharSourceRange> CharRanges; 1919 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1920 SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset()); 1921 SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength()); 1922 CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); 1923 } 1924 return reformat(Style, Lex, SourceMgr, CharRanges); 1925 } 1926 1927 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) { 1928 LangOptions LangOpts; 1929 LangOpts.CPlusPlus = 1; 1930 LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 1931 LangOpts.LineComment = 1; 1932 LangOpts.Bool = 1; 1933 LangOpts.ObjC1 = 1; 1934 LangOpts.ObjC2 = 1; 1935 return LangOpts; 1936 } 1937 1938 } // namespace format 1939 } // namespace clang 1940