1 //===--- Format.cpp - Format C++ code -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements functions declared in Format.h. This will be 12 /// split into separate files as we go. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "format-formatter" 17 18 #include "BreakableToken.h" 19 #include "TokenAnnotator.h" 20 #include "UnwrappedLineParser.h" 21 #include "WhitespaceManager.h" 22 #include "clang/Basic/Diagnostic.h" 23 #include "clang/Basic/OperatorPrecedence.h" 24 #include "clang/Basic/SourceManager.h" 25 #include "clang/Format/Format.h" 26 #include "clang/Lex/Lexer.h" 27 #include "llvm/ADT/STLExtras.h" 28 #include "llvm/Support/Allocator.h" 29 #include "llvm/Support/Debug.h" 30 #include "llvm/Support/YAMLTraits.h" 31 #include <queue> 32 #include <string> 33 34 namespace llvm { 35 namespace yaml { 36 template <> 37 struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> { 38 static void enumeration(IO &IO, 39 clang::format::FormatStyle::LanguageStandard &Value) { 40 IO.enumCase(Value, "C++03", clang::format::FormatStyle::LS_Cpp03); 41 IO.enumCase(Value, "C++11", clang::format::FormatStyle::LS_Cpp11); 42 IO.enumCase(Value, "Auto", clang::format::FormatStyle::LS_Auto); 43 } 44 }; 45 46 template <> 47 struct ScalarEnumerationTraits<clang::format::FormatStyle::BraceBreakingStyle> { 48 static void 49 enumeration(IO &IO, clang::format::FormatStyle::BraceBreakingStyle &Value) { 50 IO.enumCase(Value, "Attach", clang::format::FormatStyle::BS_Attach); 51 IO.enumCase(Value, "Linux", clang::format::FormatStyle::BS_Linux); 52 IO.enumCase(Value, "Stroustrup", clang::format::FormatStyle::BS_Stroustrup); 53 } 54 }; 55 56 template <> struct MappingTraits<clang::format::FormatStyle> { 57 static void mapping(llvm::yaml::IO &IO, clang::format::FormatStyle &Style) { 58 if (IO.outputting()) { 59 StringRef StylesArray[] = { "LLVM", "Google", "Chromium", "Mozilla" }; 60 ArrayRef<StringRef> Styles(StylesArray); 61 for (size_t i = 0, e = Styles.size(); i < e; ++i) { 62 StringRef StyleName(Styles[i]); 63 clang::format::FormatStyle PredefinedStyle; 64 if (clang::format::getPredefinedStyle(StyleName, &PredefinedStyle) && 65 Style == PredefinedStyle) { 66 IO.mapOptional("# BasedOnStyle", StyleName); 67 break; 68 } 69 } 70 } else { 71 StringRef BasedOnStyle; 72 IO.mapOptional("BasedOnStyle", BasedOnStyle); 73 if (!BasedOnStyle.empty()) 74 if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) { 75 IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle)); 76 return; 77 } 78 } 79 80 IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); 81 IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); 82 IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", 83 Style.AllowAllParametersOfDeclarationOnNextLine); 84 IO.mapOptional("AllowShortIfStatementsOnASingleLine", 85 Style.AllowShortIfStatementsOnASingleLine); 86 IO.mapOptional("AllowShortLoopsOnASingleLine", 87 Style.AllowShortLoopsOnASingleLine); 88 IO.mapOptional("AlwaysBreakTemplateDeclarations", 89 Style.AlwaysBreakTemplateDeclarations); 90 IO.mapOptional("AlwaysBreakBeforeMultilineStrings", 91 Style.AlwaysBreakBeforeMultilineStrings); 92 IO.mapOptional("BinPackParameters", Style.BinPackParameters); 93 IO.mapOptional("ColumnLimit", Style.ColumnLimit); 94 IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", 95 Style.ConstructorInitializerAllOnOneLineOrOnePerLine); 96 IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding); 97 IO.mapOptional("ExperimentalAutoDetectBinPacking", 98 Style.ExperimentalAutoDetectBinPacking); 99 IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); 100 IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); 101 IO.mapOptional("ObjCSpaceBeforeProtocolList", 102 Style.ObjCSpaceBeforeProtocolList); 103 IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); 104 IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString); 105 IO.mapOptional("PenaltyBreakFirstLessLess", 106 Style.PenaltyBreakFirstLessLess); 107 IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter); 108 IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", 109 Style.PenaltyReturnTypeOnItsOwnLine); 110 IO.mapOptional("PointerBindsToType", Style.PointerBindsToType); 111 IO.mapOptional("SpacesBeforeTrailingComments", 112 Style.SpacesBeforeTrailingComments); 113 IO.mapOptional("SpacesInBracedLists", Style.SpacesInBracedLists); 114 IO.mapOptional("Standard", Style.Standard); 115 IO.mapOptional("IndentWidth", Style.IndentWidth); 116 IO.mapOptional("UseTab", Style.UseTab); 117 IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); 118 IO.mapOptional("IndentFunctionDeclarationAfterType", 119 Style.IndentFunctionDeclarationAfterType); 120 } 121 }; 122 } 123 } 124 125 namespace clang { 126 namespace format { 127 128 void setDefaultPenalties(FormatStyle &Style) { 129 Style.PenaltyBreakComment = 45; 130 Style.PenaltyBreakFirstLessLess = 120; 131 Style.PenaltyBreakString = 1000; 132 Style.PenaltyExcessCharacter = 1000000; 133 } 134 135 FormatStyle getLLVMStyle() { 136 FormatStyle LLVMStyle; 137 LLVMStyle.AccessModifierOffset = -2; 138 LLVMStyle.AlignEscapedNewlinesLeft = false; 139 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 140 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 141 LLVMStyle.AllowShortLoopsOnASingleLine = false; 142 LLVMStyle.AlwaysBreakTemplateDeclarations = false; 143 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; 144 LLVMStyle.BinPackParameters = true; 145 LLVMStyle.ColumnLimit = 80; 146 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 147 LLVMStyle.DerivePointerBinding = false; 148 LLVMStyle.ExperimentalAutoDetectBinPacking = false; 149 LLVMStyle.IndentCaseLabels = false; 150 LLVMStyle.MaxEmptyLinesToKeep = 1; 151 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 152 LLVMStyle.PointerBindsToType = false; 153 LLVMStyle.SpacesBeforeTrailingComments = 1; 154 LLVMStyle.SpacesInBracedLists = true; 155 LLVMStyle.Standard = FormatStyle::LS_Cpp03; 156 LLVMStyle.IndentWidth = 2; 157 LLVMStyle.UseTab = false; 158 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 159 LLVMStyle.IndentFunctionDeclarationAfterType = false; 160 161 setDefaultPenalties(LLVMStyle); 162 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; 163 164 return LLVMStyle; 165 } 166 167 FormatStyle getGoogleStyle() { 168 FormatStyle GoogleStyle; 169 GoogleStyle.AccessModifierOffset = -1; 170 GoogleStyle.AlignEscapedNewlinesLeft = true; 171 GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true; 172 GoogleStyle.AllowShortIfStatementsOnASingleLine = true; 173 GoogleStyle.AllowShortLoopsOnASingleLine = true; 174 GoogleStyle.AlwaysBreakTemplateDeclarations = true; 175 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; 176 GoogleStyle.BinPackParameters = true; 177 GoogleStyle.ColumnLimit = 80; 178 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 179 GoogleStyle.DerivePointerBinding = true; 180 GoogleStyle.ExperimentalAutoDetectBinPacking = false; 181 GoogleStyle.IndentCaseLabels = true; 182 GoogleStyle.MaxEmptyLinesToKeep = 1; 183 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 184 GoogleStyle.PointerBindsToType = true; 185 GoogleStyle.SpacesBeforeTrailingComments = 2; 186 GoogleStyle.SpacesInBracedLists = false; 187 GoogleStyle.Standard = FormatStyle::LS_Auto; 188 GoogleStyle.IndentWidth = 2; 189 GoogleStyle.UseTab = false; 190 GoogleStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 191 GoogleStyle.IndentFunctionDeclarationAfterType = true; 192 193 setDefaultPenalties(GoogleStyle); 194 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; 195 196 return GoogleStyle; 197 } 198 199 FormatStyle getChromiumStyle() { 200 FormatStyle ChromiumStyle = getGoogleStyle(); 201 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; 202 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; 203 ChromiumStyle.AllowShortLoopsOnASingleLine = false; 204 ChromiumStyle.BinPackParameters = false; 205 ChromiumStyle.Standard = FormatStyle::LS_Cpp03; 206 ChromiumStyle.DerivePointerBinding = false; 207 return ChromiumStyle; 208 } 209 210 FormatStyle getMozillaStyle() { 211 FormatStyle MozillaStyle = getLLVMStyle(); 212 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; 213 MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 214 MozillaStyle.DerivePointerBinding = true; 215 MozillaStyle.IndentCaseLabels = true; 216 MozillaStyle.ObjCSpaceBeforeProtocolList = false; 217 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; 218 MozillaStyle.PointerBindsToType = true; 219 return MozillaStyle; 220 } 221 222 bool getPredefinedStyle(StringRef Name, FormatStyle *Style) { 223 if (Name.equals_lower("llvm")) 224 *Style = getLLVMStyle(); 225 else if (Name.equals_lower("chromium")) 226 *Style = getChromiumStyle(); 227 else if (Name.equals_lower("mozilla")) 228 *Style = getMozillaStyle(); 229 else if (Name.equals_lower("google")) 230 *Style = getGoogleStyle(); 231 else 232 return false; 233 234 return true; 235 } 236 237 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { 238 if (Text.trim().empty()) 239 return llvm::make_error_code(llvm::errc::invalid_argument); 240 llvm::yaml::Input Input(Text); 241 Input >> *Style; 242 return Input.error(); 243 } 244 245 std::string configurationAsText(const FormatStyle &Style) { 246 std::string Text; 247 llvm::raw_string_ostream Stream(Text); 248 llvm::yaml::Output Output(Stream); 249 // We use the same mapping method for input and output, so we need a non-const 250 // reference here. 251 FormatStyle NonConstStyle = Style; 252 Output << NonConstStyle; 253 return Stream.str(); 254 } 255 256 // Returns the length of everything up to the first possible line break after 257 // the ), ], } or > matching \c Tok. 258 static unsigned getLengthToMatchingParen(const FormatToken &Tok) { 259 if (Tok.MatchingParen == NULL) 260 return 0; 261 FormatToken *End = Tok.MatchingParen; 262 while (End->Next && !End->Next->CanBreakBefore) { 263 End = End->Next; 264 } 265 return End->TotalLength - Tok.TotalLength + 1; 266 } 267 268 namespace { 269 270 class UnwrappedLineFormatter { 271 public: 272 UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, 273 const AnnotatedLine &Line, unsigned FirstIndent, 274 const FormatToken *RootToken, 275 WhitespaceManager &Whitespaces, 276 encoding::Encoding Encoding, 277 bool BinPackInconclusiveFunctions) 278 : Style(Style), SourceMgr(SourceMgr), Line(Line), 279 FirstIndent(FirstIndent), RootToken(RootToken), 280 Whitespaces(Whitespaces), Count(0), Encoding(Encoding), 281 BinPackInconclusiveFunctions(BinPackInconclusiveFunctions) {} 282 283 /// \brief Formats an \c UnwrappedLine. 284 void format(const AnnotatedLine *NextLine) { 285 // Initialize state dependent on indent. 286 LineState State; 287 State.Column = FirstIndent; 288 State.NextToken = RootToken; 289 State.Stack.push_back(ParenState(FirstIndent, FirstIndent, 290 /*AvoidBinPacking=*/false, 291 /*NoLineBreak=*/false)); 292 State.LineContainsContinuedForLoopSection = false; 293 State.ParenLevel = 0; 294 State.StartOfStringLiteral = 0; 295 State.StartOfLineLevel = State.ParenLevel; 296 State.LowestLevelOnLine = State.ParenLevel; 297 State.IgnoreStackForComparison = false; 298 299 // The first token has already been indented and thus consumed. 300 moveStateToNextToken(State, /*DryRun=*/false, /*Newline=*/false); 301 302 // If everything fits on a single line, just put it there. 303 unsigned ColumnLimit = Style.ColumnLimit; 304 if (NextLine && NextLine->InPPDirective && 305 !NextLine->First->HasUnescapedNewline) 306 ColumnLimit = getColumnLimit(); 307 if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) { 308 while (State.NextToken != NULL) { 309 addTokenToState(false, false, State); 310 } 311 } 312 313 // If the ObjC method declaration does not fit on a line, we should format 314 // it with one arg per line. 315 if (Line.Type == LT_ObjCMethodDecl) 316 State.Stack.back().BreakBeforeParameter = true; 317 318 // Find best solution in solution space. 319 analyzeSolutionSpace(State); 320 } 321 322 private: 323 void DebugTokenState(const FormatToken &FormatTok) { 324 const Token &Tok = FormatTok.Tok; 325 llvm::dbgs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 326 Tok.getLength()); 327 llvm::dbgs(); 328 } 329 330 struct ParenState { 331 ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, 332 bool NoLineBreak) 333 : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0), 334 BreakBeforeClosingBrace(false), QuestionColumn(0), 335 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 336 NoLineBreak(NoLineBreak), ColonPos(0), StartOfFunctionCall(0), 337 StartOfArraySubscripts(0), NestedNameSpecifierContinuation(0), 338 CallContinuation(0), VariablePos(0), ContainsLineBreak(false) {} 339 340 /// \brief The position to which a specific parenthesis level needs to be 341 /// indented. 342 unsigned Indent; 343 344 /// \brief The position of the last space on each level. 345 /// 346 /// Used e.g. to break like: 347 /// functionCall(Parameter, otherCall( 348 /// OtherParameter)); 349 unsigned LastSpace; 350 351 /// \brief The position the first "<<" operator encountered on each level. 352 /// 353 /// Used to align "<<" operators. 0 if no such operator has been encountered 354 /// on a level. 355 unsigned FirstLessLess; 356 357 /// \brief Whether a newline needs to be inserted before the block's closing 358 /// brace. 359 /// 360 /// We only want to insert a newline before the closing brace if there also 361 /// was a newline after the beginning left brace. 362 bool BreakBeforeClosingBrace; 363 364 /// \brief The column of a \c ? in a conditional expression; 365 unsigned QuestionColumn; 366 367 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple 368 /// lines, in this context. 369 bool AvoidBinPacking; 370 371 /// \brief Break after the next comma (or all the commas in this context if 372 /// \c AvoidBinPacking is \c true). 373 bool BreakBeforeParameter; 374 375 /// \brief Line breaking in this context would break a formatting rule. 376 bool NoLineBreak; 377 378 /// \brief The position of the colon in an ObjC method declaration/call. 379 unsigned ColonPos; 380 381 /// \brief The start of the most recent function in a builder-type call. 382 unsigned StartOfFunctionCall; 383 384 /// \brief Contains the start of array subscript expressions, so that they 385 /// can be aligned. 386 unsigned StartOfArraySubscripts; 387 388 /// \brief If a nested name specifier was broken over multiple lines, this 389 /// contains the start column of the second line. Otherwise 0. 390 unsigned NestedNameSpecifierContinuation; 391 392 /// \brief If a call expression was broken over multiple lines, this 393 /// contains the start column of the second line. Otherwise 0. 394 unsigned CallContinuation; 395 396 /// \brief The column of the first variable name in a variable declaration. 397 /// 398 /// Used to align further variables if necessary. 399 unsigned VariablePos; 400 401 /// \brief \c true if this \c ParenState already contains a line-break. 402 /// 403 /// The first line break in a certain \c ParenState causes extra penalty so 404 /// that clang-format prefers similar breaks, i.e. breaks in the same 405 /// parenthesis. 406 bool ContainsLineBreak; 407 408 bool operator<(const ParenState &Other) const { 409 if (Indent != Other.Indent) 410 return Indent < Other.Indent; 411 if (LastSpace != Other.LastSpace) 412 return LastSpace < Other.LastSpace; 413 if (FirstLessLess != Other.FirstLessLess) 414 return FirstLessLess < Other.FirstLessLess; 415 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 416 return BreakBeforeClosingBrace; 417 if (QuestionColumn != Other.QuestionColumn) 418 return QuestionColumn < Other.QuestionColumn; 419 if (AvoidBinPacking != Other.AvoidBinPacking) 420 return AvoidBinPacking; 421 if (BreakBeforeParameter != Other.BreakBeforeParameter) 422 return BreakBeforeParameter; 423 if (NoLineBreak != Other.NoLineBreak) 424 return NoLineBreak; 425 if (ColonPos != Other.ColonPos) 426 return ColonPos < Other.ColonPos; 427 if (StartOfFunctionCall != Other.StartOfFunctionCall) 428 return StartOfFunctionCall < Other.StartOfFunctionCall; 429 if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 430 return StartOfArraySubscripts < Other.StartOfArraySubscripts; 431 if (CallContinuation != Other.CallContinuation) 432 return CallContinuation < Other.CallContinuation; 433 if (VariablePos != Other.VariablePos) 434 return VariablePos < Other.VariablePos; 435 if (ContainsLineBreak != Other.ContainsLineBreak) 436 return ContainsLineBreak < Other.ContainsLineBreak; 437 return false; 438 } 439 }; 440 441 /// \brief The current state when indenting a unwrapped line. 442 /// 443 /// As the indenting tries different combinations this is copied by value. 444 struct LineState { 445 /// \brief The number of used columns in the current line. 446 unsigned Column; 447 448 /// \brief The token that needs to be next formatted. 449 const FormatToken *NextToken; 450 451 /// \brief \c true if this line contains a continued for-loop section. 452 bool LineContainsContinuedForLoopSection; 453 454 /// \brief The level of nesting inside (), [], <> and {}. 455 unsigned ParenLevel; 456 457 /// \brief The \c ParenLevel at the start of this line. 458 unsigned StartOfLineLevel; 459 460 /// \brief The lowest \c ParenLevel on the current line. 461 unsigned LowestLevelOnLine; 462 463 /// \brief The start column of the string literal, if we're in a string 464 /// literal sequence, 0 otherwise. 465 unsigned StartOfStringLiteral; 466 467 /// \brief A stack keeping track of properties applying to parenthesis 468 /// levels. 469 std::vector<ParenState> Stack; 470 471 /// \brief Ignore the stack of \c ParenStates for state comparison. 472 /// 473 /// In long and deeply nested unwrapped lines, the current algorithm can 474 /// be insufficient for finding the best formatting with a reasonable amount 475 /// of time and memory. Setting this flag will effectively lead to the 476 /// algorithm not analyzing some combinations. However, these combinations 477 /// rarely contain the optimal solution: In short, accepting a higher 478 /// penalty early would need to lead to different values in the \c 479 /// ParenState stack (in an otherwise identical state) and these different 480 /// values would need to lead to a significant amount of avoided penalty 481 /// later. 482 /// 483 /// FIXME: Come up with a better algorithm instead. 484 bool IgnoreStackForComparison; 485 486 /// \brief Comparison operator to be able to used \c LineState in \c map. 487 bool operator<(const LineState &Other) const { 488 if (NextToken != Other.NextToken) 489 return NextToken < Other.NextToken; 490 if (Column != Other.Column) 491 return Column < Other.Column; 492 if (LineContainsContinuedForLoopSection != 493 Other.LineContainsContinuedForLoopSection) 494 return LineContainsContinuedForLoopSection; 495 if (ParenLevel != Other.ParenLevel) 496 return ParenLevel < Other.ParenLevel; 497 if (StartOfLineLevel != Other.StartOfLineLevel) 498 return StartOfLineLevel < Other.StartOfLineLevel; 499 if (LowestLevelOnLine != Other.LowestLevelOnLine) 500 return LowestLevelOnLine < Other.LowestLevelOnLine; 501 if (StartOfStringLiteral != Other.StartOfStringLiteral) 502 return StartOfStringLiteral < Other.StartOfStringLiteral; 503 if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 504 return false; 505 return Stack < Other.Stack; 506 } 507 }; 508 509 /// \brief Appends the next token to \p State and updates information 510 /// necessary for indentation. 511 /// 512 /// Puts the token on the current line if \p Newline is \c false and adds a 513 /// line break and necessary indentation otherwise. 514 /// 515 /// If \p DryRun is \c false, also creates and stores the required 516 /// \c Replacement. 517 unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) { 518 const FormatToken &Current = *State.NextToken; 519 const FormatToken &Previous = *State.NextToken->Previous; 520 521 // Extra penalty that needs to be added because of the way certain line 522 // breaks are chosen. 523 unsigned ExtraPenalty = 0; 524 525 if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) { 526 // FIXME: Is this correct? 527 int WhitespaceLength = SourceMgr.getSpellingColumnNumber( 528 State.NextToken->WhitespaceRange.getEnd()) - 529 SourceMgr.getSpellingColumnNumber( 530 State.NextToken->WhitespaceRange.getBegin()); 531 State.Column += WhitespaceLength + State.NextToken->CodePointCount; 532 State.NextToken = State.NextToken->Next; 533 return 0; 534 } 535 536 // If we are continuing an expression, we want to indent an extra 4 spaces. 537 unsigned ContinuationIndent = 538 std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4; 539 if (Newline) { 540 State.Stack.back().ContainsLineBreak = true; 541 if (Current.is(tok::r_brace)) { 542 if (Current.BlockKind == BK_BracedInit) 543 State.Column = State.Stack[State.Stack.size() - 2].LastSpace; 544 else 545 State.Column = FirstIndent; 546 } else if (Current.is(tok::string_literal) && 547 State.StartOfStringLiteral != 0) { 548 State.Column = State.StartOfStringLiteral; 549 State.Stack.back().BreakBeforeParameter = true; 550 } else if (Current.is(tok::lessless) && 551 State.Stack.back().FirstLessLess != 0) { 552 State.Column = State.Stack.back().FirstLessLess; 553 } else if (Current.isOneOf(tok::period, tok::arrow) && 554 Current.Type != TT_DesignatedInitializerPeriod) { 555 if (State.Stack.back().CallContinuation == 0) { 556 State.Column = ContinuationIndent; 557 State.Stack.back().CallContinuation = State.Column; 558 } else { 559 State.Column = State.Stack.back().CallContinuation; 560 } 561 } else if (Current.Type == TT_ConditionalExpr) { 562 State.Column = State.Stack.back().QuestionColumn; 563 } else if (Previous.is(tok::comma) && 564 State.Stack.back().VariablePos != 0) { 565 State.Column = State.Stack.back().VariablePos; 566 } else if (Previous.ClosesTemplateDeclaration || 567 ((Current.Type == TT_StartOfName || 568 Current.is(tok::kw_operator)) && 569 State.ParenLevel == 0 && 570 (!Style.IndentFunctionDeclarationAfterType || 571 Line.StartsDefinition))) { 572 State.Column = State.Stack.back().Indent; 573 } else if (Current.Type == TT_ObjCSelectorName) { 574 if (State.Stack.back().ColonPos > Current.CodePointCount) { 575 State.Column = State.Stack.back().ColonPos - Current.CodePointCount; 576 } else { 577 State.Column = State.Stack.back().Indent; 578 State.Stack.back().ColonPos = State.Column + Current.CodePointCount; 579 } 580 } else if (Current.is(tok::l_square) && 581 Current.Type != TT_ObjCMethodExpr) { 582 if (State.Stack.back().StartOfArraySubscripts != 0) 583 State.Column = State.Stack.back().StartOfArraySubscripts; 584 else 585 State.Column = ContinuationIndent; 586 } else if (Current.Type == TT_StartOfName || 587 Previous.isOneOf(tok::coloncolon, tok::equal) || 588 Previous.Type == TT_ObjCMethodExpr) { 589 State.Column = ContinuationIndent; 590 } else { 591 State.Column = State.Stack.back().Indent; 592 // Ensure that we fall back to indenting 4 spaces instead of just 593 // flushing continuations left. 594 if (State.Column == FirstIndent) 595 State.Column += 4; 596 } 597 598 if (Current.is(tok::question)) 599 State.Stack.back().BreakBeforeParameter = true; 600 if ((Previous.isOneOf(tok::comma, tok::semi) && 601 !State.Stack.back().AvoidBinPacking) || 602 Previous.Type == TT_BinaryOperator) 603 State.Stack.back().BreakBeforeParameter = false; 604 if (Previous.Type == TT_TemplateCloser && State.ParenLevel == 0) 605 State.Stack.back().BreakBeforeParameter = false; 606 607 if (!DryRun) { 608 unsigned NewLines = 1; 609 if (Current.is(tok::comment)) 610 NewLines = std::max( 611 NewLines, 612 std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1)); 613 Whitespaces.replaceWhitespace(Current, NewLines, State.Column, 614 State.Column, Line.InPPDirective); 615 } 616 617 if (!Current.isTrailingComment()) 618 State.Stack.back().LastSpace = State.Column; 619 if (Current.isOneOf(tok::arrow, tok::period) && 620 Current.Type != TT_DesignatedInitializerPeriod) 621 State.Stack.back().LastSpace += Current.CodePointCount; 622 State.StartOfLineLevel = State.ParenLevel; 623 State.LowestLevelOnLine = State.ParenLevel; 624 625 // Any break on this level means that the parent level has been broken 626 // and we need to avoid bin packing there. 627 for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { 628 State.Stack[i].BreakBeforeParameter = true; 629 } 630 const FormatToken *TokenBefore = Current.getPreviousNonComment(); 631 if (TokenBefore && !TokenBefore->isOneOf(tok::comma, tok::semi) && 632 TokenBefore->Type != TT_TemplateCloser && 633 TokenBefore->Type != TT_BinaryOperator && !TokenBefore->opensScope()) 634 State.Stack.back().BreakBeforeParameter = true; 635 636 // If we break after {, we should also break before the corresponding }. 637 if (Previous.is(tok::l_brace)) 638 State.Stack.back().BreakBeforeClosingBrace = true; 639 640 if (State.Stack.back().AvoidBinPacking) { 641 // If we are breaking after '(', '{', '<', this is not bin packing 642 // unless AllowAllParametersOfDeclarationOnNextLine is false. 643 if (!(Previous.isOneOf(tok::l_paren, tok::l_brace) || 644 Previous.Type == TT_BinaryOperator) || 645 (!Style.AllowAllParametersOfDeclarationOnNextLine && 646 Line.MustBeDeclaration)) 647 State.Stack.back().BreakBeforeParameter = true; 648 } 649 650 // Breaking before the first "<<" is generally not desirable. 651 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) 652 ExtraPenalty += Style.PenaltyBreakFirstLessLess; 653 654 } else { 655 if (Current.is(tok::equal) && 656 (RootToken->is(tok::kw_for) || State.ParenLevel == 0) && 657 State.Stack.back().VariablePos == 0) { 658 State.Stack.back().VariablePos = State.Column; 659 // Move over * and & if they are bound to the variable name. 660 const FormatToken *Tok = &Previous; 661 while (Tok && State.Stack.back().VariablePos >= Tok->CodePointCount) { 662 State.Stack.back().VariablePos -= Tok->CodePointCount; 663 if (Tok->SpacesRequiredBefore != 0) 664 break; 665 Tok = Tok->Previous; 666 } 667 if (Previous.PartOfMultiVariableDeclStmt) 668 State.Stack.back().LastSpace = State.Stack.back().VariablePos; 669 } 670 671 unsigned Spaces = State.NextToken->SpacesRequiredBefore; 672 673 if (!DryRun) 674 Whitespaces.replaceWhitespace(Current, 0, Spaces, 675 State.Column + Spaces); 676 677 if (Current.Type == TT_ObjCSelectorName && 678 State.Stack.back().ColonPos == 0) { 679 if (State.Stack.back().Indent + Current.LongestObjCSelectorName > 680 State.Column + Spaces + Current.CodePointCount) 681 State.Stack.back().ColonPos = 682 State.Stack.back().Indent + Current.LongestObjCSelectorName; 683 else 684 State.Stack.back().ColonPos = 685 State.Column + Spaces + Current.CodePointCount; 686 } 687 688 if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr && 689 Current.Type != TT_LineComment) 690 State.Stack.back().Indent = State.Column + Spaces; 691 if (Previous.is(tok::comma) && !Current.isTrailingComment() && 692 State.Stack.back().AvoidBinPacking) 693 State.Stack.back().NoLineBreak = true; 694 695 State.Column += Spaces; 696 if (Current.is(tok::l_paren) && Previous.isOneOf(tok::kw_if, tok::kw_for)) 697 // Treat the condition inside an if as if it was a second function 698 // parameter, i.e. let nested calls have an indent of 4. 699 State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(". 700 else if (Previous.is(tok::comma)) 701 State.Stack.back().LastSpace = State.Column; 702 else if ((Previous.Type == TT_BinaryOperator || 703 Previous.Type == TT_ConditionalExpr || 704 Previous.Type == TT_CtorInitializerColon) && 705 !(Previous.getPrecedence() == prec::Assignment && 706 Current.FakeLParens.empty())) 707 // Always indent relative to the RHS of the expression unless this is a 708 // simple assignment without binary expression on the RHS. 709 State.Stack.back().LastSpace = State.Column; 710 else if (Previous.Type == TT_InheritanceColon) 711 State.Stack.back().Indent = State.Column; 712 else if (Previous.opensScope()) { 713 // If a function has multiple parameters (including a single parameter 714 // that is a binary expression) or a trailing call, indent all 715 // parameters from the opening parenthesis. This avoids confusing 716 // indents like: 717 // OuterFunction(InnerFunctionCall( 718 // ParameterToInnerFunction), 719 // SecondParameterToOuterFunction); 720 bool HasMultipleParameters = !Current.FakeLParens.empty(); 721 bool HasTrailingCall = false; 722 if (Previous.MatchingParen) { 723 const FormatToken *Next = Previous.MatchingParen->getNextNonComment(); 724 if (Next && Next->isOneOf(tok::period, tok::arrow)) 725 HasTrailingCall = true; 726 } 727 if (HasMultipleParameters || HasTrailingCall) 728 State.Stack.back().LastSpace = State.Column; 729 } 730 } 731 732 return moveStateToNextToken(State, DryRun, Newline) + ExtraPenalty; 733 } 734 735 /// \brief Mark the next token as consumed in \p State and modify its stacks 736 /// accordingly. 737 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline) { 738 const FormatToken &Current = *State.NextToken; 739 assert(State.Stack.size()); 740 741 if (Current.Type == TT_InheritanceColon) 742 State.Stack.back().AvoidBinPacking = true; 743 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) 744 State.Stack.back().FirstLessLess = State.Column; 745 if (Current.is(tok::l_square) && 746 State.Stack.back().StartOfArraySubscripts == 0) 747 State.Stack.back().StartOfArraySubscripts = State.Column; 748 if (Current.is(tok::question)) 749 State.Stack.back().QuestionColumn = State.Column; 750 if (!Current.opensScope() && !Current.closesScope()) 751 State.LowestLevelOnLine = 752 std::min(State.LowestLevelOnLine, State.ParenLevel); 753 if (Current.isOneOf(tok::period, tok::arrow) && 754 Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0) 755 State.Stack.back().StartOfFunctionCall = 756 Current.LastInChainOfCalls ? 0 757 : State.Column + Current.CodePointCount; 758 if (Current.Type == TT_CtorInitializerColon) { 759 // Indent 2 from the column, so: 760 // SomeClass::SomeClass() 761 // : First(...), ... 762 // Next(...) 763 // ^ line up here. 764 State.Stack.back().Indent = State.Column + 2; 765 if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) 766 State.Stack.back().AvoidBinPacking = true; 767 State.Stack.back().BreakBeforeParameter = false; 768 } 769 770 // If return returns a binary expression, align after it. 771 if (Current.is(tok::kw_return) && !Current.FakeLParens.empty()) 772 State.Stack.back().LastSpace = State.Column + 7; 773 774 // In ObjC method declaration we align on the ":" of parameters, but we need 775 // to ensure that we indent parameters on subsequent lines by at least 4. 776 if (Current.Type == TT_ObjCMethodSpecifier) 777 State.Stack.back().Indent += 4; 778 779 // Insert scopes created by fake parenthesis. 780 const FormatToken *Previous = Current.getPreviousNonComment(); 781 // Don't add extra indentation for the first fake parenthesis after 782 // 'return', assignements or opening <({[. The indentation for these cases 783 // is special cased. 784 bool SkipFirstExtraIndent = 785 Current.is(tok::kw_return) || 786 (Previous && (Previous->opensScope() || 787 Previous->getPrecedence() == prec::Assignment)); 788 for (SmallVectorImpl<prec::Level>::const_reverse_iterator 789 I = Current.FakeLParens.rbegin(), 790 E = Current.FakeLParens.rend(); 791 I != E; ++I) { 792 ParenState NewParenState = State.Stack.back(); 793 NewParenState.ContainsLineBreak = false; 794 NewParenState.Indent = 795 std::max(std::max(State.Column, NewParenState.Indent), 796 State.Stack.back().LastSpace); 797 798 // Always indent conditional expressions. Never indent expression where 799 // the 'operator' is ',', ';' or an assignment (i.e. *I <= 800 // prec::Assignment) as those have different indentation rules. Indent 801 // other expression, unless the indentation needs to be skipped. 802 if (*I == prec::Conditional || 803 (!SkipFirstExtraIndent && *I > prec::Assignment)) 804 NewParenState.Indent += 4; 805 if (Previous && !Previous->opensScope()) 806 NewParenState.BreakBeforeParameter = false; 807 State.Stack.push_back(NewParenState); 808 SkipFirstExtraIndent = false; 809 } 810 811 // If we encounter an opening (, [, { or <, we add a level to our stacks to 812 // prepare for the following tokens. 813 if (Current.opensScope()) { 814 unsigned NewIndent; 815 unsigned LastSpace = State.Stack.back().LastSpace; 816 bool AvoidBinPacking; 817 if (Current.is(tok::l_brace)) { 818 NewIndent = Style.IndentWidth + LastSpace; 819 const FormatToken *NextNoComment = Current.getNextNonComment(); 820 AvoidBinPacking = NextNoComment && 821 NextNoComment->Type == TT_DesignatedInitializerPeriod; 822 } else { 823 NewIndent = 824 4 + std::max(LastSpace, State.Stack.back().StartOfFunctionCall); 825 AvoidBinPacking = !Style.BinPackParameters || 826 (Style.ExperimentalAutoDetectBinPacking && 827 (Current.PackingKind == PPK_OnePerLine || 828 (!BinPackInconclusiveFunctions && 829 Current.PackingKind == PPK_Inconclusive))); 830 } 831 832 State.Stack.push_back(ParenState(NewIndent, LastSpace, AvoidBinPacking, 833 State.Stack.back().NoLineBreak)); 834 ++State.ParenLevel; 835 } 836 837 // If this '[' opens an ObjC call, determine whether all parameters fit into 838 // one line and put one per line if they don't. 839 if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr && 840 Current.MatchingParen != NULL) { 841 if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit()) 842 State.Stack.back().BreakBeforeParameter = true; 843 } 844 845 // If we encounter a closing ), ], } or >, we can remove a level from our 846 // stacks. 847 if (Current.isOneOf(tok::r_paren, tok::r_square) || 848 (Current.is(tok::r_brace) && State.NextToken != RootToken) || 849 State.NextToken->Type == TT_TemplateCloser) { 850 State.Stack.pop_back(); 851 --State.ParenLevel; 852 } 853 if (Current.is(tok::r_square)) { 854 // If this ends the array subscript expr, reset the corresponding value. 855 const FormatToken *NextNonComment = Current.getNextNonComment(); 856 if (NextNonComment && NextNonComment->isNot(tok::l_square)) 857 State.Stack.back().StartOfArraySubscripts = 0; 858 } 859 860 // Remove scopes created by fake parenthesis. 861 for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) { 862 unsigned VariablePos = State.Stack.back().VariablePos; 863 State.Stack.pop_back(); 864 State.Stack.back().VariablePos = VariablePos; 865 } 866 867 if (Current.is(tok::string_literal) && State.StartOfStringLiteral == 0) { 868 State.StartOfStringLiteral = State.Column; 869 } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash, 870 tok::string_literal)) { 871 State.StartOfStringLiteral = 0; 872 } 873 874 State.Column += Current.CodePointCount; 875 876 State.NextToken = State.NextToken->Next; 877 878 if (!Newline && Style.AlwaysBreakBeforeMultilineStrings && 879 Current.is(tok::string_literal)) 880 return 0; 881 882 return breakProtrudingToken(Current, State, DryRun); 883 } 884 885 /// \brief If the current token sticks out over the end of the line, break 886 /// it if possible. 887 /// 888 /// \returns An extra penalty if a token was broken, otherwise 0. 889 /// 890 /// The returned penalty will cover the cost of the additional line breaks and 891 /// column limit violation in all lines except for the last one. The penalty 892 /// for the column limit violation in the last line (and in single line 893 /// tokens) is handled in \c addNextStateToQueue. 894 unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, 895 bool DryRun) { 896 llvm::OwningPtr<BreakableToken> Token; 897 unsigned StartColumn = State.Column - Current.CodePointCount; 898 unsigned OriginalStartColumn = 899 SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) - 900 1; 901 902 if (Current.is(tok::string_literal) && 903 Current.Type != TT_ImplicitStringLiteral) { 904 // Only break up default narrow strings. 905 if (!Current.TokenText.startswith("\"")) 906 return 0; 907 908 Token.reset(new BreakableStringLiteral(Current, StartColumn, 909 Line.InPPDirective, Encoding)); 910 } else if (Current.Type == TT_BlockComment) { 911 Token.reset(new BreakableBlockComment( 912 Style, Current, StartColumn, OriginalStartColumn, !Current.Previous, 913 Line.InPPDirective, Encoding)); 914 } else if (Current.Type == TT_LineComment && 915 (Current.Previous == NULL || 916 Current.Previous->Type != TT_ImplicitStringLiteral)) { 917 Token.reset(new BreakableLineComment(Current, StartColumn, 918 Line.InPPDirective, Encoding)); 919 } else { 920 return 0; 921 } 922 if (Current.UnbreakableTailLength >= getColumnLimit()) 923 return 0; 924 925 unsigned RemainingSpace = getColumnLimit() - Current.UnbreakableTailLength; 926 bool BreakInserted = false; 927 unsigned Penalty = 0; 928 unsigned RemainingTokenColumns = 0; 929 for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); 930 LineIndex != EndIndex; ++LineIndex) { 931 if (!DryRun) 932 Token->replaceWhitespaceBefore(LineIndex, Whitespaces); 933 unsigned TailOffset = 0; 934 RemainingTokenColumns = Token->getLineLengthAfterSplit( 935 LineIndex, TailOffset, StringRef::npos); 936 while (RemainingTokenColumns > RemainingSpace) { 937 BreakableToken::Split Split = 938 Token->getSplit(LineIndex, TailOffset, getColumnLimit()); 939 if (Split.first == StringRef::npos) { 940 // The last line's penalty is handled in addNextStateToQueue(). 941 if (LineIndex < EndIndex - 1) 942 Penalty += Style.PenaltyExcessCharacter * 943 (RemainingTokenColumns - RemainingSpace); 944 break; 945 } 946 assert(Split.first != 0); 947 unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( 948 LineIndex, TailOffset + Split.first + Split.second, 949 StringRef::npos); 950 assert(NewRemainingTokenColumns < RemainingTokenColumns); 951 if (!DryRun) 952 Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); 953 Penalty += Current.is(tok::string_literal) ? Style.PenaltyBreakString 954 : Style.PenaltyBreakComment; 955 unsigned ColumnsUsed = 956 Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first); 957 if (ColumnsUsed > getColumnLimit()) { 958 Penalty += 959 Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit()); 960 } 961 TailOffset += Split.first + Split.second; 962 RemainingTokenColumns = NewRemainingTokenColumns; 963 BreakInserted = true; 964 } 965 } 966 967 State.Column = RemainingTokenColumns; 968 969 if (BreakInserted) { 970 // If we break the token inside a parameter list, we need to break before 971 // the next parameter on all levels, so that the next parameter is clearly 972 // visible. Line comments already introduce a break. 973 if (Current.Type != TT_LineComment) { 974 for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) 975 State.Stack[i].BreakBeforeParameter = true; 976 } 977 978 State.Stack.back().LastSpace = StartColumn; 979 } 980 return Penalty; 981 } 982 983 unsigned getColumnLimit() { 984 // In preprocessor directives reserve two chars for trailing " \" 985 return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0); 986 } 987 988 /// \brief An edge in the solution space from \c Previous->State to \c State, 989 /// inserting a newline dependent on the \c NewLine. 990 struct StateNode { 991 StateNode(const LineState &State, bool NewLine, StateNode *Previous) 992 : State(State), NewLine(NewLine), Previous(Previous) {} 993 LineState State; 994 bool NewLine; 995 StateNode *Previous; 996 }; 997 998 /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. 999 /// 1000 /// In case of equal penalties, we want to prefer states that were inserted 1001 /// first. During state generation we make sure that we insert states first 1002 /// that break the line as late as possible. 1003 typedef std::pair<unsigned, unsigned> OrderedPenalty; 1004 1005 /// \brief An item in the prioritized BFS search queue. The \c StateNode's 1006 /// \c State has the given \c OrderedPenalty. 1007 typedef std::pair<OrderedPenalty, StateNode *> QueueItem; 1008 1009 /// \brief The BFS queue type. 1010 typedef std::priority_queue<QueueItem, std::vector<QueueItem>, 1011 std::greater<QueueItem> > QueueType; 1012 1013 /// \brief Analyze the entire solution space starting from \p InitialState. 1014 /// 1015 /// This implements a variant of Dijkstra's algorithm on the graph that spans 1016 /// the solution space (\c LineStates are the nodes). The algorithm tries to 1017 /// find the shortest path (the one with lowest penalty) from \p InitialState 1018 /// to a state where all tokens are placed. 1019 void analyzeSolutionSpace(LineState &InitialState) { 1020 std::set<LineState> Seen; 1021 1022 // Insert start element into queue. 1023 StateNode *Node = 1024 new (Allocator.Allocate()) StateNode(InitialState, false, NULL); 1025 Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); 1026 ++Count; 1027 1028 // While not empty, take first element and follow edges. 1029 while (!Queue.empty()) { 1030 unsigned Penalty = Queue.top().first.first; 1031 StateNode *Node = Queue.top().second; 1032 if (Node->State.NextToken == NULL) { 1033 DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); 1034 break; 1035 } 1036 Queue.pop(); 1037 1038 // Cut off the analysis of certain solutions if the analysis gets too 1039 // complex. See description of IgnoreStackForComparison. 1040 if (Count > 10000) 1041 Node->State.IgnoreStackForComparison = true; 1042 1043 if (!Seen.insert(Node->State).second) 1044 // State already examined with lower penalty. 1045 continue; 1046 1047 addNextStateToQueue(Penalty, Node, /*NewLine=*/false); 1048 addNextStateToQueue(Penalty, Node, /*NewLine=*/true); 1049 } 1050 1051 if (Queue.empty()) 1052 // We were unable to find a solution, do nothing. 1053 // FIXME: Add diagnostic? 1054 return; 1055 1056 // Reconstruct the solution. 1057 reconstructPath(InitialState, Queue.top().second); 1058 DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); 1059 DEBUG(llvm::dbgs() << "---\n"); 1060 } 1061 1062 void reconstructPath(LineState &State, StateNode *Current) { 1063 std::deque<StateNode *> Path; 1064 // We do not need a break before the initial token. 1065 while (Current->Previous) { 1066 Path.push_front(Current); 1067 Current = Current->Previous; 1068 } 1069 for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); 1070 I != E; ++I) { 1071 DEBUG({ 1072 if ((*I)->NewLine) { 1073 llvm::dbgs() << "Penalty for splitting before " 1074 << (*I)->Previous->State.NextToken->Tok.getName() << ": " 1075 << (*I)->Previous->State.NextToken->SplitPenalty << "\n"; 1076 } 1077 }); 1078 addTokenToState((*I)->NewLine, false, State); 1079 } 1080 } 1081 1082 /// \brief Add the following state to the analysis queue \c Queue. 1083 /// 1084 /// Assume the current state is \p PreviousNode and has been reached with a 1085 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. 1086 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, 1087 bool NewLine) { 1088 if (NewLine && !canBreak(PreviousNode->State)) 1089 return; 1090 if (!NewLine && mustBreak(PreviousNode->State)) 1091 return; 1092 if (NewLine) { 1093 if (!PreviousNode->State.Stack.back().ContainsLineBreak) 1094 Penalty += 15; 1095 Penalty += PreviousNode->State.NextToken->SplitPenalty; 1096 } 1097 1098 StateNode *Node = new (Allocator.Allocate()) 1099 StateNode(PreviousNode->State, NewLine, PreviousNode); 1100 Penalty += addTokenToState(NewLine, true, Node->State); 1101 if (Node->State.Column > getColumnLimit()) { 1102 unsigned ExcessCharacters = Node->State.Column - getColumnLimit(); 1103 Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; 1104 } 1105 1106 Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node)); 1107 ++Count; 1108 } 1109 1110 /// \brief Returns \c true, if a line break after \p State is allowed. 1111 bool canBreak(const LineState &State) { 1112 const FormatToken &Current = *State.NextToken; 1113 const FormatToken &Previous = *Current.Previous; 1114 assert(&Previous == Current.Previous); 1115 if (!Current.CanBreakBefore && 1116 !(Current.is(tok::r_brace) && 1117 State.Stack.back().BreakBeforeClosingBrace)) 1118 return false; 1119 // The opening "{" of a braced list has to be on the same line as the first 1120 // element if it is nested in another braced init list or function call. 1121 if (!Current.MustBreakBefore && Previous.is(tok::l_brace) && 1122 Previous.Previous && 1123 Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma)) 1124 return false; 1125 // This prevents breaks like: 1126 // ... 1127 // SomeParameter, OtherParameter).DoSomething( 1128 // ... 1129 // As they hide "DoSomething" and are generally bad for readability. 1130 if (Previous.opensScope() && 1131 State.LowestLevelOnLine < State.StartOfLineLevel) 1132 return false; 1133 return !State.Stack.back().NoLineBreak; 1134 } 1135 1136 /// \brief Returns \c true, if a line break after \p State is mandatory. 1137 bool mustBreak(const LineState &State) { 1138 const FormatToken &Current = *State.NextToken; 1139 const FormatToken &Previous = *Current.Previous; 1140 if (Current.MustBreakBefore || Current.Type == TT_InlineASMColon) 1141 return true; 1142 if (Current.is(tok::r_brace) && State.Stack.back().BreakBeforeClosingBrace) 1143 return true; 1144 if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection) 1145 return true; 1146 if ((Previous.isOneOf(tok::comma, tok::semi) || Current.is(tok::question) || 1147 Current.Type == TT_ConditionalExpr) && 1148 State.Stack.back().BreakBeforeParameter && 1149 !Current.isTrailingComment() && 1150 !Current.isOneOf(tok::r_paren, tok::r_brace)) 1151 return true; 1152 1153 // If we need to break somewhere inside the LHS of a binary expression, we 1154 // should also break after the operator. Otherwise, the formatting would 1155 // hide the operator precedence, e.g. in: 1156 // if (aaaaaaaaaaaaaa == 1157 // bbbbbbbbbbbbbb && c) {.. 1158 // For comparisons, we only apply this rule, if the LHS is a binary 1159 // expression itself as otherwise, the line breaks seem superfluous. 1160 // We need special cases for ">>" which we have split into two ">" while 1161 // lexing in order to make template parsing easier. 1162 bool IsComparison = (Previous.getPrecedence() == prec::Relational || 1163 Previous.getPrecedence() == prec::Equality) && 1164 Previous.Previous && 1165 Previous.Previous->Type != TT_BinaryOperator; // For >>. 1166 bool LHSIsBinaryExpr = 1167 Previous.Previous && Previous.Previous->FakeRParens > 0; 1168 if (Previous.Type == TT_BinaryOperator && 1169 (!IsComparison || LHSIsBinaryExpr) && 1170 Current.Type != TT_BinaryOperator && // For >>. 1171 !Current.isTrailingComment() && 1172 !Previous.isOneOf(tok::lessless, tok::question) && 1173 Previous.getPrecedence() != prec::Assignment && 1174 State.Stack.back().BreakBeforeParameter) 1175 return true; 1176 1177 // Same as above, but for the first "<<" operator. 1178 if (Current.is(tok::lessless) && State.Stack.back().BreakBeforeParameter && 1179 State.Stack.back().FirstLessLess == 0) 1180 return true; 1181 1182 // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding 1183 // out whether it is the first parameter. Clean this up. 1184 if (Current.Type == TT_ObjCSelectorName && 1185 Current.LongestObjCSelectorName == 0 && 1186 State.Stack.back().BreakBeforeParameter) 1187 return true; 1188 if ((Current.Type == TT_CtorInitializerColon || 1189 (Previous.ClosesTemplateDeclaration && State.ParenLevel == 0))) 1190 return true; 1191 1192 if ((Current.Type == TT_StartOfName || Current.is(tok::kw_operator)) && 1193 Line.MightBeFunctionDecl && State.Stack.back().BreakBeforeParameter && 1194 State.ParenLevel == 0) 1195 return true; 1196 return false; 1197 } 1198 1199 // Returns the total number of columns required for the remaining tokens. 1200 unsigned getRemainingLength(const LineState &State) { 1201 if (State.NextToken && State.NextToken->Previous) 1202 return Line.Last->TotalLength - State.NextToken->Previous->TotalLength; 1203 return 0; 1204 } 1205 1206 FormatStyle Style; 1207 SourceManager &SourceMgr; 1208 const AnnotatedLine &Line; 1209 const unsigned FirstIndent; 1210 const FormatToken *RootToken; 1211 WhitespaceManager &Whitespaces; 1212 1213 llvm::SpecificBumpPtrAllocator<StateNode> Allocator; 1214 QueueType Queue; 1215 // Increasing count of \c StateNode items we have created. This is used 1216 // to create a deterministic order independent of the container. 1217 unsigned Count; 1218 encoding::Encoding Encoding; 1219 bool BinPackInconclusiveFunctions; 1220 }; 1221 1222 class FormatTokenLexer { 1223 public: 1224 FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, 1225 encoding::Encoding Encoding) 1226 : FormatTok(NULL), GreaterStashed(false), TrailingWhitespace(0), Lex(Lex), 1227 SourceMgr(SourceMgr), IdentTable(Lex.getLangOpts()), 1228 Encoding(Encoding) { 1229 Lex.SetKeepWhitespaceMode(true); 1230 } 1231 1232 ArrayRef<FormatToken *> lex() { 1233 assert(Tokens.empty()); 1234 do { 1235 Tokens.push_back(getNextToken()); 1236 } while (Tokens.back()->Tok.isNot(tok::eof)); 1237 return Tokens; 1238 } 1239 1240 IdentifierTable &getIdentTable() { return IdentTable; } 1241 1242 private: 1243 FormatToken *getNextToken() { 1244 if (GreaterStashed) { 1245 // Create a synthesized second '>' token. 1246 Token Greater = FormatTok->Tok; 1247 FormatTok = new (Allocator.Allocate()) FormatToken; 1248 FormatTok->Tok = Greater; 1249 SourceLocation GreaterLocation = 1250 FormatTok->Tok.getLocation().getLocWithOffset(1); 1251 FormatTok->WhitespaceRange = 1252 SourceRange(GreaterLocation, GreaterLocation); 1253 FormatTok->TokenText = ">"; 1254 FormatTok->CodePointCount = 1; 1255 GreaterStashed = false; 1256 return FormatTok; 1257 } 1258 1259 FormatTok = new (Allocator.Allocate()) FormatToken; 1260 Lex.LexFromRawLexer(FormatTok->Tok); 1261 StringRef Text = rawTokenText(FormatTok->Tok); 1262 SourceLocation WhitespaceStart = 1263 FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); 1264 if (SourceMgr.getFileOffset(WhitespaceStart) == 0) 1265 FormatTok->IsFirst = true; 1266 1267 // Consume and record whitespace until we find a significant token. 1268 unsigned WhitespaceLength = TrailingWhitespace; 1269 while (FormatTok->Tok.is(tok::unknown)) { 1270 unsigned Newlines = Text.count('\n'); 1271 if (Newlines > 0) 1272 FormatTok->LastNewlineOffset = WhitespaceLength + Text.rfind('\n') + 1; 1273 FormatTok->NewlinesBefore += Newlines; 1274 unsigned EscapedNewlines = Text.count("\\\n"); 1275 FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines; 1276 WhitespaceLength += FormatTok->Tok.getLength(); 1277 1278 Lex.LexFromRawLexer(FormatTok->Tok); 1279 Text = rawTokenText(FormatTok->Tok); 1280 } 1281 1282 // In case the token starts with escaped newlines, we want to 1283 // take them into account as whitespace - this pattern is quite frequent 1284 // in macro definitions. 1285 // FIXME: What do we want to do with other escaped spaces, and escaped 1286 // spaces or newlines in the middle of tokens? 1287 // FIXME: Add a more explicit test. 1288 while (Text.size() > 1 && Text[0] == '\\' && Text[1] == '\n') { 1289 // FIXME: ++FormatTok->NewlinesBefore is missing... 1290 WhitespaceLength += 2; 1291 Text = Text.substr(2); 1292 } 1293 1294 TrailingWhitespace = 0; 1295 if (FormatTok->Tok.is(tok::comment)) { 1296 StringRef UntrimmedText = Text; 1297 Text = Text.rtrim(); 1298 TrailingWhitespace = UntrimmedText.size() - Text.size(); 1299 } else if (FormatTok->Tok.is(tok::raw_identifier)) { 1300 IdentifierInfo &Info = IdentTable.get(Text); 1301 FormatTok->Tok.setIdentifierInfo(&Info); 1302 FormatTok->Tok.setKind(Info.getTokenID()); 1303 } else if (FormatTok->Tok.is(tok::greatergreater)) { 1304 FormatTok->Tok.setKind(tok::greater); 1305 Text = Text.substr(0, 1); 1306 GreaterStashed = true; 1307 } 1308 1309 // Now FormatTok is the next non-whitespace token. 1310 FormatTok->TokenText = Text; 1311 FormatTok->CodePointCount = encoding::getCodePointCount(Text, Encoding); 1312 1313 FormatTok->WhitespaceRange = SourceRange( 1314 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); 1315 return FormatTok; 1316 } 1317 1318 FormatToken *FormatTok; 1319 bool GreaterStashed; 1320 unsigned TrailingWhitespace; 1321 Lexer &Lex; 1322 SourceManager &SourceMgr; 1323 IdentifierTable IdentTable; 1324 encoding::Encoding Encoding; 1325 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; 1326 SmallVector<FormatToken *, 16> Tokens; 1327 1328 /// Returns the text of \c FormatTok. 1329 StringRef rawTokenText(Token &Tok) { 1330 return StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 1331 Tok.getLength()); 1332 } 1333 }; 1334 1335 class Formatter : public UnwrappedLineConsumer { 1336 public: 1337 Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, 1338 const std::vector<CharSourceRange> &Ranges) 1339 : Style(Style), Lex(Lex), SourceMgr(SourceMgr), 1340 Whitespaces(SourceMgr, Style), Ranges(Ranges), 1341 Encoding(encoding::detectEncoding(Lex.getBuffer())) { 1342 DEBUG(llvm::dbgs() << "File encoding: " 1343 << (Encoding == encoding::Encoding_UTF8 ? "UTF8" 1344 : "unknown") 1345 << "\n"); 1346 } 1347 1348 virtual ~Formatter() {} 1349 1350 tooling::Replacements format() { 1351 FormatTokenLexer Tokens(Lex, SourceMgr, Encoding); 1352 1353 UnwrappedLineParser Parser(Style, Tokens.lex(), *this); 1354 bool StructuralError = Parser.parse(); 1355 TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in")); 1356 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1357 Annotator.annotate(AnnotatedLines[i]); 1358 } 1359 deriveLocalStyle(); 1360 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1361 Annotator.calculateFormattingInformation(AnnotatedLines[i]); 1362 } 1363 1364 // Adapt level to the next line if this is a comment. 1365 // FIXME: Can/should this be done in the UnwrappedLineParser? 1366 const AnnotatedLine *NextNonCommentLine = NULL; 1367 for (unsigned i = AnnotatedLines.size() - 1; i > 0; --i) { 1368 if (NextNonCommentLine && AnnotatedLines[i].First->is(tok::comment) && 1369 !AnnotatedLines[i].First->Next) 1370 AnnotatedLines[i].Level = NextNonCommentLine->Level; 1371 else 1372 NextNonCommentLine = AnnotatedLines[i].First->isNot(tok::r_brace) 1373 ? &AnnotatedLines[i] 1374 : NULL; 1375 } 1376 1377 std::vector<int> IndentForLevel; 1378 bool PreviousLineWasTouched = false; 1379 const FormatToken *PreviousLineLastToken = 0; 1380 bool FormatPPDirective = false; 1381 for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(), 1382 E = AnnotatedLines.end(); 1383 I != E; ++I) { 1384 const AnnotatedLine &TheLine = *I; 1385 const FormatToken *FirstTok = TheLine.First; 1386 int Offset = getIndentOffset(*TheLine.First); 1387 1388 // Check whether this line is part of a formatted preprocessor directive. 1389 if (FirstTok->HasUnescapedNewline) 1390 FormatPPDirective = false; 1391 if (!FormatPPDirective && TheLine.InPPDirective && 1392 (touchesLine(TheLine) || touchesPPDirective(I + 1, E))) 1393 FormatPPDirective = true; 1394 1395 // Determine indent and try to merge multiple unwrapped lines. 1396 while (IndentForLevel.size() <= TheLine.Level) 1397 IndentForLevel.push_back(-1); 1398 IndentForLevel.resize(TheLine.Level + 1); 1399 unsigned Indent = getIndent(IndentForLevel, TheLine.Level); 1400 if (static_cast<int>(Indent) + Offset >= 0) 1401 Indent += Offset; 1402 tryFitMultipleLinesInOne(Indent, I, E); 1403 1404 bool WasMoved = PreviousLineWasTouched && FirstTok->NewlinesBefore == 0; 1405 if (TheLine.First->is(tok::eof)) { 1406 if (PreviousLineWasTouched) { 1407 unsigned NewLines = std::min(FirstTok->NewlinesBefore, 1u); 1408 Whitespaces.replaceWhitespace(*TheLine.First, NewLines, /*Indent*/ 0, 1409 /*TargetColumn*/ 0); 1410 } 1411 } else if (TheLine.Type != LT_Invalid && 1412 (WasMoved || FormatPPDirective || touchesLine(TheLine))) { 1413 unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level); 1414 if (FirstTok->WhitespaceRange.isValid() && 1415 // Insert a break even if there is a structural error in case where 1416 // we break apart a line consisting of multiple unwrapped lines. 1417 (FirstTok->NewlinesBefore == 0 || !StructuralError)) { 1418 formatFirstToken(*TheLine.First, PreviousLineLastToken, Indent, 1419 TheLine.InPPDirective); 1420 } else { 1421 Indent = LevelIndent = 1422 SourceMgr.getSpellingColumnNumber(FirstTok->Tok.getLocation()) - 1423 1; 1424 } 1425 UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent, 1426 TheLine.First, Whitespaces, Encoding, 1427 BinPackInconclusiveFunctions); 1428 Formatter.format(I + 1 != E ? &*(I + 1) : NULL); 1429 IndentForLevel[TheLine.Level] = LevelIndent; 1430 PreviousLineWasTouched = true; 1431 } else { 1432 // Format the first token if necessary, and notify the WhitespaceManager 1433 // about the unchanged whitespace. 1434 for (const FormatToken *Tok = TheLine.First; Tok != NULL; 1435 Tok = Tok->Next) { 1436 if (Tok == TheLine.First && 1437 (Tok->NewlinesBefore > 0 || Tok->IsFirst)) { 1438 unsigned LevelIndent = 1439 SourceMgr.getSpellingColumnNumber(Tok->Tok.getLocation()) - 1; 1440 // Remove trailing whitespace of the previous line if it was 1441 // touched. 1442 if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) { 1443 formatFirstToken(*Tok, PreviousLineLastToken, LevelIndent, 1444 TheLine.InPPDirective); 1445 } else { 1446 Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective); 1447 } 1448 1449 if (static_cast<int>(LevelIndent) - Offset >= 0) 1450 LevelIndent -= Offset; 1451 if (Tok->isNot(tok::comment)) 1452 IndentForLevel[TheLine.Level] = LevelIndent; 1453 } else { 1454 Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective); 1455 } 1456 } 1457 // If we did not reformat this unwrapped line, the column at the end of 1458 // the last token is unchanged - thus, we can calculate the end of the 1459 // last token. 1460 PreviousLineWasTouched = false; 1461 } 1462 PreviousLineLastToken = I->Last; 1463 } 1464 return Whitespaces.generateReplacements(); 1465 } 1466 1467 private: 1468 void deriveLocalStyle() { 1469 unsigned CountBoundToVariable = 0; 1470 unsigned CountBoundToType = 0; 1471 bool HasCpp03IncompatibleFormat = false; 1472 bool HasBinPackedFunction = false; 1473 bool HasOnePerLineFunction = false; 1474 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1475 if (!AnnotatedLines[i].First->Next) 1476 continue; 1477 FormatToken *Tok = AnnotatedLines[i].First->Next; 1478 while (Tok->Next) { 1479 if (Tok->Type == TT_PointerOrReference) { 1480 bool SpacesBefore = 1481 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); 1482 bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() != 1483 Tok->Next->WhitespaceRange.getEnd(); 1484 if (SpacesBefore && !SpacesAfter) 1485 ++CountBoundToVariable; 1486 else if (!SpacesBefore && SpacesAfter) 1487 ++CountBoundToType; 1488 } 1489 1490 if (Tok->Type == TT_TemplateCloser && 1491 Tok->Previous->Type == TT_TemplateCloser && 1492 Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) 1493 HasCpp03IncompatibleFormat = true; 1494 1495 if (Tok->PackingKind == PPK_BinPacked) 1496 HasBinPackedFunction = true; 1497 if (Tok->PackingKind == PPK_OnePerLine) 1498 HasOnePerLineFunction = true; 1499 1500 Tok = Tok->Next; 1501 } 1502 } 1503 if (Style.DerivePointerBinding) { 1504 if (CountBoundToType > CountBoundToVariable) 1505 Style.PointerBindsToType = true; 1506 else if (CountBoundToType < CountBoundToVariable) 1507 Style.PointerBindsToType = false; 1508 } 1509 if (Style.Standard == FormatStyle::LS_Auto) { 1510 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11 1511 : FormatStyle::LS_Cpp03; 1512 } 1513 BinPackInconclusiveFunctions = 1514 HasBinPackedFunction || !HasOnePerLineFunction; 1515 } 1516 1517 /// \brief Get the indent of \p Level from \p IndentForLevel. 1518 /// 1519 /// \p IndentForLevel must contain the indent for the level \c l 1520 /// at \p IndentForLevel[l], or a value < 0 if the indent for 1521 /// that level is unknown. 1522 unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) { 1523 if (IndentForLevel[Level] != -1) 1524 return IndentForLevel[Level]; 1525 if (Level == 0) 1526 return 0; 1527 return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; 1528 } 1529 1530 /// \brief Get the offset of the line relatively to the level. 1531 /// 1532 /// For example, 'public:' labels in classes are offset by 1 or 2 1533 /// characters to the left from their level. 1534 int getIndentOffset(const FormatToken &RootToken) { 1535 if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier()) 1536 return Style.AccessModifierOffset; 1537 return 0; 1538 } 1539 1540 /// \brief Tries to merge lines into one. 1541 /// 1542 /// This will change \c Line and \c AnnotatedLine to contain the merged line, 1543 /// if possible; note that \c I will be incremented when lines are merged. 1544 void tryFitMultipleLinesInOne(unsigned Indent, 1545 std::vector<AnnotatedLine>::iterator &I, 1546 std::vector<AnnotatedLine>::iterator E) { 1547 // We can never merge stuff if there are trailing line comments. 1548 if (I->Last->Type == TT_LineComment) 1549 return; 1550 1551 unsigned Limit = Style.ColumnLimit - Indent; 1552 // If we already exceed the column limit, we set 'Limit' to 0. The different 1553 // tryMerge..() functions can then decide whether to still do merging. 1554 Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength; 1555 1556 if (I + 1 == E || (I + 1)->Type == LT_Invalid) 1557 return; 1558 1559 if (I->Last->is(tok::l_brace)) { 1560 tryMergeSimpleBlock(I, E, Limit); 1561 } else if (Style.AllowShortIfStatementsOnASingleLine && 1562 I->First->is(tok::kw_if)) { 1563 tryMergeSimpleControlStatement(I, E, Limit); 1564 } else if (Style.AllowShortLoopsOnASingleLine && 1565 I->First->isOneOf(tok::kw_for, tok::kw_while)) { 1566 tryMergeSimpleControlStatement(I, E, Limit); 1567 } else if (I->InPPDirective && 1568 (I->First->HasUnescapedNewline || I->First->IsFirst)) { 1569 tryMergeSimplePPDirective(I, E, Limit); 1570 } 1571 } 1572 1573 void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I, 1574 std::vector<AnnotatedLine>::iterator E, 1575 unsigned Limit) { 1576 if (Limit == 0) 1577 return; 1578 AnnotatedLine &Line = *I; 1579 if (!(I + 1)->InPPDirective || (I + 1)->First->HasUnescapedNewline) 1580 return; 1581 if (I + 2 != E && (I + 2)->InPPDirective && 1582 !(I + 2)->First->HasUnescapedNewline) 1583 return; 1584 if (1 + (I + 1)->Last->TotalLength > Limit) 1585 return; 1586 join(Line, *(++I)); 1587 } 1588 1589 void tryMergeSimpleControlStatement(std::vector<AnnotatedLine>::iterator &I, 1590 std::vector<AnnotatedLine>::iterator E, 1591 unsigned Limit) { 1592 if (Limit == 0) 1593 return; 1594 if ((I + 1)->InPPDirective != I->InPPDirective || 1595 ((I + 1)->InPPDirective && (I + 1)->First->HasUnescapedNewline)) 1596 return; 1597 AnnotatedLine &Line = *I; 1598 if (Line.Last->isNot(tok::r_paren)) 1599 return; 1600 if (1 + (I + 1)->Last->TotalLength > Limit) 1601 return; 1602 if ((I + 1)->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, 1603 tok::kw_while) || 1604 (I + 1)->First->Type == TT_LineComment) 1605 return; 1606 // Only inline simple if's (no nested if or else). 1607 if (I + 2 != E && Line.First->is(tok::kw_if) && 1608 (I + 2)->First->is(tok::kw_else)) 1609 return; 1610 join(Line, *(++I)); 1611 } 1612 1613 void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I, 1614 std::vector<AnnotatedLine>::iterator E, 1615 unsigned Limit) { 1616 // No merging if the brace already is on the next line. 1617 if (Style.BreakBeforeBraces != FormatStyle::BS_Attach) 1618 return; 1619 1620 // First, check that the current line allows merging. This is the case if 1621 // we're not in a control flow statement and the last token is an opening 1622 // brace. 1623 AnnotatedLine &Line = *I; 1624 if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace, 1625 tok::kw_else, tok::kw_try, tok::kw_catch, 1626 tok::kw_for, 1627 // This gets rid of all ObjC @ keywords and methods. 1628 tok::at, tok::minus, tok::plus)) 1629 return; 1630 1631 FormatToken *Tok = (I + 1)->First; 1632 if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore && 1633 (Tok->getNextNonComment() == NULL || 1634 Tok->getNextNonComment()->is(tok::semi))) { 1635 // We merge empty blocks even if the line exceeds the column limit. 1636 Tok->SpacesRequiredBefore = 0; 1637 Tok->CanBreakBefore = true; 1638 join(Line, *(I + 1)); 1639 I += 1; 1640 } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) { 1641 // Check that we still have three lines and they fit into the limit. 1642 if (I + 2 == E || (I + 2)->Type == LT_Invalid || 1643 !nextTwoLinesFitInto(I, Limit)) 1644 return; 1645 1646 // Second, check that the next line does not contain any braces - if it 1647 // does, readability declines when putting it into a single line. 1648 if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore) 1649 return; 1650 do { 1651 if (Tok->isOneOf(tok::l_brace, tok::r_brace)) 1652 return; 1653 Tok = Tok->Next; 1654 } while (Tok != NULL); 1655 1656 // Last, check that the third line contains a single closing brace. 1657 Tok = (I + 2)->First; 1658 if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) || 1659 Tok->MustBreakBefore) 1660 return; 1661 1662 join(Line, *(I + 1)); 1663 join(Line, *(I + 2)); 1664 I += 2; 1665 } 1666 } 1667 1668 bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I, 1669 unsigned Limit) { 1670 return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <= 1671 Limit; 1672 } 1673 1674 void join(AnnotatedLine &A, const AnnotatedLine &B) { 1675 assert(!A.Last->Next); 1676 assert(!B.First->Previous); 1677 A.Last->Next = B.First; 1678 B.First->Previous = A.Last; 1679 unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; 1680 for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { 1681 Tok->TotalLength += LengthA; 1682 A.Last = Tok; 1683 } 1684 } 1685 1686 bool touchesRanges(const CharSourceRange &Range) { 1687 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1688 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), 1689 Ranges[i].getBegin()) && 1690 !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(), 1691 Range.getBegin())) 1692 return true; 1693 } 1694 return false; 1695 } 1696 1697 bool touchesLine(const AnnotatedLine &TheLine) { 1698 const FormatToken *First = TheLine.First; 1699 const FormatToken *Last = TheLine.Last; 1700 CharSourceRange LineRange = CharSourceRange::getCharRange( 1701 First->WhitespaceRange.getBegin().getLocWithOffset( 1702 First->LastNewlineOffset), 1703 Last->Tok.getLocation().getLocWithOffset(Last->TokenText.size() - 1)); 1704 return touchesRanges(LineRange); 1705 } 1706 1707 bool touchesPPDirective(std::vector<AnnotatedLine>::iterator I, 1708 std::vector<AnnotatedLine>::iterator E) { 1709 for (; I != E; ++I) { 1710 if (I->First->HasUnescapedNewline) 1711 return false; 1712 if (touchesLine(*I)) 1713 return true; 1714 } 1715 return false; 1716 } 1717 1718 bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) { 1719 const FormatToken *First = TheLine.First; 1720 CharSourceRange LineRange = CharSourceRange::getCharRange( 1721 First->WhitespaceRange.getBegin(), 1722 First->WhitespaceRange.getBegin().getLocWithOffset( 1723 First->LastNewlineOffset)); 1724 return touchesRanges(LineRange); 1725 } 1726 1727 virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { 1728 AnnotatedLines.push_back(AnnotatedLine(TheLine)); 1729 } 1730 1731 /// \brief Add a new line and the required indent before the first Token 1732 /// of the \c UnwrappedLine if there was no structural parsing error. 1733 /// Returns the indent level of the \c UnwrappedLine. 1734 void formatFirstToken(const FormatToken &RootToken, 1735 const FormatToken *PreviousToken, unsigned Indent, 1736 bool InPPDirective) { 1737 unsigned Newlines = 1738 std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); 1739 // Remove empty lines before "}" where applicable. 1740 if (RootToken.is(tok::r_brace) && 1741 (!RootToken.Next || 1742 (RootToken.Next->is(tok::semi) && !RootToken.Next->Next))) 1743 Newlines = std::min(Newlines, 1u); 1744 if (Newlines == 0 && !RootToken.IsFirst) 1745 Newlines = 1; 1746 1747 // Insert extra new line before access specifiers. 1748 if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) && 1749 RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1) 1750 ++Newlines; 1751 1752 Whitespaces.replaceWhitespace( 1753 RootToken, Newlines, Indent, Indent, 1754 InPPDirective && !RootToken.HasUnescapedNewline); 1755 } 1756 1757 FormatStyle Style; 1758 Lexer &Lex; 1759 SourceManager &SourceMgr; 1760 WhitespaceManager Whitespaces; 1761 std::vector<CharSourceRange> Ranges; 1762 std::vector<AnnotatedLine> AnnotatedLines; 1763 1764 encoding::Encoding Encoding; 1765 bool BinPackInconclusiveFunctions; 1766 }; 1767 1768 } // end anonymous namespace 1769 1770 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, 1771 SourceManager &SourceMgr, 1772 std::vector<CharSourceRange> Ranges) { 1773 Formatter formatter(Style, Lex, SourceMgr, Ranges); 1774 return formatter.format(); 1775 } 1776 1777 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, 1778 std::vector<tooling::Range> Ranges, 1779 StringRef FileName) { 1780 FileManager Files((FileSystemOptions())); 1781 DiagnosticsEngine Diagnostics( 1782 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 1783 new DiagnosticOptions); 1784 SourceManager SourceMgr(Diagnostics, Files); 1785 llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName); 1786 const clang::FileEntry *Entry = 1787 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); 1788 SourceMgr.overrideFileContents(Entry, Buf); 1789 FileID ID = 1790 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); 1791 Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr, 1792 getFormattingLangOpts(Style.Standard)); 1793 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); 1794 std::vector<CharSourceRange> CharRanges; 1795 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1796 SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset()); 1797 SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength()); 1798 CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); 1799 } 1800 return reformat(Style, Lex, SourceMgr, CharRanges); 1801 } 1802 1803 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) { 1804 LangOptions LangOpts; 1805 LangOpts.CPlusPlus = 1; 1806 LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 1807 LangOpts.LineComment = 1; 1808 LangOpts.Bool = 1; 1809 LangOpts.ObjC1 = 1; 1810 LangOpts.ObjC2 = 1; 1811 return LangOpts; 1812 } 1813 1814 } // namespace format 1815 } // namespace clang 1816