1 //===--- Format.cpp - Format C++ code -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements functions declared in Format.h. This will be 12 /// split into separate files as we go. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "format-formatter" 17 18 #include "TokenAnnotator.h" 19 #include "UnwrappedLineParser.h" 20 #include "clang/Basic/Diagnostic.h" 21 #include "clang/Basic/OperatorPrecedence.h" 22 #include "clang/Basic/SourceManager.h" 23 #include "clang/Format/Format.h" 24 #include "clang/Frontend/TextDiagnosticPrinter.h" 25 #include "clang/Lex/Lexer.h" 26 #include "llvm/Support/Allocator.h" 27 #include "llvm/Support/Debug.h" 28 #include <queue> 29 #include <string> 30 31 namespace clang { 32 namespace format { 33 34 FormatStyle getLLVMStyle() { 35 FormatStyle LLVMStyle; 36 LLVMStyle.ColumnLimit = 80; 37 LLVMStyle.MaxEmptyLinesToKeep = 1; 38 LLVMStyle.PointerBindsToType = false; 39 LLVMStyle.DerivePointerBinding = false; 40 LLVMStyle.AccessModifierOffset = -2; 41 LLVMStyle.Standard = FormatStyle::LS_Cpp03; 42 LLVMStyle.IndentCaseLabels = false; 43 LLVMStyle.SpacesBeforeTrailingComments = 1; 44 LLVMStyle.BinPackParameters = true; 45 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 46 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 47 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 48 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 49 LLVMStyle.PenaltyExcessCharacter = 1000000; 50 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 5; 51 return LLVMStyle; 52 } 53 54 FormatStyle getGoogleStyle() { 55 FormatStyle GoogleStyle; 56 GoogleStyle.ColumnLimit = 80; 57 GoogleStyle.MaxEmptyLinesToKeep = 1; 58 GoogleStyle.PointerBindsToType = true; 59 GoogleStyle.DerivePointerBinding = true; 60 GoogleStyle.AccessModifierOffset = -1; 61 GoogleStyle.Standard = FormatStyle::LS_Auto; 62 GoogleStyle.IndentCaseLabels = true; 63 GoogleStyle.SpacesBeforeTrailingComments = 2; 64 GoogleStyle.BinPackParameters = true; 65 GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true; 66 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 67 GoogleStyle.AllowShortIfStatementsOnASingleLine = false; 68 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 69 GoogleStyle.PenaltyExcessCharacter = 1000000; 70 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 100; 71 return GoogleStyle; 72 } 73 74 FormatStyle getChromiumStyle() { 75 FormatStyle ChromiumStyle = getGoogleStyle(); 76 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; 77 ChromiumStyle.BinPackParameters = false; 78 ChromiumStyle.Standard = FormatStyle::LS_Cpp03; 79 ChromiumStyle.DerivePointerBinding = false; 80 return ChromiumStyle; 81 } 82 83 static bool isTrailingComment(const AnnotatedToken &Tok) { 84 return Tok.is(tok::comment) && 85 (Tok.Children.empty() || Tok.Children[0].MustBreakBefore); 86 } 87 88 static bool isComparison(const AnnotatedToken &Tok) { 89 prec::Level Precedence = getPrecedence(Tok); 90 return Tok.Type == TT_BinaryOperator && 91 (Precedence == prec::Equality || Precedence == prec::Relational); 92 } 93 94 // Returns the length of everything up to the first possible line break after 95 // the ), ], } or > matching \c Tok. 96 static unsigned getLengthToMatchingParen(const AnnotatedToken &Tok) { 97 if (Tok.MatchingParen == NULL) 98 return 0; 99 AnnotatedToken *End = Tok.MatchingParen; 100 while (!End->Children.empty() && !End->Children[0].CanBreakBefore) { 101 End = &End->Children[0]; 102 } 103 return End->TotalLength - Tok.TotalLength + 1; 104 } 105 106 /// \brief Manages the whitespaces around tokens and their replacements. 107 /// 108 /// This includes special handling for certain constructs, e.g. the alignment of 109 /// trailing line comments. 110 class WhitespaceManager { 111 public: 112 WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style) 113 : SourceMgr(SourceMgr), Style(Style) {} 114 115 /// \brief Replaces the whitespace in front of \p Tok. Only call once for 116 /// each \c AnnotatedToken. 117 void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines, 118 unsigned Spaces, unsigned WhitespaceStartColumn) { 119 // 2+ newlines mean an empty line separating logic scopes. 120 if (NewLines >= 2) 121 alignComments(); 122 123 // Align line comments if they are trailing or if they continue other 124 // trailing comments. 125 if (isTrailingComment(Tok)) { 126 // Remove the comment's trailing whitespace. 127 if (Tok.FormatTok.Tok.getLength() != Tok.FormatTok.TokenLength) 128 Replaces.insert(tooling::Replacement( 129 SourceMgr, Tok.FormatTok.Tok.getLocation().getLocWithOffset( 130 Tok.FormatTok.TokenLength), 131 Tok.FormatTok.Tok.getLength() - Tok.FormatTok.TokenLength, "")); 132 133 // Align comment with other comments. 134 if (Tok.Parent != NULL || !Comments.empty()) { 135 if (Style.ColumnLimit >= 136 Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength) { 137 StoredComment Comment; 138 Comment.Tok = Tok.FormatTok; 139 Comment.Spaces = Spaces; 140 Comment.NewLines = NewLines; 141 Comment.MinColumn = 142 NewLines > 0 ? Spaces : WhitespaceStartColumn + Spaces; 143 Comment.MaxColumn = Style.ColumnLimit - Tok.FormatTok.TokenLength; 144 Comments.push_back(Comment); 145 return; 146 } 147 } 148 } 149 150 // If this line does not have a trailing comment, align the stored comments. 151 if (Tok.Children.empty() && !isTrailingComment(Tok)) 152 alignComments(); 153 154 if (Tok.Type == TT_BlockComment) 155 indentBlockComment(Tok, Spaces, false); 156 157 storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces)); 158 } 159 160 /// \brief Like \c replaceWhitespace, but additionally adds right-aligned 161 /// backslashes to escape newlines inside a preprocessor directive. 162 /// 163 /// This function and \c replaceWhitespace have the same behavior if 164 /// \c Newlines == 0. 165 void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines, 166 unsigned Spaces, unsigned WhitespaceStartColumn) { 167 if (Tok.Type == TT_BlockComment) 168 indentBlockComment(Tok, Spaces, true); 169 170 storeReplacement(Tok.FormatTok, 171 getNewLineText(NewLines, Spaces, WhitespaceStartColumn)); 172 } 173 174 /// \brief Inserts a line break into the middle of a token. 175 /// 176 /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line 177 /// break and \p Postfix before the rest of the token starts in the next line. 178 /// 179 /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are 180 /// used to generate the correct line break. 181 void breakToken(const FormatToken &Tok, unsigned Offset, 182 unsigned ReplaceChars, StringRef Prefix, StringRef Postfix, 183 bool InPPDirective, unsigned Spaces, 184 unsigned WhitespaceStartColumn) { 185 std::string NewLineText; 186 if (!InPPDirective) 187 NewLineText = getNewLineText(1, Spaces); 188 else 189 NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn); 190 std::string ReplacementText = (Prefix + NewLineText + Postfix).str(); 191 SourceLocation Location = Tok.Tok.getLocation().getLocWithOffset(Offset); 192 Replaces.insert(tooling::Replacement(SourceMgr, Location, ReplaceChars, 193 ReplacementText)); 194 } 195 196 /// \brief Returns all the \c Replacements created during formatting. 197 const tooling::Replacements &generateReplacements() { 198 alignComments(); 199 return Replaces; 200 } 201 202 private: 203 /// \brief Finds a common prefix of lines of a block comment to properly 204 /// indent (and possibly decorate with '*'s) added lines. 205 /// 206 /// The first line is ignored (it's special and starts with /*). 207 /// When there are less than three lines, we don't have enough information, so 208 /// better use no prefix. 209 static StringRef findCommentLinesPrefix(ArrayRef<StringRef> Lines, 210 const char *PrefixChars = " *") { 211 if (Lines.size() < 3) 212 return ""; 213 StringRef Prefix(Lines[1].data(), Lines[1].find_first_not_of(PrefixChars)); 214 for (size_t i = 2; i < Lines.size(); ++i) { 215 for (size_t j = 0; j < Prefix.size() && j < Lines[i].size(); ++j) { 216 if (Prefix[j] != Lines[i][j]) { 217 Prefix = Prefix.substr(0, j); 218 break; 219 } 220 } 221 } 222 return Prefix; 223 } 224 225 void splitLineInComment(const FormatToken &Tok, StringRef Line, 226 size_t StartColumn, StringRef LinePrefix, 227 bool InPPDirective, bool CommentHasMoreLines, 228 const char *WhiteSpaceChars = " ") { 229 size_t ColumnLimit = 230 Style.ColumnLimit - LinePrefix.size() - (InPPDirective ? 2 : 0); 231 232 if (Line.size() <= ColumnLimit) 233 return; 234 235 const char *TokenStart = SourceMgr.getCharacterData(Tok.Tok.getLocation()); 236 while (Line.rtrim().size() > ColumnLimit) { 237 // Try to break at the last whitespace before the column limit. 238 size_t SpacePos = Line.find_last_of(WhiteSpaceChars, ColumnLimit + 1); 239 if (SpacePos == StringRef::npos) { 240 // Try to find any whitespace in the line. 241 SpacePos = Line.find_first_of(WhiteSpaceChars); 242 if (SpacePos == StringRef::npos) // No whitespace found, give up. 243 break; 244 } 245 246 StringRef NextCut = Line.substr(0, SpacePos).rtrim(); 247 StringRef RemainingLine = Line.substr(SpacePos).ltrim(); 248 if (RemainingLine.empty()) 249 break; 250 Line = RemainingLine; 251 252 size_t ReplaceChars = Line.begin() - NextCut.end(); 253 breakToken(Tok, NextCut.end() - TokenStart, ReplaceChars, "", LinePrefix, 254 InPPDirective, 0, 255 NextCut.size() + LinePrefix.size() + StartColumn); 256 StartColumn = 0; 257 } 258 259 StringRef TrimmedLine = Line.rtrim(); 260 if (TrimmedLine != Line || (InPPDirective && CommentHasMoreLines)) { 261 // Remove trailing whitespace/insert backslash. 262 breakToken(Tok, TrimmedLine.end() - TokenStart, 263 Line.size() - TrimmedLine.size() + 1, "", "", InPPDirective, 0, 264 TrimmedLine.size() + LinePrefix.size()); 265 } 266 } 267 268 void indentBlockComment(const AnnotatedToken &Tok, int Indent, 269 bool InPPDirective) { 270 const SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation(); 271 const int CurrentIndent = SourceMgr.getSpellingColumnNumber(TokenLoc) - 1; 272 const int IndentDelta = Indent - CurrentIndent; 273 const StringRef Text(SourceMgr.getCharacterData(TokenLoc), 274 Tok.FormatTok.TokenLength); 275 assert(Text.startswith("/*") && Text.endswith("*/")); 276 277 SmallVector<StringRef, 16> Lines; 278 Text.split(Lines, "\n"); 279 280 if (IndentDelta > 0) { 281 std::string WhiteSpace(IndentDelta, ' '); 282 for (size_t i = 1; i < Lines.size(); ++i) { 283 Replaces.insert(tooling::Replacement( 284 SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()), 285 0, WhiteSpace)); 286 } 287 } else if (IndentDelta < 0) { 288 std::string WhiteSpace(-IndentDelta, ' '); 289 // Check that the line is indented enough. 290 for (size_t i = 1; i < Lines.size(); ++i) { 291 if (!Lines[i].startswith(WhiteSpace)) 292 return; 293 } 294 for (size_t i = 1; i < Lines.size(); ++i) { 295 Replaces.insert(tooling::Replacement( 296 SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()), 297 -IndentDelta, "")); 298 } 299 } 300 301 // Split long lines in comments. 302 const StringRef CurrentPrefix = findCommentLinesPrefix(Lines); 303 size_t PrefixSize = CurrentPrefix.size(); 304 std::string NewPrefix = 305 (IndentDelta < 0) ? CurrentPrefix.substr(-IndentDelta).str() 306 : std::string(IndentDelta, ' ') + CurrentPrefix.str(); 307 308 if (CurrentPrefix.endswith("*")) { 309 NewPrefix += " "; 310 ++PrefixSize; 311 } 312 313 for (size_t i = 0; i < Lines.size(); ++i) { 314 StringRef Line = (i == 0) ? Lines[i] : Lines[i].substr(PrefixSize); 315 size_t StartColumn = (i == 0) ? CurrentIndent : 0; 316 splitLineInComment(Tok.FormatTok, Line, StartColumn, NewPrefix, 317 InPPDirective, i != Lines.size() - 1); 318 } 319 } 320 321 std::string getNewLineText(unsigned NewLines, unsigned Spaces) { 322 return std::string(NewLines, '\n') + std::string(Spaces, ' '); 323 } 324 325 std::string getNewLineText(unsigned NewLines, unsigned Spaces, 326 unsigned WhitespaceStartColumn) { 327 std::string NewLineText; 328 if (NewLines > 0) { 329 unsigned Offset = 330 std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn); 331 for (unsigned i = 0; i < NewLines; ++i) { 332 NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' '); 333 NewLineText += "\\\n"; 334 Offset = 0; 335 } 336 } 337 return NewLineText + std::string(Spaces, ' '); 338 } 339 340 /// \brief Structure to store a comment for later layout and alignment. 341 struct StoredComment { 342 FormatToken Tok; 343 unsigned MinColumn; 344 unsigned MaxColumn; 345 unsigned NewLines; 346 unsigned Spaces; 347 }; 348 SmallVector<StoredComment, 16> Comments; 349 typedef SmallVector<StoredComment, 16>::iterator comment_iterator; 350 351 /// \brief Try to align all stashed comments. 352 void alignComments() { 353 unsigned MinColumn = 0; 354 unsigned MaxColumn = UINT_MAX; 355 comment_iterator Start = Comments.begin(); 356 for (comment_iterator I = Start, E = Comments.end(); I != E; ++I) { 357 if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) { 358 alignComments(Start, I, MinColumn); 359 MinColumn = I->MinColumn; 360 MaxColumn = I->MaxColumn; 361 Start = I; 362 } else { 363 MinColumn = std::max(MinColumn, I->MinColumn); 364 MaxColumn = std::min(MaxColumn, I->MaxColumn); 365 } 366 } 367 alignComments(Start, Comments.end(), MinColumn); 368 Comments.clear(); 369 } 370 371 /// \brief Put all the comments between \p I and \p E into \p Column. 372 void alignComments(comment_iterator I, comment_iterator E, unsigned Column) { 373 while (I != E) { 374 unsigned Spaces = I->Spaces + Column - I->MinColumn; 375 storeReplacement( 376 I->Tok, std::string(I->NewLines, '\n') + std::string(Spaces, ' ')); 377 ++I; 378 } 379 } 380 381 /// \brief Stores \p Text as the replacement for the whitespace in front of 382 /// \p Tok. 383 void storeReplacement(const FormatToken &Tok, const std::string Text) { 384 // Don't create a replacement, if it does not change anything. 385 if (StringRef(SourceMgr.getCharacterData(Tok.WhiteSpaceStart), 386 Tok.WhiteSpaceLength) == Text) 387 return; 388 389 Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart, 390 Tok.WhiteSpaceLength, Text)); 391 } 392 393 SourceManager &SourceMgr; 394 tooling::Replacements Replaces; 395 const FormatStyle &Style; 396 }; 397 398 class UnwrappedLineFormatter { 399 public: 400 UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, 401 const AnnotatedLine &Line, unsigned FirstIndent, 402 const AnnotatedToken &RootToken, 403 WhitespaceManager &Whitespaces, bool StructuralError) 404 : Style(Style), SourceMgr(SourceMgr), Line(Line), 405 FirstIndent(FirstIndent), RootToken(RootToken), 406 Whitespaces(Whitespaces), Count(0) {} 407 408 /// \brief Formats an \c UnwrappedLine. 409 /// 410 /// \returns The column after the last token in the last line of the 411 /// \c UnwrappedLine. 412 unsigned format(const AnnotatedLine *NextLine) { 413 // Initialize state dependent on indent. 414 LineState State; 415 State.Column = FirstIndent; 416 State.NextToken = &RootToken; 417 State.Stack.push_back( 418 ParenState(FirstIndent + 4, FirstIndent, !Style.BinPackParameters, 419 /*HasMultiParameterLine=*/ false)); 420 State.VariablePos = 0; 421 State.LineContainsContinuedForLoopSection = false; 422 State.ParenLevel = 0; 423 State.StartOfStringLiteral = 0; 424 State.StartOfLineLevel = State.ParenLevel; 425 426 DEBUG({ 427 DebugTokenState(*State.NextToken); 428 }); 429 430 // The first token has already been indented and thus consumed. 431 moveStateToNextToken(State, /*DryRun=*/ false); 432 433 // If everything fits on a single line, just put it there. 434 unsigned ColumnLimit = Style.ColumnLimit; 435 if (NextLine && NextLine->InPPDirective && 436 !NextLine->First.FormatTok.HasUnescapedNewline) 437 ColumnLimit = getColumnLimit(); 438 if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) { 439 while (State.NextToken != NULL) { 440 addTokenToState(false, false, State); 441 } 442 return State.Column; 443 } 444 445 // If the ObjC method declaration does not fit on a line, we should format 446 // it with one arg per line. 447 if (Line.Type == LT_ObjCMethodDecl) 448 State.Stack.back().BreakBeforeParameter = true; 449 450 // Find best solution in solution space. 451 return analyzeSolutionSpace(State); 452 } 453 454 private: 455 void DebugTokenState(const AnnotatedToken &AnnotatedTok) { 456 const Token &Tok = AnnotatedTok.FormatTok.Tok; 457 llvm::errs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 458 Tok.getLength()); 459 llvm::errs(); 460 } 461 462 struct ParenState { 463 ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, 464 bool HasMultiParameterLine) 465 : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0), 466 BreakBeforeClosingBrace(false), QuestionColumn(0), 467 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 468 HasMultiParameterLine(HasMultiParameterLine), ColonPos(0), 469 StartOfFunctionCall(0) {} 470 471 /// \brief The position to which a specific parenthesis level needs to be 472 /// indented. 473 unsigned Indent; 474 475 /// \brief The position of the last space on each level. 476 /// 477 /// Used e.g. to break like: 478 /// functionCall(Parameter, otherCall( 479 /// OtherParameter)); 480 unsigned LastSpace; 481 482 /// \brief The position the first "<<" operator encountered on each level. 483 /// 484 /// Used to align "<<" operators. 0 if no such operator has been encountered 485 /// on a level. 486 unsigned FirstLessLess; 487 488 /// \brief Whether a newline needs to be inserted before the block's closing 489 /// brace. 490 /// 491 /// We only want to insert a newline before the closing brace if there also 492 /// was a newline after the beginning left brace. 493 bool BreakBeforeClosingBrace; 494 495 /// \brief The column of a \c ? in a conditional expression; 496 unsigned QuestionColumn; 497 498 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple 499 /// lines, in this context. 500 bool AvoidBinPacking; 501 502 /// \brief Break after the next comma (or all the commas in this context if 503 /// \c AvoidBinPacking is \c true). 504 bool BreakBeforeParameter; 505 506 /// \brief This context already has a line with more than one parameter. 507 bool HasMultiParameterLine; 508 509 /// \brief The position of the colon in an ObjC method declaration/call. 510 unsigned ColonPos; 511 512 /// \brief The start of the most recent function in a builder-type call. 513 unsigned StartOfFunctionCall; 514 515 bool operator<(const ParenState &Other) const { 516 if (Indent != Other.Indent) 517 return Indent < Other.Indent; 518 if (LastSpace != Other.LastSpace) 519 return LastSpace < Other.LastSpace; 520 if (FirstLessLess != Other.FirstLessLess) 521 return FirstLessLess < Other.FirstLessLess; 522 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 523 return BreakBeforeClosingBrace; 524 if (QuestionColumn != Other.QuestionColumn) 525 return QuestionColumn < Other.QuestionColumn; 526 if (AvoidBinPacking != Other.AvoidBinPacking) 527 return AvoidBinPacking; 528 if (BreakBeforeParameter != Other.BreakBeforeParameter) 529 return BreakBeforeParameter; 530 if (HasMultiParameterLine != Other.HasMultiParameterLine) 531 return HasMultiParameterLine; 532 if (ColonPos != Other.ColonPos) 533 return ColonPos < Other.ColonPos; 534 if (StartOfFunctionCall != Other.StartOfFunctionCall) 535 return StartOfFunctionCall < Other.StartOfFunctionCall; 536 return false; 537 } 538 }; 539 540 /// \brief The current state when indenting a unwrapped line. 541 /// 542 /// As the indenting tries different combinations this is copied by value. 543 struct LineState { 544 /// \brief The number of used columns in the current line. 545 unsigned Column; 546 547 /// \brief The token that needs to be next formatted. 548 const AnnotatedToken *NextToken; 549 550 /// \brief The column of the first variable name in a variable declaration. 551 /// 552 /// Used to align further variables if necessary. 553 unsigned VariablePos; 554 555 /// \brief \c true if this line contains a continued for-loop section. 556 bool LineContainsContinuedForLoopSection; 557 558 /// \brief The level of nesting inside (), [], <> and {}. 559 unsigned ParenLevel; 560 561 /// \brief The \c ParenLevel at the start of this line. 562 unsigned StartOfLineLevel; 563 564 /// \brief The start column of the string literal, if we're in a string 565 /// literal sequence, 0 otherwise. 566 unsigned StartOfStringLiteral; 567 568 /// \brief A stack keeping track of properties applying to parenthesis 569 /// levels. 570 std::vector<ParenState> Stack; 571 572 /// \brief Comparison operator to be able to used \c LineState in \c map. 573 bool operator<(const LineState &Other) const { 574 if (NextToken != Other.NextToken) 575 return NextToken < Other.NextToken; 576 if (Column != Other.Column) 577 return Column < Other.Column; 578 if (VariablePos != Other.VariablePos) 579 return VariablePos < Other.VariablePos; 580 if (LineContainsContinuedForLoopSection != 581 Other.LineContainsContinuedForLoopSection) 582 return LineContainsContinuedForLoopSection; 583 if (ParenLevel != Other.ParenLevel) 584 return ParenLevel < Other.ParenLevel; 585 if (StartOfLineLevel != Other.StartOfLineLevel) 586 return StartOfLineLevel < Other.StartOfLineLevel; 587 if (StartOfStringLiteral != Other.StartOfStringLiteral) 588 return StartOfStringLiteral < Other.StartOfStringLiteral; 589 return Stack < Other.Stack; 590 } 591 }; 592 593 /// \brief Appends the next token to \p State and updates information 594 /// necessary for indentation. 595 /// 596 /// Puts the token on the current line if \p Newline is \c true and adds a 597 /// line break and necessary indentation otherwise. 598 /// 599 /// If \p DryRun is \c false, also creates and stores the required 600 /// \c Replacement. 601 unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) { 602 const AnnotatedToken &Current = *State.NextToken; 603 const AnnotatedToken &Previous = *State.NextToken->Parent; 604 605 if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) { 606 State.Column += State.NextToken->FormatTok.WhiteSpaceLength + 607 State.NextToken->FormatTok.TokenLength; 608 if (State.NextToken->Children.empty()) 609 State.NextToken = NULL; 610 else 611 State.NextToken = &State.NextToken->Children[0]; 612 return 0; 613 } 614 615 if (Newline) { 616 unsigned WhitespaceStartColumn = State.Column; 617 if (Current.is(tok::r_brace)) { 618 State.Column = Line.Level * 2; 619 } else if (Current.is(tok::string_literal) && 620 State.StartOfStringLiteral != 0) { 621 State.Column = State.StartOfStringLiteral; 622 State.Stack.back().BreakBeforeParameter = true; 623 } else if (Current.is(tok::lessless) && 624 State.Stack.back().FirstLessLess != 0) { 625 State.Column = State.Stack.back().FirstLessLess; 626 } else if (State.ParenLevel != 0 && 627 (Previous.isOneOf(tok::equal, tok::coloncolon) || 628 Current.isOneOf(tok::period, tok::arrow, tok::question) || 629 isComparison(Previous))) { 630 // Indent and extra 4 spaces after if we know the current expression is 631 // continued. Don't do that on the top level, as we already indent 4 632 // there. 633 State.Column = std::max(State.Stack.back().LastSpace, 634 State.Stack.back().Indent) + 4; 635 } else if (Current.Type == TT_ConditionalExpr) { 636 State.Column = State.Stack.back().QuestionColumn; 637 } else if (Previous.is(tok::comma) && State.VariablePos != 0 && 638 ((RootToken.is(tok::kw_for) && State.ParenLevel == 1) || 639 State.ParenLevel == 0)) { 640 State.Column = State.VariablePos; 641 } else if (Previous.ClosesTemplateDeclaration || 642 (Current.Type == TT_StartOfName && State.ParenLevel == 0)) { 643 State.Column = State.Stack.back().Indent - 4; 644 } else if (Current.Type == TT_ObjCSelectorName) { 645 if (State.Stack.back().ColonPos > Current.FormatTok.TokenLength) { 646 State.Column = 647 State.Stack.back().ColonPos - Current.FormatTok.TokenLength; 648 } else { 649 State.Column = State.Stack.back().Indent; 650 State.Stack.back().ColonPos = 651 State.Column + Current.FormatTok.TokenLength; 652 } 653 } else if (Previous.Type == TT_ObjCMethodExpr || 654 Current.Type == TT_StartOfName) { 655 State.Column = State.Stack.back().Indent + 4; 656 } else { 657 State.Column = State.Stack.back().Indent; 658 } 659 660 if (Current.is(tok::question)) 661 State.Stack.back().BreakBeforeParameter = true; 662 if (Previous.isOneOf(tok::comma, tok::semi) && 663 !State.Stack.back().AvoidBinPacking) 664 State.Stack.back().BreakBeforeParameter = false; 665 666 if (!DryRun) { 667 unsigned NewLines = 1; 668 if (Current.Type == TT_LineComment) 669 NewLines = 670 std::max(NewLines, std::min(Current.FormatTok.NewlinesBefore, 671 Style.MaxEmptyLinesToKeep + 1)); 672 if (!Line.InPPDirective) 673 Whitespaces.replaceWhitespace(Current, NewLines, State.Column, 674 WhitespaceStartColumn); 675 else 676 Whitespaces.replacePPWhitespace(Current, NewLines, State.Column, 677 WhitespaceStartColumn); 678 } 679 680 State.Stack.back().LastSpace = State.Column; 681 State.StartOfLineLevel = State.ParenLevel; 682 683 // Any break on this level means that the parent level has been broken 684 // and we need to avoid bin packing there. 685 for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { 686 State.Stack[i].BreakBeforeParameter = true; 687 } 688 if (Current.isOneOf(tok::period, tok::arrow)) 689 State.Stack.back().BreakBeforeParameter = true; 690 691 // If we break after {, we should also break before the corresponding }. 692 if (Previous.is(tok::l_brace)) 693 State.Stack.back().BreakBeforeClosingBrace = true; 694 695 if (State.Stack.back().AvoidBinPacking) { 696 // If we are breaking after '(', '{', '<', this is not bin packing 697 // unless AllowAllParametersOfDeclarationOnNextLine is false. 698 if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace)) || 699 (!Style.AllowAllParametersOfDeclarationOnNextLine && 700 Line.MustBeDeclaration)) 701 State.Stack.back().BreakBeforeParameter = true; 702 } 703 } else { 704 // FIXME: Put VariablePos into ParenState and remove second part of if(). 705 if (Current.is(tok::equal) && 706 (RootToken.is(tok::kw_for) || State.ParenLevel == 0)) 707 State.VariablePos = State.Column - Previous.FormatTok.TokenLength; 708 709 unsigned Spaces = State.NextToken->SpacesRequiredBefore; 710 711 if (!DryRun) 712 Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column); 713 714 if (Current.Type == TT_ObjCSelectorName && 715 State.Stack.back().ColonPos == 0) { 716 if (State.Stack.back().Indent + Current.LongestObjCSelectorName > 717 State.Column + Spaces + Current.FormatTok.TokenLength) 718 State.Stack.back().ColonPos = 719 State.Stack.back().Indent + Current.LongestObjCSelectorName; 720 else 721 State.Stack.back().ColonPos = 722 State.Column + Spaces + Current.FormatTok.TokenLength; 723 } 724 725 if (Current.Type != TT_LineComment && 726 (Previous.isOneOf(tok::l_paren, tok::l_brace) || 727 State.NextToken->Parent->Type == TT_TemplateOpener)) 728 State.Stack.back().Indent = State.Column + Spaces; 729 if (Previous.is(tok::comma) && !isTrailingComment(Current)) 730 State.Stack.back().HasMultiParameterLine = true; 731 732 State.Column += Spaces; 733 if (Current.is(tok::l_paren) && Previous.is(tok::kw_if)) 734 // Treat the condition inside an if as if it was a second function 735 // parameter, i.e. let nested calls have an indent of 4. 736 State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(". 737 else if (Previous.is(tok::comma)) 738 // Top-level spaces are exempt as that mostly leads to better results. 739 State.Stack.back().LastSpace = State.Column; 740 else if ((Previous.Type == TT_BinaryOperator || 741 Previous.Type == TT_ConditionalExpr || 742 Previous.Type == TT_CtorInitializerColon) && 743 getPrecedence(Previous) != prec::Assignment) 744 State.Stack.back().LastSpace = State.Column; 745 else if (Previous.Type == TT_InheritanceColon) 746 State.Stack.back().Indent = State.Column; 747 else if (Previous.ParameterCount > 1 && 748 (Previous.isOneOf(tok::l_paren, tok::l_square, tok::l_brace) || 749 Previous.Type == TT_TemplateOpener)) 750 // If this function has multiple parameters, indent nested calls from 751 // the start of the first parameter. 752 State.Stack.back().LastSpace = State.Column; 753 } 754 755 return moveStateToNextToken(State, DryRun); 756 } 757 758 /// \brief Mark the next token as consumed in \p State and modify its stacks 759 /// accordingly. 760 unsigned moveStateToNextToken(LineState &State, bool DryRun) { 761 const AnnotatedToken &Current = *State.NextToken; 762 assert(State.Stack.size()); 763 764 if (Current.Type == TT_InheritanceColon) 765 State.Stack.back().AvoidBinPacking = true; 766 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) 767 State.Stack.back().FirstLessLess = State.Column; 768 if (Current.is(tok::question)) 769 State.Stack.back().QuestionColumn = State.Column; 770 if (Current.isOneOf(tok::period, tok::arrow) && 771 Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0) 772 State.Stack.back().StartOfFunctionCall = 773 Current.LastInChainOfCalls ? 0 : State.Column; 774 if (Current.Type == TT_CtorInitializerColon) { 775 if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) 776 State.Stack.back().AvoidBinPacking = true; 777 State.Stack.back().BreakBeforeParameter = false; 778 } 779 780 // Insert scopes created by fake parenthesis. 781 for (unsigned i = 0, e = Current.FakeLParens; i != e; ++i) { 782 ParenState NewParenState = State.Stack.back(); 783 NewParenState.Indent = std::max(State.Column, State.Stack.back().Indent); 784 NewParenState.BreakBeforeParameter = false; 785 State.Stack.push_back(NewParenState); 786 } 787 788 // If we encounter an opening (, [, { or <, we add a level to our stacks to 789 // prepare for the following tokens. 790 if (Current.isOneOf(tok::l_paren, tok::l_square, tok::l_brace) || 791 State.NextToken->Type == TT_TemplateOpener) { 792 unsigned NewIndent; 793 bool AvoidBinPacking; 794 if (Current.is(tok::l_brace)) { 795 NewIndent = 2 + State.Stack.back().LastSpace; 796 AvoidBinPacking = false; 797 } else { 798 NewIndent = 4 + std::max(State.Stack.back().LastSpace, 799 State.Stack.back().StartOfFunctionCall); 800 AvoidBinPacking = 801 !Style.BinPackParameters || State.Stack.back().AvoidBinPacking; 802 } 803 State.Stack.push_back( 804 ParenState(NewIndent, State.Stack.back().LastSpace, AvoidBinPacking, 805 State.Stack.back().HasMultiParameterLine)); 806 ++State.ParenLevel; 807 } 808 809 // If this '[' opens an ObjC call, determine whether all parameters fit into 810 // one line and put one per line if they don't. 811 if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr && 812 Current.MatchingParen != NULL) { 813 if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit()) 814 State.Stack.back().BreakBeforeParameter = true; 815 } 816 817 // If we encounter a closing ), ], } or >, we can remove a level from our 818 // stacks. 819 if (Current.isOneOf(tok::r_paren, tok::r_square) || 820 (Current.is(tok::r_brace) && State.NextToken != &RootToken) || 821 State.NextToken->Type == TT_TemplateCloser) { 822 State.Stack.pop_back(); 823 --State.ParenLevel; 824 } 825 826 // Remove scopes created by fake parenthesis. 827 for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) { 828 State.Stack.pop_back(); 829 } 830 831 if (Current.is(tok::string_literal)) { 832 State.StartOfStringLiteral = State.Column; 833 } else if (Current.isNot(tok::comment)) { 834 State.StartOfStringLiteral = 0; 835 } 836 837 State.Column += Current.FormatTok.TokenLength; 838 839 if (State.NextToken->Children.empty()) 840 State.NextToken = NULL; 841 else 842 State.NextToken = &State.NextToken->Children[0]; 843 844 return breakProtrudingToken(Current, State, DryRun); 845 } 846 847 /// \brief If the current token sticks out over the end of the line, break 848 /// it if possible. 849 unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State, 850 bool DryRun) { 851 if (Current.isNot(tok::string_literal)) 852 return 0; 853 // Only break up default narrow strings. 854 const char *LiteralData = Current.FormatTok.Tok.getLiteralData(); 855 if (!LiteralData || *LiteralData != '"') 856 return 0; 857 858 unsigned Penalty = 0; 859 unsigned TailOffset = 0; 860 unsigned TailLength = Current.FormatTok.TokenLength; 861 unsigned StartColumn = State.Column - Current.FormatTok.TokenLength; 862 unsigned OffsetFromStart = 0; 863 while (StartColumn + TailLength > getColumnLimit()) { 864 StringRef Text = StringRef(LiteralData + TailOffset, TailLength); 865 if (StartColumn + OffsetFromStart + 1 > getColumnLimit()) 866 break; 867 StringRef::size_type SplitPoint = getSplitPoint( 868 Text, getColumnLimit() - StartColumn - OffsetFromStart - 1); 869 if (SplitPoint == StringRef::npos) 870 break; 871 assert(SplitPoint != 0); 872 // +2, because 'Text' starts after the opening quotes, and does not 873 // include the closing quote we need to insert. 874 unsigned WhitespaceStartColumn = 875 StartColumn + OffsetFromStart + SplitPoint + 2; 876 State.Stack.back().LastSpace = StartColumn; 877 if (!DryRun) { 878 Whitespaces.breakToken(Current.FormatTok, TailOffset + SplitPoint + 1, 879 0, "\"", "\"", Line.InPPDirective, StartColumn, 880 WhitespaceStartColumn); 881 } 882 TailOffset += SplitPoint + 1; 883 TailLength -= SplitPoint + 1; 884 OffsetFromStart = 1; 885 Penalty += Style.PenaltyExcessCharacter; 886 for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) 887 State.Stack[i].BreakBeforeParameter = true; 888 } 889 State.Column = StartColumn + TailLength; 890 return Penalty; 891 } 892 893 StringRef::size_type 894 getSplitPoint(StringRef Text, StringRef::size_type Offset) { 895 StringRef::size_type SpaceOffset = Text.rfind(' ', Offset); 896 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) 897 return SpaceOffset; 898 StringRef::size_type SlashOffset = Text.rfind('/', Offset); 899 if (SlashOffset != StringRef::npos && SlashOffset != 0) 900 return SlashOffset; 901 StringRef::size_type Split = getStartOfCharacter(Text, Offset); 902 if (Split != StringRef::npos && Split > 1) 903 // Do not split at 0. 904 return Split - 1; 905 return StringRef::npos; 906 } 907 908 StringRef::size_type 909 getStartOfCharacter(StringRef Text, StringRef::size_type Offset) { 910 StringRef::size_type NextEscape = Text.find('\\'); 911 while (NextEscape != StringRef::npos && NextEscape < Offset) { 912 StringRef::size_type SequenceLength = 913 getEscapeSequenceLength(Text.substr(NextEscape)); 914 if (Offset < NextEscape + SequenceLength) 915 return NextEscape; 916 NextEscape = Text.find('\\', NextEscape + SequenceLength); 917 } 918 return Offset; 919 } 920 921 unsigned getEscapeSequenceLength(StringRef Text) { 922 assert(Text[0] == '\\'); 923 if (Text.size() < 2) 924 return 1; 925 926 switch (Text[1]) { 927 case 'u': 928 return 6; 929 case 'U': 930 return 10; 931 case 'x': 932 return getHexLength(Text); 933 default: 934 if (Text[1] >= '0' && Text[1] <= '7') 935 return getOctalLength(Text); 936 return 2; 937 } 938 } 939 940 unsigned getHexLength(StringRef Text) { 941 unsigned I = 2; // Point after '\x'. 942 while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') || 943 (Text[I] >= 'a' && Text[I] <= 'f') || 944 (Text[I] >= 'A' && Text[I] <= 'F'))) { 945 ++I; 946 } 947 return I; 948 } 949 950 unsigned getOctalLength(StringRef Text) { 951 unsigned I = 1; 952 while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) { 953 ++I; 954 } 955 return I; 956 } 957 958 unsigned getColumnLimit() { 959 return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0); 960 } 961 962 /// \brief An edge in the solution space from \c Previous->State to \c State, 963 /// inserting a newline dependent on the \c NewLine. 964 struct StateNode { 965 StateNode(const LineState &State, bool NewLine, StateNode *Previous) 966 : State(State), NewLine(NewLine), Previous(Previous) {} 967 LineState State; 968 bool NewLine; 969 StateNode *Previous; 970 }; 971 972 /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. 973 /// 974 /// In case of equal penalties, we want to prefer states that were inserted 975 /// first. During state generation we make sure that we insert states first 976 /// that break the line as late as possible. 977 typedef std::pair<unsigned, unsigned> OrderedPenalty; 978 979 /// \brief An item in the prioritized BFS search queue. The \c StateNode's 980 /// \c State has the given \c OrderedPenalty. 981 typedef std::pair<OrderedPenalty, StateNode *> QueueItem; 982 983 /// \brief The BFS queue type. 984 typedef std::priority_queue<QueueItem, std::vector<QueueItem>, 985 std::greater<QueueItem> > QueueType; 986 987 /// \brief Analyze the entire solution space starting from \p InitialState. 988 /// 989 /// This implements a variant of Dijkstra's algorithm on the graph that spans 990 /// the solution space (\c LineStates are the nodes). The algorithm tries to 991 /// find the shortest path (the one with lowest penalty) from \p InitialState 992 /// to a state where all tokens are placed. 993 unsigned analyzeSolutionSpace(LineState &InitialState) { 994 std::set<LineState> Seen; 995 996 // Insert start element into queue. 997 StateNode *Node = 998 new (Allocator.Allocate()) StateNode(InitialState, false, NULL); 999 Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); 1000 ++Count; 1001 1002 // While not empty, take first element and follow edges. 1003 while (!Queue.empty()) { 1004 unsigned Penalty = Queue.top().first.first; 1005 StateNode *Node = Queue.top().second; 1006 if (Node->State.NextToken == NULL) { 1007 DEBUG(llvm::errs() << "\n---\nPenalty for line: " << Penalty << "\n"); 1008 break; 1009 } 1010 Queue.pop(); 1011 1012 if (!Seen.insert(Node->State).second) 1013 // State already examined with lower penalty. 1014 continue; 1015 1016 addNextStateToQueue(Penalty, Node, /*NewLine=*/ false); 1017 addNextStateToQueue(Penalty, Node, /*NewLine=*/ true); 1018 } 1019 1020 if (Queue.empty()) 1021 // We were unable to find a solution, do nothing. 1022 // FIXME: Add diagnostic? 1023 return 0; 1024 1025 // Reconstruct the solution. 1026 reconstructPath(InitialState, Queue.top().second); 1027 DEBUG(llvm::errs() << "---\n"); 1028 1029 // Return the column after the last token of the solution. 1030 return Queue.top().second->State.Column; 1031 } 1032 1033 void reconstructPath(LineState &State, StateNode *Current) { 1034 // FIXME: This recursive implementation limits the possible number 1035 // of tokens per line if compiled into a binary with small stack space. 1036 // To become more independent of stack frame limitations we would need 1037 // to also change the TokenAnnotator. 1038 if (Current->Previous == NULL) 1039 return; 1040 reconstructPath(State, Current->Previous); 1041 DEBUG({ 1042 if (Current->NewLine) { 1043 llvm::errs() 1044 << "Penalty for splitting before " 1045 << Current->Previous->State.NextToken->FormatTok.Tok.getName() 1046 << ": " << Current->Previous->State.NextToken->SplitPenalty << "\n"; 1047 } 1048 }); 1049 addTokenToState(Current->NewLine, false, State); 1050 } 1051 1052 /// \brief Add the following state to the analysis queue \c Queue. 1053 /// 1054 /// Assume the current state is \p PreviousNode and has been reached with a 1055 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. 1056 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, 1057 bool NewLine) { 1058 if (NewLine && !canBreak(PreviousNode->State)) 1059 return; 1060 if (!NewLine && mustBreak(PreviousNode->State)) 1061 return; 1062 if (NewLine) 1063 Penalty += PreviousNode->State.NextToken->SplitPenalty; 1064 1065 StateNode *Node = new (Allocator.Allocate()) 1066 StateNode(PreviousNode->State, NewLine, PreviousNode); 1067 Penalty += addTokenToState(NewLine, true, Node->State); 1068 if (Node->State.Column > getColumnLimit()) { 1069 unsigned ExcessCharacters = Node->State.Column - getColumnLimit(); 1070 Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; 1071 } 1072 1073 Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node)); 1074 ++Count; 1075 } 1076 1077 /// \brief Returns \c true, if a line break after \p State is allowed. 1078 bool canBreak(const LineState &State) { 1079 if (!State.NextToken->CanBreakBefore && 1080 !(State.NextToken->is(tok::r_brace) && 1081 State.Stack.back().BreakBeforeClosingBrace)) 1082 return false; 1083 // Trying to insert a parameter on a new line if there are already more than 1084 // one parameter on the current line is bin packing. 1085 if (State.Stack.back().HasMultiParameterLine && 1086 State.Stack.back().AvoidBinPacking) 1087 return false; 1088 return true; 1089 } 1090 1091 /// \brief Returns \c true, if a line break after \p State is mandatory. 1092 bool mustBreak(const LineState &State) { 1093 if (State.NextToken->MustBreakBefore) 1094 return true; 1095 if (State.NextToken->is(tok::r_brace) && 1096 State.Stack.back().BreakBeforeClosingBrace) 1097 return true; 1098 if (State.NextToken->Parent->is(tok::semi) && 1099 State.LineContainsContinuedForLoopSection) 1100 return true; 1101 if ((State.NextToken->Parent->isOneOf(tok::comma, tok::semi) || 1102 State.NextToken->is(tok::question) || 1103 State.NextToken->Type == TT_ConditionalExpr) && 1104 State.Stack.back().BreakBeforeParameter && 1105 !isTrailingComment(*State.NextToken) && 1106 State.NextToken->isNot(tok::r_paren) && 1107 State.NextToken->isNot(tok::r_brace)) 1108 return true; 1109 // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding 1110 // out whether it is the first parameter. Clean this up. 1111 if (State.NextToken->Type == TT_ObjCSelectorName && 1112 State.NextToken->LongestObjCSelectorName == 0 && 1113 State.Stack.back().BreakBeforeParameter) 1114 return true; 1115 if ((State.NextToken->Type == TT_CtorInitializerColon || 1116 (State.NextToken->Parent->ClosesTemplateDeclaration && 1117 State.ParenLevel == 0))) 1118 return true; 1119 if (State.NextToken->Type == TT_InlineASMColon) 1120 return true; 1121 // This prevents breaks like: 1122 // ... 1123 // SomeParameter, OtherParameter).DoSomething( 1124 // ... 1125 // As they hide "DoSomething" and generally bad for readability. 1126 if (State.NextToken->isOneOf(tok::period, tok::arrow) && 1127 getRemainingLength(State) + State.Column > getColumnLimit() && 1128 State.ParenLevel < State.StartOfLineLevel) 1129 return true; 1130 return false; 1131 } 1132 1133 // Returns the total number of columns required for the remaining tokens. 1134 unsigned getRemainingLength(const LineState &State) { 1135 if (State.NextToken && State.NextToken->Parent) 1136 return Line.Last->TotalLength - State.NextToken->Parent->TotalLength; 1137 return 0; 1138 } 1139 1140 FormatStyle Style; 1141 SourceManager &SourceMgr; 1142 const AnnotatedLine &Line; 1143 const unsigned FirstIndent; 1144 const AnnotatedToken &RootToken; 1145 WhitespaceManager &Whitespaces; 1146 1147 llvm::SpecificBumpPtrAllocator<StateNode> Allocator; 1148 QueueType Queue; 1149 // Increasing count of \c StateNode items we have created. This is used 1150 // to create a deterministic order independent of the container. 1151 unsigned Count; 1152 }; 1153 1154 class LexerBasedFormatTokenSource : public FormatTokenSource { 1155 public: 1156 LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr) 1157 : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr), 1158 IdentTable(Lex.getLangOpts()) { 1159 Lex.SetKeepWhitespaceMode(true); 1160 } 1161 1162 virtual FormatToken getNextToken() { 1163 if (GreaterStashed) { 1164 FormatTok.NewlinesBefore = 0; 1165 FormatTok.WhiteSpaceStart = 1166 FormatTok.Tok.getLocation().getLocWithOffset(1); 1167 FormatTok.WhiteSpaceLength = 0; 1168 GreaterStashed = false; 1169 return FormatTok; 1170 } 1171 1172 FormatTok = FormatToken(); 1173 Lex.LexFromRawLexer(FormatTok.Tok); 1174 StringRef Text = rawTokenText(FormatTok.Tok); 1175 FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation(); 1176 if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0) 1177 FormatTok.IsFirst = true; 1178 1179 // Consume and record whitespace until we find a significant token. 1180 while (FormatTok.Tok.is(tok::unknown)) { 1181 unsigned Newlines = Text.count('\n'); 1182 if (Newlines > 0) 1183 FormatTok.LastNewlineOffset = 1184 FormatTok.WhiteSpaceLength + Text.rfind('\n') + 1; 1185 unsigned EscapedNewlines = Text.count("\\\n"); 1186 FormatTok.NewlinesBefore += Newlines; 1187 FormatTok.HasUnescapedNewline |= EscapedNewlines != Newlines; 1188 FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength(); 1189 1190 if (FormatTok.Tok.is(tok::eof)) 1191 return FormatTok; 1192 Lex.LexFromRawLexer(FormatTok.Tok); 1193 Text = rawTokenText(FormatTok.Tok); 1194 } 1195 1196 // Now FormatTok is the next non-whitespace token. 1197 FormatTok.TokenLength = Text.size(); 1198 1199 // In case the token starts with escaped newlines, we want to 1200 // take them into account as whitespace - this pattern is quite frequent 1201 // in macro definitions. 1202 // FIXME: What do we want to do with other escaped spaces, and escaped 1203 // spaces or newlines in the middle of tokens? 1204 // FIXME: Add a more explicit test. 1205 unsigned i = 0; 1206 while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') { 1207 // FIXME: ++FormatTok.NewlinesBefore is missing... 1208 FormatTok.WhiteSpaceLength += 2; 1209 FormatTok.TokenLength -= 2; 1210 i += 2; 1211 } 1212 1213 if (FormatTok.Tok.is(tok::raw_identifier)) { 1214 IdentifierInfo &Info = IdentTable.get(Text); 1215 FormatTok.Tok.setIdentifierInfo(&Info); 1216 FormatTok.Tok.setKind(Info.getTokenID()); 1217 } 1218 1219 if (FormatTok.Tok.is(tok::greatergreater)) { 1220 FormatTok.Tok.setKind(tok::greater); 1221 FormatTok.TokenLength = 1; 1222 GreaterStashed = true; 1223 } 1224 1225 // If we reformat comments, we remove trailing whitespace. Update the length 1226 // accordingly. 1227 if (FormatTok.Tok.is(tok::comment)) 1228 FormatTok.TokenLength = Text.rtrim().size(); 1229 1230 return FormatTok; 1231 } 1232 1233 IdentifierTable &getIdentTable() { return IdentTable; } 1234 1235 private: 1236 FormatToken FormatTok; 1237 bool GreaterStashed; 1238 Lexer &Lex; 1239 SourceManager &SourceMgr; 1240 IdentifierTable IdentTable; 1241 1242 /// Returns the text of \c FormatTok. 1243 StringRef rawTokenText(Token &Tok) { 1244 return StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 1245 Tok.getLength()); 1246 } 1247 }; 1248 1249 class Formatter : public UnwrappedLineConsumer { 1250 public: 1251 Formatter(DiagnosticsEngine &Diag, const FormatStyle &Style, Lexer &Lex, 1252 SourceManager &SourceMgr, 1253 const std::vector<CharSourceRange> &Ranges) 1254 : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr), 1255 Whitespaces(SourceMgr, Style), Ranges(Ranges) {} 1256 1257 virtual ~Formatter() {} 1258 1259 tooling::Replacements format() { 1260 LexerBasedFormatTokenSource Tokens(Lex, SourceMgr); 1261 UnwrappedLineParser Parser(Diag, Style, Tokens, *this); 1262 StructuralError = Parser.parse(); 1263 unsigned PreviousEndOfLineColumn = 0; 1264 TokenAnnotator Annotator(Style, SourceMgr, Lex, 1265 Tokens.getIdentTable().get("in")); 1266 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1267 Annotator.annotate(AnnotatedLines[i]); 1268 } 1269 deriveLocalStyle(); 1270 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1271 Annotator.calculateFormattingInformation(AnnotatedLines[i]); 1272 1273 // Adapt level to the next line if this is a comment. 1274 // FIXME: Can/should this be done in the UnwrappedLineParser? 1275 if (i + 1 != e && AnnotatedLines[i].First.is(tok::comment) && 1276 AnnotatedLines[i].First.Children.empty() && 1277 AnnotatedLines[i + 1].First.isNot(tok::r_brace)) 1278 AnnotatedLines[i].Level = AnnotatedLines[i + 1].Level; 1279 } 1280 std::vector<int> IndentForLevel; 1281 bool PreviousLineWasTouched = false; 1282 for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(), 1283 E = AnnotatedLines.end(); 1284 I != E; ++I) { 1285 const AnnotatedLine &TheLine = *I; 1286 const FormatToken &FirstTok = TheLine.First.FormatTok; 1287 int Offset = getIndentOffset(TheLine.First); 1288 while (IndentForLevel.size() <= TheLine.Level) 1289 IndentForLevel.push_back(-1); 1290 IndentForLevel.resize(TheLine.Level + 1); 1291 bool WasMoved = PreviousLineWasTouched && FirstTok.NewlinesBefore == 0; 1292 if (TheLine.First.is(tok::eof)) { 1293 if (PreviousLineWasTouched) { 1294 unsigned NewLines = std::min(FirstTok.NewlinesBefore, 1u); 1295 Whitespaces.replaceWhitespace(TheLine.First, NewLines, /*Indent*/ 0, 1296 /*WhitespaceStartColumn*/ 0); 1297 } 1298 } else if (TheLine.Type != LT_Invalid && 1299 (WasMoved || touchesLine(TheLine))) { 1300 unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level); 1301 unsigned Indent = LevelIndent; 1302 if (static_cast<int>(Indent) + Offset >= 0) 1303 Indent += Offset; 1304 if (!FirstTok.WhiteSpaceStart.isValid() || StructuralError) { 1305 Indent = LevelIndent = 1306 SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1; 1307 } else { 1308 formatFirstToken(TheLine.First, Indent, TheLine.InPPDirective, 1309 PreviousEndOfLineColumn); 1310 } 1311 tryFitMultipleLinesInOne(Indent, I, E); 1312 UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent, 1313 TheLine.First, Whitespaces, 1314 StructuralError); 1315 PreviousEndOfLineColumn = 1316 Formatter.format(I + 1 != E ? &*(I + 1) : NULL); 1317 IndentForLevel[TheLine.Level] = LevelIndent; 1318 PreviousLineWasTouched = true; 1319 } else { 1320 if (FirstTok.NewlinesBefore > 0 || FirstTok.IsFirst) { 1321 unsigned Indent = 1322 SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1; 1323 unsigned LevelIndent = Indent; 1324 if (static_cast<int>(LevelIndent) - Offset >= 0) 1325 LevelIndent -= Offset; 1326 if (TheLine.First.isNot(tok::comment)) 1327 IndentForLevel[TheLine.Level] = LevelIndent; 1328 1329 // Remove trailing whitespace of the previous line if it was touched. 1330 if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) 1331 formatFirstToken(TheLine.First, Indent, TheLine.InPPDirective, 1332 PreviousEndOfLineColumn); 1333 } 1334 // If we did not reformat this unwrapped line, the column at the end of 1335 // the last token is unchanged - thus, we can calculate the end of the 1336 // last token. 1337 SourceLocation LastLoc = TheLine.Last->FormatTok.Tok.getLocation(); 1338 PreviousEndOfLineColumn = 1339 SourceMgr.getSpellingColumnNumber(LastLoc) + 1340 Lex.MeasureTokenLength(LastLoc, SourceMgr, Lex.getLangOpts()) - 1; 1341 PreviousLineWasTouched = false; 1342 } 1343 } 1344 return Whitespaces.generateReplacements(); 1345 } 1346 1347 private: 1348 void deriveLocalStyle() { 1349 unsigned CountBoundToVariable = 0; 1350 unsigned CountBoundToType = 0; 1351 bool HasCpp03IncompatibleFormat = false; 1352 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1353 if (AnnotatedLines[i].First.Children.empty()) 1354 continue; 1355 AnnotatedToken *Tok = &AnnotatedLines[i].First.Children[0]; 1356 while (!Tok->Children.empty()) { 1357 if (Tok->Type == TT_PointerOrReference) { 1358 bool SpacesBefore = Tok->FormatTok.WhiteSpaceLength > 0; 1359 bool SpacesAfter = Tok->Children[0].FormatTok.WhiteSpaceLength > 0; 1360 if (SpacesBefore && !SpacesAfter) 1361 ++CountBoundToVariable; 1362 else if (!SpacesBefore && SpacesAfter) 1363 ++CountBoundToType; 1364 } 1365 1366 if (Tok->Type == TT_TemplateCloser && 1367 Tok->Parent->Type == TT_TemplateCloser && 1368 Tok->FormatTok.WhiteSpaceLength == 0) 1369 HasCpp03IncompatibleFormat = true; 1370 Tok = &Tok->Children[0]; 1371 } 1372 } 1373 if (Style.DerivePointerBinding) { 1374 if (CountBoundToType > CountBoundToVariable) 1375 Style.PointerBindsToType = true; 1376 else if (CountBoundToType < CountBoundToVariable) 1377 Style.PointerBindsToType = false; 1378 } 1379 if (Style.Standard == FormatStyle::LS_Auto) { 1380 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11 1381 : FormatStyle::LS_Cpp03; 1382 } 1383 } 1384 1385 /// \brief Get the indent of \p Level from \p IndentForLevel. 1386 /// 1387 /// \p IndentForLevel must contain the indent for the level \c l 1388 /// at \p IndentForLevel[l], or a value < 0 if the indent for 1389 /// that level is unknown. 1390 unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) { 1391 if (IndentForLevel[Level] != -1) 1392 return IndentForLevel[Level]; 1393 if (Level == 0) 1394 return 0; 1395 return getIndent(IndentForLevel, Level - 1) + 2; 1396 } 1397 1398 /// \brief Get the offset of the line relatively to the level. 1399 /// 1400 /// For example, 'public:' labels in classes are offset by 1 or 2 1401 /// characters to the left from their level. 1402 int getIndentOffset(const AnnotatedToken &RootToken) { 1403 bool IsAccessModifier = false; 1404 if (RootToken.isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private)) 1405 IsAccessModifier = true; 1406 else if (RootToken.is(tok::at) && !RootToken.Children.empty() && 1407 (RootToken.Children[0].isObjCAtKeyword(tok::objc_public) || 1408 RootToken.Children[0].isObjCAtKeyword(tok::objc_protected) || 1409 RootToken.Children[0].isObjCAtKeyword(tok::objc_package) || 1410 RootToken.Children[0].isObjCAtKeyword(tok::objc_private))) 1411 IsAccessModifier = true; 1412 1413 if (IsAccessModifier) 1414 return Style.AccessModifierOffset; 1415 return 0; 1416 } 1417 1418 /// \brief Tries to merge lines into one. 1419 /// 1420 /// This will change \c Line and \c AnnotatedLine to contain the merged line, 1421 /// if possible; note that \c I will be incremented when lines are merged. 1422 /// 1423 /// Returns whether the resulting \c Line can fit in a single line. 1424 void tryFitMultipleLinesInOne(unsigned Indent, 1425 std::vector<AnnotatedLine>::iterator &I, 1426 std::vector<AnnotatedLine>::iterator E) { 1427 // We can never merge stuff if there are trailing line comments. 1428 if (I->Last->Type == TT_LineComment) 1429 return; 1430 1431 unsigned Limit = Style.ColumnLimit - Indent; 1432 // If we already exceed the column limit, we set 'Limit' to 0. The different 1433 // tryMerge..() functions can then decide whether to still do merging. 1434 Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength; 1435 1436 if (I + 1 == E || (I + 1)->Type == LT_Invalid) 1437 return; 1438 1439 if (I->Last->is(tok::l_brace)) { 1440 tryMergeSimpleBlock(I, E, Limit); 1441 } else if (I->First.is(tok::kw_if)) { 1442 tryMergeSimpleIf(I, E, Limit); 1443 } else if (I->InPPDirective && (I->First.FormatTok.HasUnescapedNewline || 1444 I->First.FormatTok.IsFirst)) { 1445 tryMergeSimplePPDirective(I, E, Limit); 1446 } 1447 return; 1448 } 1449 1450 void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I, 1451 std::vector<AnnotatedLine>::iterator E, 1452 unsigned Limit) { 1453 if (Limit == 0) 1454 return; 1455 AnnotatedLine &Line = *I; 1456 if (!(I + 1)->InPPDirective || (I + 1)->First.FormatTok.HasUnescapedNewline) 1457 return; 1458 if (I + 2 != E && (I + 2)->InPPDirective && 1459 !(I + 2)->First.FormatTok.HasUnescapedNewline) 1460 return; 1461 if (1 + (I + 1)->Last->TotalLength > Limit) 1462 return; 1463 join(Line, *(++I)); 1464 } 1465 1466 void tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator &I, 1467 std::vector<AnnotatedLine>::iterator E, 1468 unsigned Limit) { 1469 if (Limit == 0) 1470 return; 1471 if (!Style.AllowShortIfStatementsOnASingleLine) 1472 return; 1473 if ((I + 1)->InPPDirective != I->InPPDirective || 1474 ((I + 1)->InPPDirective && 1475 (I + 1)->First.FormatTok.HasUnescapedNewline)) 1476 return; 1477 AnnotatedLine &Line = *I; 1478 if (Line.Last->isNot(tok::r_paren)) 1479 return; 1480 if (1 + (I + 1)->Last->TotalLength > Limit) 1481 return; 1482 if ((I + 1)->First.is(tok::kw_if) || (I + 1)->First.Type == TT_LineComment) 1483 return; 1484 // Only inline simple if's (no nested if or else). 1485 if (I + 2 != E && (I + 2)->First.is(tok::kw_else)) 1486 return; 1487 join(Line, *(++I)); 1488 } 1489 1490 void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I, 1491 std::vector<AnnotatedLine>::iterator E, 1492 unsigned Limit) { 1493 // First, check that the current line allows merging. This is the case if 1494 // we're not in a control flow statement and the last token is an opening 1495 // brace. 1496 AnnotatedLine &Line = *I; 1497 if (Line.First.isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace, 1498 tok::kw_else, tok::kw_try, tok::kw_catch, 1499 tok::kw_for, 1500 // This gets rid of all ObjC @ keywords and methods. 1501 tok::at, tok::minus, tok::plus)) 1502 return; 1503 1504 AnnotatedToken *Tok = &(I + 1)->First; 1505 if (Tok->Children.empty() && Tok->is(tok::r_brace) && 1506 !Tok->MustBreakBefore) { 1507 // We merge empty blocks even if the line exceeds the column limit. 1508 Tok->SpacesRequiredBefore = 0; 1509 Tok->CanBreakBefore = true; 1510 join(Line, *(I + 1)); 1511 I += 1; 1512 } else if (Limit != 0) { 1513 // Check that we still have three lines and they fit into the limit. 1514 if (I + 2 == E || (I + 2)->Type == LT_Invalid || 1515 !nextTwoLinesFitInto(I, Limit)) 1516 return; 1517 1518 // Second, check that the next line does not contain any braces - if it 1519 // does, readability declines when putting it into a single line. 1520 if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore) 1521 return; 1522 do { 1523 if (Tok->isOneOf(tok::l_brace, tok::r_brace)) 1524 return; 1525 Tok = Tok->Children.empty() ? NULL : &Tok->Children.back(); 1526 } while (Tok != NULL); 1527 1528 // Last, check that the third line contains a single closing brace. 1529 Tok = &(I + 2)->First; 1530 if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) || 1531 Tok->MustBreakBefore) 1532 return; 1533 1534 join(Line, *(I + 1)); 1535 join(Line, *(I + 2)); 1536 I += 2; 1537 } 1538 } 1539 1540 bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I, 1541 unsigned Limit) { 1542 return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <= 1543 Limit; 1544 } 1545 1546 void join(AnnotatedLine &A, const AnnotatedLine &B) { 1547 unsigned LengthA = A.Last->TotalLength + B.First.SpacesRequiredBefore; 1548 A.Last->Children.push_back(B.First); 1549 while (!A.Last->Children.empty()) { 1550 A.Last->Children[0].Parent = A.Last; 1551 A.Last->Children[0].TotalLength += LengthA; 1552 A.Last = &A.Last->Children[0]; 1553 } 1554 } 1555 1556 bool touchesRanges(const CharSourceRange &Range) { 1557 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1558 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), 1559 Ranges[i].getBegin()) && 1560 !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(), 1561 Range.getBegin())) 1562 return true; 1563 } 1564 return false; 1565 } 1566 1567 bool touchesLine(const AnnotatedLine &TheLine) { 1568 const FormatToken *First = &TheLine.First.FormatTok; 1569 const FormatToken *Last = &TheLine.Last->FormatTok; 1570 CharSourceRange LineRange = CharSourceRange::getTokenRange( 1571 First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset), 1572 Last->Tok.getLocation()); 1573 return touchesRanges(LineRange); 1574 } 1575 1576 bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) { 1577 const FormatToken *First = &TheLine.First.FormatTok; 1578 CharSourceRange LineRange = CharSourceRange::getCharRange( 1579 First->WhiteSpaceStart, 1580 First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset)); 1581 return touchesRanges(LineRange); 1582 } 1583 1584 virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { 1585 AnnotatedLines.push_back(AnnotatedLine(TheLine)); 1586 } 1587 1588 /// \brief Add a new line and the required indent before the first Token 1589 /// of the \c UnwrappedLine if there was no structural parsing error. 1590 /// Returns the indent level of the \c UnwrappedLine. 1591 void formatFirstToken(const AnnotatedToken &RootToken, unsigned Indent, 1592 bool InPPDirective, unsigned PreviousEndOfLineColumn) { 1593 const FormatToken &Tok = RootToken.FormatTok; 1594 1595 unsigned Newlines = 1596 std::min(Tok.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); 1597 if (Newlines == 0 && !Tok.IsFirst) 1598 Newlines = 1; 1599 1600 if (!InPPDirective || Tok.HasUnescapedNewline) { 1601 Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0); 1602 } else { 1603 Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent, 1604 PreviousEndOfLineColumn); 1605 } 1606 } 1607 1608 DiagnosticsEngine &Diag; 1609 FormatStyle Style; 1610 Lexer &Lex; 1611 SourceManager &SourceMgr; 1612 WhitespaceManager Whitespaces; 1613 std::vector<CharSourceRange> Ranges; 1614 std::vector<AnnotatedLine> AnnotatedLines; 1615 bool StructuralError; 1616 }; 1617 1618 tooling::Replacements 1619 reformat(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, 1620 std::vector<CharSourceRange> Ranges, DiagnosticConsumer *DiagClient) { 1621 IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions(); 1622 OwningPtr<DiagnosticConsumer> DiagPrinter; 1623 if (DiagClient == 0) { 1624 DiagPrinter.reset(new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts)); 1625 DiagPrinter->BeginSourceFile(Lex.getLangOpts(), Lex.getPP()); 1626 DiagClient = DiagPrinter.get(); 1627 } 1628 DiagnosticsEngine Diagnostics( 1629 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts, 1630 DiagClient, false); 1631 Diagnostics.setSourceManager(&SourceMgr); 1632 Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges); 1633 return formatter.format(); 1634 } 1635 1636 LangOptions getFormattingLangOpts() { 1637 LangOptions LangOpts; 1638 LangOpts.CPlusPlus = 1; 1639 LangOpts.CPlusPlus11 = 1; 1640 LangOpts.Bool = 1; 1641 LangOpts.ObjC1 = 1; 1642 LangOpts.ObjC2 = 1; 1643 return LangOpts; 1644 } 1645 1646 } // namespace format 1647 } // namespace clang 1648