1 //===--- Format.cpp - Format C++ code -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements functions declared in Format.h. This will be 12 /// split into separate files as we go. 13 /// 14 /// This is EXPERIMENTAL code under heavy development. It is not in a state yet, 15 /// where it can be used to format real code. 16 /// 17 //===----------------------------------------------------------------------===// 18 19 #define DEBUG_TYPE "format-formatter" 20 21 #include "UnwrappedLineParser.h" 22 #include "clang/Basic/Diagnostic.h" 23 #include "clang/Basic/OperatorPrecedence.h" 24 #include "clang/Basic/SourceManager.h" 25 #include "clang/Format/Format.h" 26 #include "clang/Frontend/TextDiagnosticPrinter.h" 27 #include "clang/Lex/Lexer.h" 28 #include "llvm/Support/Debug.h" 29 #include <string> 30 31 // Uncomment to get debug output from tests: 32 // #define DEBUG_WITH_TYPE(T, X) do { X; } while(0) 33 34 namespace clang { 35 namespace format { 36 37 enum TokenType { 38 TT_BinaryOperator, 39 TT_BlockComment, 40 TT_CastRParen, 41 TT_ConditionalExpr, 42 TT_CtorInitializerColon, 43 TT_ImplicitStringLiteral, 44 TT_LineComment, 45 TT_ObjCBlockLParen, 46 TT_ObjCDecl, 47 TT_ObjCMethodSpecifier, 48 TT_ObjCMethodExpr, 49 TT_ObjCProperty, 50 TT_OverloadedOperator, 51 TT_PointerOrReference, 52 TT_PureVirtualSpecifier, 53 TT_RangeBasedForLoopColon, 54 TT_StartOfName, 55 TT_TemplateCloser, 56 TT_TemplateOpener, 57 TT_TrailingUnaryOperator, 58 TT_UnaryOperator, 59 TT_Unknown 60 }; 61 62 enum LineType { 63 LT_Invalid, 64 LT_Other, 65 LT_BuilderTypeCall, 66 LT_PreprocessorDirective, 67 LT_VirtualFunctionDecl, 68 LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 69 LT_ObjCMethodDecl, 70 LT_ObjCProperty // An @property line. 71 }; 72 73 class AnnotatedToken { 74 public: 75 explicit AnnotatedToken(const FormatToken &FormatTok) 76 : FormatTok(FormatTok), Type(TT_Unknown), SpaceRequiredBefore(false), 77 CanBreakBefore(false), MustBreakBefore(false), 78 ClosesTemplateDeclaration(false), MatchingParen(NULL), 79 ParameterCount(1), Parent(NULL) { 80 } 81 82 bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); } 83 bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); } 84 85 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 86 return FormatTok.Tok.isObjCAtKeyword(Kind); 87 } 88 89 FormatToken FormatTok; 90 91 TokenType Type; 92 93 bool SpaceRequiredBefore; 94 bool CanBreakBefore; 95 bool MustBreakBefore; 96 97 bool ClosesTemplateDeclaration; 98 99 AnnotatedToken *MatchingParen; 100 101 /// \brief Number of parameters, if this is "(", "[" or "<". 102 /// 103 /// This is initialized to 1 as we don't need to distinguish functions with 104 /// 0 parameters from functions with 1 parameter. Thus, we can simply count 105 /// the number of commas. 106 unsigned ParameterCount; 107 108 /// \brief The total length of the line up to and including this token. 109 unsigned TotalLength; 110 111 std::vector<AnnotatedToken> Children; 112 AnnotatedToken *Parent; 113 114 const AnnotatedToken *getPreviousNoneComment() const { 115 AnnotatedToken *Tok = Parent; 116 while (Tok != NULL && Tok->is(tok::comment)) 117 Tok = Tok->Parent; 118 return Tok; 119 } 120 }; 121 122 class AnnotatedLine { 123 public: 124 AnnotatedLine(const UnwrappedLine &Line) 125 : First(Line.Tokens.front()), Level(Line.Level), 126 InPPDirective(Line.InPPDirective), 127 MustBeDeclaration(Line.MustBeDeclaration) { 128 assert(!Line.Tokens.empty()); 129 AnnotatedToken *Current = &First; 130 for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(), 131 E = Line.Tokens.end(); 132 I != E; ++I) { 133 Current->Children.push_back(AnnotatedToken(*I)); 134 Current->Children[0].Parent = Current; 135 Current = &Current->Children[0]; 136 } 137 Last = Current; 138 } 139 AnnotatedLine(const AnnotatedLine &Other) 140 : First(Other.First), Type(Other.Type), Level(Other.Level), 141 InPPDirective(Other.InPPDirective), 142 MustBeDeclaration(Other.MustBeDeclaration) { 143 Last = &First; 144 while (!Last->Children.empty()) { 145 Last->Children[0].Parent = Last; 146 Last = &Last->Children[0]; 147 } 148 } 149 150 AnnotatedToken First; 151 AnnotatedToken *Last; 152 153 LineType Type; 154 unsigned Level; 155 bool InPPDirective; 156 bool MustBeDeclaration; 157 }; 158 159 static prec::Level getPrecedence(const AnnotatedToken &Tok) { 160 return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true); 161 } 162 163 bool isBinaryOperator(const AnnotatedToken &Tok) { 164 // Comma is a binary operator, but does not behave as such wrt. formatting. 165 return getPrecedence(Tok) > prec::Comma; 166 } 167 168 FormatStyle getLLVMStyle() { 169 FormatStyle LLVMStyle; 170 LLVMStyle.ColumnLimit = 80; 171 LLVMStyle.MaxEmptyLinesToKeep = 1; 172 LLVMStyle.PointerAndReferenceBindToType = false; 173 LLVMStyle.AccessModifierOffset = -2; 174 LLVMStyle.SplitTemplateClosingGreater = true; 175 LLVMStyle.IndentCaseLabels = false; 176 LLVMStyle.SpacesBeforeTrailingComments = 1; 177 LLVMStyle.BinPackParameters = true; 178 LLVMStyle.AllowAllParametersOnNextLine = true; 179 LLVMStyle.AllowReturnTypeOnItsOwnLine = true; 180 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 181 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 182 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 183 return LLVMStyle; 184 } 185 186 FormatStyle getGoogleStyle() { 187 FormatStyle GoogleStyle; 188 GoogleStyle.ColumnLimit = 80; 189 GoogleStyle.MaxEmptyLinesToKeep = 1; 190 GoogleStyle.PointerAndReferenceBindToType = true; 191 GoogleStyle.AccessModifierOffset = -1; 192 GoogleStyle.SplitTemplateClosingGreater = false; 193 GoogleStyle.IndentCaseLabels = true; 194 GoogleStyle.SpacesBeforeTrailingComments = 2; 195 GoogleStyle.BinPackParameters = false; 196 GoogleStyle.AllowAllParametersOnNextLine = true; 197 GoogleStyle.AllowReturnTypeOnItsOwnLine = false; 198 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 199 GoogleStyle.AllowShortIfStatementsOnASingleLine = false; 200 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 201 return GoogleStyle; 202 } 203 204 FormatStyle getChromiumStyle() { 205 FormatStyle ChromiumStyle = getGoogleStyle(); 206 ChromiumStyle.AllowAllParametersOnNextLine = false; 207 return ChromiumStyle; 208 } 209 210 struct OptimizationParameters { 211 unsigned PenaltyIndentLevel; 212 unsigned PenaltyExcessCharacter; 213 }; 214 215 /// \brief Manages the whitespaces around tokens and their replacements. 216 /// 217 /// This includes special handling for certain constructs, e.g. the alignment of 218 /// trailing line comments. 219 class WhitespaceManager { 220 public: 221 WhitespaceManager(SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} 222 223 /// \brief Replaces the whitespace in front of \p Tok. Only call once for 224 /// each \c AnnotatedToken. 225 void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines, 226 unsigned Spaces, unsigned WhitespaceStartColumn, 227 const FormatStyle &Style) { 228 // 2+ newlines mean an empty line separating logic scopes. 229 if (NewLines >= 2) 230 alignComments(); 231 232 // Align line comments if they are trailing or if they continue other 233 // trailing comments. 234 if (Tok.Type == TT_LineComment && 235 (Tok.Parent != NULL || !Comments.empty())) { 236 if (Style.ColumnLimit >= 237 Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength) { 238 Comments.push_back(StoredComment()); 239 Comments.back().Tok = Tok.FormatTok; 240 Comments.back().Spaces = Spaces; 241 Comments.back().NewLines = NewLines; 242 Comments.back().MinColumn = WhitespaceStartColumn + Spaces; 243 Comments.back().MaxColumn = Style.ColumnLimit - 244 Spaces - Tok.FormatTok.TokenLength; 245 return; 246 } 247 } 248 249 // If this line does not have a trailing comment, align the stored comments. 250 if (Tok.Children.empty() && Tok.Type != TT_LineComment) 251 alignComments(); 252 storeReplacement(Tok.FormatTok, 253 std::string(NewLines, '\n') + std::string(Spaces, ' ')); 254 } 255 256 /// \brief Like \c replaceWhitespace, but additionally adds right-aligned 257 /// backslashes to escape newlines inside a preprocessor directive. 258 /// 259 /// This function and \c replaceWhitespace have the same behavior if 260 /// \c Newlines == 0. 261 void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines, 262 unsigned Spaces, unsigned WhitespaceStartColumn, 263 const FormatStyle &Style) { 264 std::string NewLineText; 265 if (NewLines > 0) { 266 unsigned Offset = std::min<int>(Style.ColumnLimit - 1, 267 WhitespaceStartColumn); 268 for (unsigned i = 0; i < NewLines; ++i) { 269 NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' '); 270 NewLineText += "\\\n"; 271 Offset = 0; 272 } 273 } 274 storeReplacement(Tok.FormatTok, NewLineText + std::string(Spaces, ' ')); 275 } 276 277 /// \brief Returns all the \c Replacements created during formatting. 278 const tooling::Replacements &generateReplacements() { 279 alignComments(); 280 return Replaces; 281 } 282 283 private: 284 /// \brief Structure to store a comment for later layout and alignment. 285 struct StoredComment { 286 FormatToken Tok; 287 unsigned MinColumn; 288 unsigned MaxColumn; 289 unsigned NewLines; 290 unsigned Spaces; 291 }; 292 SmallVector<StoredComment, 16> Comments; 293 typedef SmallVector<StoredComment, 16>::iterator comment_iterator; 294 295 /// \brief Try to align all stashed comments. 296 void alignComments() { 297 unsigned MinColumn = 0; 298 unsigned MaxColumn = UINT_MAX; 299 comment_iterator Start = Comments.begin(); 300 for (comment_iterator I = Comments.begin(), E = Comments.end(); I != E; 301 ++I) { 302 if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) { 303 alignComments(Start, I, MinColumn); 304 MinColumn = I->MinColumn; 305 MaxColumn = I->MaxColumn; 306 Start = I; 307 } else { 308 MinColumn = std::max(MinColumn, I->MinColumn); 309 MaxColumn = std::min(MaxColumn, I->MaxColumn); 310 } 311 } 312 alignComments(Start, Comments.end(), MinColumn); 313 Comments.clear(); 314 } 315 316 /// \brief Put all the comments between \p I and \p E into \p Column. 317 void alignComments(comment_iterator I, comment_iterator E, unsigned Column) { 318 while (I != E) { 319 unsigned Spaces = I->Spaces + Column - I->MinColumn; 320 storeReplacement(I->Tok, std::string(I->NewLines, '\n') + 321 std::string(Spaces, ' ')); 322 ++I; 323 } 324 } 325 326 /// \brief Stores \p Text as the replacement for the whitespace in front of 327 /// \p Tok. 328 void storeReplacement(const FormatToken &Tok, const std::string Text) { 329 Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart, 330 Tok.WhiteSpaceLength, Text)); 331 } 332 333 SourceManager &SourceMgr; 334 tooling::Replacements Replaces; 335 }; 336 337 /// \brief Returns if a token is an Objective-C selector name. 338 /// 339 /// For example, "bar" is a selector name in [foo bar:(4 + 5)]. 340 static bool isObjCSelectorName(const AnnotatedToken &Tok) { 341 return Tok.is(tok::identifier) && !Tok.Children.empty() && 342 Tok.Children[0].is(tok::colon) && 343 Tok.Children[0].Type == TT_ObjCMethodExpr; 344 } 345 346 class UnwrappedLineFormatter { 347 public: 348 UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, 349 const AnnotatedLine &Line, unsigned FirstIndent, 350 const AnnotatedToken &RootToken, 351 WhitespaceManager &Whitespaces, bool StructuralError) 352 : Style(Style), SourceMgr(SourceMgr), Line(Line), 353 FirstIndent(FirstIndent), RootToken(RootToken), 354 Whitespaces(Whitespaces) { 355 Parameters.PenaltyIndentLevel = 20; 356 Parameters.PenaltyExcessCharacter = 1000000; 357 } 358 359 /// \brief Formats an \c UnwrappedLine. 360 /// 361 /// \returns The column after the last token in the last line of the 362 /// \c UnwrappedLine. 363 unsigned format() { 364 // Initialize state dependent on indent. 365 LineState State; 366 State.Column = FirstIndent; 367 State.NextToken = &RootToken; 368 State.Stack.push_back(ParenState(FirstIndent + 4, FirstIndent)); 369 State.ForLoopVariablePos = 0; 370 State.LineContainsContinuedForLoopSection = false; 371 372 DEBUG({ 373 DebugTokenState(*State.NextToken); 374 }); 375 376 // The first token has already been indented and thus consumed. 377 moveStateToNextToken(State); 378 379 // Start iterating at 1 as we have correctly formatted of Token #0 above. 380 while (State.NextToken != NULL) { 381 if (State.NextToken->Type == TT_ImplicitStringLiteral) { 382 // Calculating the column is important for aligning trailing comments. 383 // FIXME: This does not seem to happen in conjunction with escaped 384 // newlines. If it does, fix! 385 State.Column += State.NextToken->FormatTok.WhiteSpaceLength + 386 State.NextToken->FormatTok.TokenLength; 387 State.NextToken = State.NextToken->Children.empty() ? NULL : 388 &State.NextToken->Children[0]; 389 } else if (Line.Last->TotalLength <= getColumnLimit() - FirstIndent) { 390 addTokenToState(false, false, State); 391 } else { 392 unsigned NoBreak = calcPenalty(State, false, UINT_MAX); 393 unsigned Break = calcPenalty(State, true, NoBreak); 394 DEBUG({ 395 if (Break < NoBreak) 396 llvm::errs() << "\n"; 397 else 398 llvm::errs() << " "; 399 llvm::errs() << "<"; 400 DebugPenalty(Break, Break < NoBreak); 401 llvm::errs() << "/"; 402 DebugPenalty(NoBreak, !(Break < NoBreak)); 403 llvm::errs() << "> "; 404 DebugTokenState(*State.NextToken); 405 }); 406 addTokenToState(Break < NoBreak, false, State); 407 if (State.NextToken != NULL && 408 State.NextToken->Parent->Type == TT_CtorInitializerColon) { 409 if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine && 410 Line.Last->TotalLength > getColumnLimit() - State.Column - 1) 411 State.Stack.back().BreakAfterComma = true; 412 } 413 } 414 } 415 DEBUG(llvm::errs() << "\n"); 416 return State.Column; 417 } 418 419 private: 420 void DebugTokenState(const AnnotatedToken &AnnotatedTok) { 421 const Token &Tok = AnnotatedTok.FormatTok.Tok; 422 llvm::errs() 423 << StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 424 Tok.getLength()); 425 llvm::errs(); 426 } 427 428 void DebugPenalty(unsigned Penalty, bool Winner) { 429 llvm::errs().changeColor(Winner ? raw_ostream::GREEN : raw_ostream::RED); 430 if (Penalty == UINT_MAX) 431 llvm::errs() << "MAX"; 432 else 433 llvm::errs() << Penalty; 434 llvm::errs().resetColor(); 435 } 436 437 struct ParenState { 438 ParenState(unsigned Indent, unsigned LastSpace) 439 : Indent(Indent), LastSpace(LastSpace), AssignmentColumn(0), 440 FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0), 441 BreakAfterComma(false), HasMultiParameterLine(false) {} 442 443 /// \brief The position to which a specific parenthesis level needs to be 444 /// indented. 445 unsigned Indent; 446 447 /// \brief The position of the last space on each level. 448 /// 449 /// Used e.g. to break like: 450 /// functionCall(Parameter, otherCall( 451 /// OtherParameter)); 452 unsigned LastSpace; 453 454 /// \brief This is the column of the first token after an assignment. 455 unsigned AssignmentColumn; 456 457 /// \brief The position the first "<<" operator encountered on each level. 458 /// 459 /// Used to align "<<" operators. 0 if no such operator has been encountered 460 /// on a level. 461 unsigned FirstLessLess; 462 463 /// \brief Whether a newline needs to be inserted before the block's closing 464 /// brace. 465 /// 466 /// We only want to insert a newline before the closing brace if there also 467 /// was a newline after the beginning left brace. 468 bool BreakBeforeClosingBrace; 469 470 /// \brief The column of a \c ? in a conditional expression; 471 unsigned QuestionColumn; 472 473 bool BreakAfterComma; 474 bool HasMultiParameterLine; 475 476 bool operator<(const ParenState &Other) const { 477 if (Indent != Other.Indent) 478 return Indent < Other.Indent; 479 if (LastSpace != Other.LastSpace) 480 return LastSpace < Other.LastSpace; 481 if (AssignmentColumn != Other.AssignmentColumn) 482 return AssignmentColumn < Other.AssignmentColumn; 483 if (FirstLessLess != Other.FirstLessLess) 484 return FirstLessLess < Other.FirstLessLess; 485 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 486 return BreakBeforeClosingBrace; 487 if (QuestionColumn != Other.QuestionColumn) 488 return QuestionColumn < Other.QuestionColumn; 489 if (BreakAfterComma != Other.BreakAfterComma) 490 return BreakAfterComma; 491 if (HasMultiParameterLine != Other.HasMultiParameterLine) 492 return HasMultiParameterLine; 493 return false; 494 } 495 }; 496 497 /// \brief The current state when indenting a unwrapped line. 498 /// 499 /// As the indenting tries different combinations this is copied by value. 500 struct LineState { 501 /// \brief The number of used columns in the current line. 502 unsigned Column; 503 504 /// \brief The token that needs to be next formatted. 505 const AnnotatedToken *NextToken; 506 507 /// \brief The column of the first variable in a for-loop declaration. 508 /// 509 /// Used to align the second variable if necessary. 510 unsigned ForLoopVariablePos; 511 512 /// \brief \c true if this line contains a continued for-loop section. 513 bool LineContainsContinuedForLoopSection; 514 515 /// \brief A stack keeping track of properties applying to parenthesis 516 /// levels. 517 std::vector<ParenState> Stack; 518 519 /// \brief Comparison operator to be able to used \c LineState in \c map. 520 bool operator<(const LineState &Other) const { 521 if (Other.NextToken != NextToken) 522 return Other.NextToken > NextToken; 523 if (Other.Column != Column) 524 return Other.Column > Column; 525 if (Other.ForLoopVariablePos != ForLoopVariablePos) 526 return Other.ForLoopVariablePos < ForLoopVariablePos; 527 if (Other.LineContainsContinuedForLoopSection != 528 LineContainsContinuedForLoopSection) 529 return LineContainsContinuedForLoopSection; 530 return Other.Stack < Stack; 531 } 532 }; 533 534 /// \brief Appends the next token to \p State and updates information 535 /// necessary for indentation. 536 /// 537 /// Puts the token on the current line if \p Newline is \c true and adds a 538 /// line break and necessary indentation otherwise. 539 /// 540 /// If \p DryRun is \c false, also creates and stores the required 541 /// \c Replacement. 542 void addTokenToState(bool Newline, bool DryRun, LineState &State) { 543 const AnnotatedToken &Current = *State.NextToken; 544 const AnnotatedToken &Previous = *State.NextToken->Parent; 545 assert(State.Stack.size()); 546 unsigned ParenLevel = State.Stack.size() - 1; 547 548 if (Newline) { 549 unsigned WhitespaceStartColumn = State.Column; 550 if (Current.is(tok::r_brace)) { 551 State.Column = Line.Level * 2; 552 } else if (Current.is(tok::string_literal) && 553 Previous.is(tok::string_literal)) { 554 State.Column = State.Column - Previous.FormatTok.TokenLength; 555 } else if (Current.is(tok::lessless) && 556 State.Stack[ParenLevel].FirstLessLess != 0) { 557 State.Column = State.Stack[ParenLevel].FirstLessLess; 558 } else if (ParenLevel != 0 && 559 (Previous.is(tok::equal) || Previous.is(tok::coloncolon) || 560 Current.is(tok::period) || Current.is(tok::arrow) || 561 Current.is(tok::question))) { 562 // Indent and extra 4 spaces after if we know the current expression is 563 // continued. Don't do that on the top level, as we already indent 4 564 // there. 565 State.Column = std::max(State.Stack.back().LastSpace, 566 State.Stack.back().Indent) + 4; 567 } else if (Current.Type == TT_ConditionalExpr) { 568 State.Column = State.Stack.back().QuestionColumn; 569 } else if (RootToken.is(tok::kw_for) && ParenLevel == 1 && 570 Previous.is(tok::comma)) { 571 State.Column = State.ForLoopVariablePos; 572 } else if (State.NextToken->Parent->ClosesTemplateDeclaration || 573 Current.Type == TT_StartOfName) { 574 State.Column = State.Stack[ParenLevel].Indent - 4; 575 } else if (Previous.Type == TT_BinaryOperator && 576 State.Stack.back().AssignmentColumn != 0) { 577 State.Column = State.Stack.back().AssignmentColumn; 578 } else { 579 State.Column = State.Stack[ParenLevel].Indent; 580 } 581 582 if (RootToken.is(tok::kw_for)) 583 State.LineContainsContinuedForLoopSection = Previous.isNot(tok::semi); 584 585 if (!DryRun) { 586 if (!Line.InPPDirective) 587 Whitespaces.replaceWhitespace(Current, 1, State.Column, 588 WhitespaceStartColumn, Style); 589 else 590 Whitespaces.replacePPWhitespace(Current, 1, State.Column, 591 WhitespaceStartColumn, Style); 592 } 593 594 State.Stack[ParenLevel].LastSpace = State.Column; 595 if (Current.is(tok::colon) && Current.Type != TT_ConditionalExpr) 596 State.Stack[ParenLevel].Indent += 2; 597 } else { 598 if (Current.is(tok::equal) && RootToken.is(tok::kw_for)) 599 State.ForLoopVariablePos = State.Column - 600 Previous.FormatTok.TokenLength; 601 602 unsigned Spaces = State.NextToken->SpaceRequiredBefore ? 1 : 0; 603 if (State.NextToken->Type == TT_LineComment) 604 Spaces = Style.SpacesBeforeTrailingComments; 605 606 if (!DryRun) 607 Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column, Style); 608 609 // FIXME: Do we need to do this for assignments nested in other 610 // expressions? 611 if (RootToken.isNot(tok::kw_for) && ParenLevel == 0 && 612 (getPrecedence(Previous) == prec::Assignment || 613 Previous.is(tok::kw_return))) 614 State.Stack.back().AssignmentColumn = State.Column + Spaces; 615 if (Previous.is(tok::l_paren) || Previous.is(tok::l_brace) || 616 State.NextToken->Parent->Type == TT_TemplateOpener) 617 State.Stack[ParenLevel].Indent = State.Column + Spaces; 618 if (Current.getPreviousNoneComment() != NULL && 619 Current.getPreviousNoneComment()->is(tok::comma) && 620 Current.isNot(tok::comment)) 621 State.Stack[ParenLevel].HasMultiParameterLine = true; 622 623 State.Column += Spaces; 624 if (Current.is(tok::l_paren) && Previous.is(tok::kw_if)) 625 // Treat the condition inside an if as if it was a second function 626 // parameter, i.e. let nested calls have an indent of 4. 627 State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(". 628 else if (Previous.is(tok::comma) && ParenLevel != 0) 629 // Top-level spaces are exempt as that mostly leads to better results. 630 State.Stack.back().LastSpace = State.Column; 631 else if ((Previous.Type == TT_BinaryOperator || 632 Previous.Type == TT_ConditionalExpr || 633 Previous.Type == TT_CtorInitializerColon) && 634 getPrecedence(Previous) != prec::Assignment) 635 State.Stack.back().LastSpace = State.Column; 636 else if (Previous.ParameterCount > 1 && 637 (Previous.is(tok::l_paren) || Previous.is(tok::l_square) || 638 Previous.Type == TT_TemplateOpener)) 639 // If this function has multiple parameters, indent nested calls from 640 // the start of the first parameter. 641 State.Stack.back().LastSpace = State.Column; 642 } 643 644 // If we break after an {, we should also break before the corresponding }. 645 if (Newline && Previous.is(tok::l_brace)) 646 State.Stack.back().BreakBeforeClosingBrace = true; 647 648 if (!Style.BinPackParameters && Newline) { 649 // If we are breaking after '(', '{', '<', this is not bin packing unless 650 // AllowAllParametersOnNextLine is false. 651 if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace) && 652 Previous.Type != TT_TemplateOpener) || 653 !Style.AllowAllParametersOnNextLine) 654 State.Stack.back().BreakAfterComma = true; 655 656 // Any break on this level means that the parent level has been broken 657 // and we need to avoid bin packing there. 658 for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { 659 State.Stack[i].BreakAfterComma = true; 660 } 661 } 662 663 moveStateToNextToken(State); 664 } 665 666 /// \brief Mark the next token as consumed in \p State and modify its stacks 667 /// accordingly. 668 void moveStateToNextToken(LineState &State) { 669 const AnnotatedToken &Current = *State.NextToken; 670 assert(State.Stack.size()); 671 672 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) 673 State.Stack.back().FirstLessLess = State.Column; 674 if (Current.is(tok::question)) 675 State.Stack.back().QuestionColumn = State.Column; 676 677 // If we encounter an opening (, [, { or <, we add a level to our stacks to 678 // prepare for the following tokens. 679 if (Current.is(tok::l_paren) || Current.is(tok::l_square) || 680 Current.is(tok::l_brace) || 681 State.NextToken->Type == TT_TemplateOpener) { 682 unsigned NewIndent; 683 if (Current.is(tok::l_brace)) { 684 // FIXME: This does not work with nested static initializers. 685 // Implement a better handling for static initializers and similar 686 // constructs. 687 NewIndent = Line.Level * 2 + 2; 688 } else { 689 NewIndent = 4 + State.Stack.back().LastSpace; 690 } 691 State.Stack.push_back( 692 ParenState(NewIndent, State.Stack.back().LastSpace)); 693 } 694 695 // If we encounter a closing ), ], } or >, we can remove a level from our 696 // stacks. 697 if (Current.is(tok::r_paren) || Current.is(tok::r_square) || 698 (Current.is(tok::r_brace) && State.NextToken != &RootToken) || 699 State.NextToken->Type == TT_TemplateCloser) { 700 State.Stack.pop_back(); 701 } 702 703 if (State.NextToken->Children.empty()) 704 State.NextToken = NULL; 705 else 706 State.NextToken = &State.NextToken->Children[0]; 707 708 State.Column += Current.FormatTok.TokenLength; 709 } 710 711 /// \brief Calculate the penalty for splitting after the token at \p Index. 712 unsigned splitPenalty(const AnnotatedToken &Tok) { 713 const AnnotatedToken &Left = Tok; 714 const AnnotatedToken &Right = Tok.Children[0]; 715 716 if (Left.is(tok::l_brace) && Right.isNot(tok::l_brace)) 717 return 50; 718 if (Left.is(tok::equal) && Right.is(tok::l_brace)) 719 return 150; 720 if (Left.is(tok::coloncolon)) 721 return 500; 722 723 if (Left.Type == TT_RangeBasedForLoopColon) 724 return 5; 725 726 if (Right.is(tok::arrow) || Right.is(tok::period)) { 727 if (Left.is(tok::r_paren) && Line.Type == LT_BuilderTypeCall) 728 return 5; // Should be smaller than breaking at a nested comma. 729 return 150; 730 } 731 732 // In for-loops, prefer breaking at ',' and ';'. 733 if (RootToken.is(tok::kw_for) && 734 (Left.isNot(tok::comma) && Left.isNot(tok::semi))) 735 return 20; 736 737 if (Left.is(tok::semi) || Left.is(tok::comma)) 738 return 0; 739 740 // In Objective-C method expressions, prefer breaking before "param:" over 741 // breaking after it. 742 if (isObjCSelectorName(Right)) 743 return 0; 744 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr) 745 return 20; 746 747 if (Left.is(tok::l_paren)) 748 return 20; 749 // FIXME: The penalty for a trailing "<" or "[" being higher than the 750 // penalty for a trainling "(" is a temporary workaround until we can 751 // properly avoid breaking in array subscripts or template parameters. 752 if (Left.is(tok::l_square) || Left.Type == TT_TemplateOpener) 753 return 50; 754 755 if (Left.Type == TT_ConditionalExpr) 756 return prec::Assignment; 757 prec::Level Level = getPrecedence(Left); 758 759 if (Level != prec::Unknown) 760 return Level; 761 762 return 3; 763 } 764 765 unsigned getColumnLimit() { 766 return Style.ColumnLimit - (Line.InPPDirective ? 1 : 0); 767 } 768 769 /// \brief Calculate the number of lines needed to format the remaining part 770 /// of the unwrapped line. 771 /// 772 /// Assumes the formatting so far has led to 773 /// the \c LineSta \p State. If \p NewLine is set, a new line will be 774 /// added after the previous token. 775 /// 776 /// \param StopAt is used for optimization. If we can determine that we'll 777 /// definitely need at least \p StopAt additional lines, we already know of a 778 /// better solution. 779 unsigned calcPenalty(LineState State, bool NewLine, unsigned StopAt) { 780 // We are at the end of the unwrapped line, so we don't need any more lines. 781 if (State.NextToken == NULL) 782 return 0; 783 784 if (!NewLine && State.NextToken->MustBreakBefore) 785 return UINT_MAX; 786 if (NewLine && !State.NextToken->CanBreakBefore && 787 !(State.NextToken->is(tok::r_brace) && 788 State.Stack.back().BreakBeforeClosingBrace)) 789 return UINT_MAX; 790 if (!NewLine && State.NextToken->is(tok::r_brace) && 791 State.Stack.back().BreakBeforeClosingBrace) 792 return UINT_MAX; 793 if (!NewLine && State.NextToken->Parent->is(tok::semi) && 794 State.LineContainsContinuedForLoopSection) 795 return UINT_MAX; 796 if (!NewLine && State.NextToken->Parent->is(tok::comma) && 797 State.NextToken->isNot(tok::comment) && 798 State.Stack.back().BreakAfterComma) 799 return UINT_MAX; 800 // Trying to insert a parameter on a new line if there are already more than 801 // one parameter on the current line is bin packing. 802 if (NewLine && State.NextToken->Parent->is(tok::comma) && 803 State.Stack.back().HasMultiParameterLine && !Style.BinPackParameters) 804 return UINT_MAX; 805 if (!NewLine && (State.NextToken->Type == TT_CtorInitializerColon || 806 (State.NextToken->Parent->ClosesTemplateDeclaration && 807 State.Stack.size() == 1))) 808 return UINT_MAX; 809 810 unsigned CurrentPenalty = 0; 811 if (NewLine) 812 CurrentPenalty += Parameters.PenaltyIndentLevel * State.Stack.size() + 813 splitPenalty(*State.NextToken->Parent); 814 815 addTokenToState(NewLine, true, State); 816 817 // Exceeding column limit is bad, assign penalty. 818 if (State.Column > getColumnLimit()) { 819 unsigned ExcessCharacters = State.Column - getColumnLimit(); 820 CurrentPenalty += Parameters.PenaltyExcessCharacter * ExcessCharacters; 821 } 822 823 if (StopAt <= CurrentPenalty) 824 return UINT_MAX; 825 StopAt -= CurrentPenalty; 826 StateMap::iterator I = Memory.find(State); 827 if (I != Memory.end()) { 828 // If this state has already been examined, we can safely return the 829 // previous result if we 830 // - have not hit the optimatization (and thus returned UINT_MAX) OR 831 // - are now computing for a smaller or equal StopAt. 832 unsigned SavedResult = I->second.first; 833 unsigned SavedStopAt = I->second.second; 834 if (SavedResult != UINT_MAX) 835 return SavedResult + CurrentPenalty; 836 else if (StopAt <= SavedStopAt) 837 return UINT_MAX; 838 } 839 840 unsigned NoBreak = calcPenalty(State, false, StopAt); 841 unsigned WithBreak = calcPenalty(State, true, std::min(StopAt, NoBreak)); 842 unsigned Result = std::min(NoBreak, WithBreak); 843 844 // We have to store 'Result' without adding 'CurrentPenalty' as the latter 845 // can depend on 'NewLine'. 846 Memory[State] = std::pair<unsigned, unsigned>(Result, StopAt); 847 848 return Result == UINT_MAX ? UINT_MAX : Result + CurrentPenalty; 849 } 850 851 FormatStyle Style; 852 SourceManager &SourceMgr; 853 const AnnotatedLine &Line; 854 const unsigned FirstIndent; 855 const AnnotatedToken &RootToken; 856 WhitespaceManager &Whitespaces; 857 858 // A map from an indent state to a pair (Result, Used-StopAt). 859 typedef std::map<LineState, std::pair<unsigned, unsigned> > StateMap; 860 StateMap Memory; 861 862 OptimizationParameters Parameters; 863 }; 864 865 /// \brief Determines extra information about the tokens comprising an 866 /// \c UnwrappedLine. 867 class TokenAnnotator { 868 public: 869 TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex, 870 AnnotatedLine &Line) 871 : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Line(Line) {} 872 873 /// \brief A parser that gathers additional information about tokens. 874 /// 875 /// The \c TokenAnnotator tries to matches parenthesis and square brakets and 876 /// store a parenthesis levels. It also tries to resolve matching "<" and ">" 877 /// into template parameter lists. 878 class AnnotatingParser { 879 public: 880 AnnotatingParser(AnnotatedToken &RootToken) 881 : CurrentToken(&RootToken), KeywordVirtualFound(false), 882 ColonIsObjCMethodExpr(false), ColonIsForRangeExpr(false) {} 883 884 /// \brief A helper class to manage AnnotatingParser::ColonIsObjCMethodExpr. 885 struct ObjCSelectorRAII { 886 AnnotatingParser &P; 887 bool ColonWasObjCMethodExpr; 888 889 ObjCSelectorRAII(AnnotatingParser &P) 890 : P(P), ColonWasObjCMethodExpr(P.ColonIsObjCMethodExpr) {} 891 892 ~ObjCSelectorRAII() { P.ColonIsObjCMethodExpr = ColonWasObjCMethodExpr; } 893 894 void markStart(AnnotatedToken &Left) { 895 P.ColonIsObjCMethodExpr = true; 896 Left.Type = TT_ObjCMethodExpr; 897 } 898 899 void markEnd(AnnotatedToken &Right) { Right.Type = TT_ObjCMethodExpr; } 900 }; 901 902 bool parseAngle() { 903 if (CurrentToken == NULL) 904 return false; 905 AnnotatedToken *Left = CurrentToken->Parent; 906 while (CurrentToken != NULL) { 907 if (CurrentToken->is(tok::greater)) { 908 Left->MatchingParen = CurrentToken; 909 CurrentToken->MatchingParen = Left; 910 CurrentToken->Type = TT_TemplateCloser; 911 next(); 912 return true; 913 } 914 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) || 915 CurrentToken->is(tok::r_brace)) 916 return false; 917 if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) || 918 CurrentToken->is(tok::question) || CurrentToken->is(tok::colon)) 919 return false; 920 if (CurrentToken->is(tok::comma)) 921 ++Left->ParameterCount; 922 if (!consumeToken()) 923 return false; 924 } 925 return false; 926 } 927 928 bool parseParens(bool LookForDecls = false) { 929 if (CurrentToken == NULL) 930 return false; 931 bool StartsObjCMethodExpr = false; 932 AnnotatedToken *Left = CurrentToken->Parent; 933 if (CurrentToken->is(tok::caret)) { 934 // ^( starts a block. 935 Left->Type = TT_ObjCBlockLParen; 936 } else if (AnnotatedToken *MaybeSel = Left->Parent) { 937 // @selector( starts a selector. 938 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent && 939 MaybeSel->Parent->is(tok::at)) { 940 StartsObjCMethodExpr = true; 941 } 942 } 943 944 ObjCSelectorRAII objCSelector(*this); 945 if (StartsObjCMethodExpr) 946 objCSelector.markStart(*Left); 947 948 while (CurrentToken != NULL) { 949 // LookForDecls is set when "if (" has been seen. Check for 950 // 'identifier' '*' 'identifier' followed by not '=' -- this 951 // '*' has to be a binary operator but determineStarAmpUsage() will 952 // categorize it as an unary operator, so set the right type here. 953 if (LookForDecls && !CurrentToken->Children.empty()) { 954 AnnotatedToken &Prev = *CurrentToken->Parent; 955 AnnotatedToken &Next = CurrentToken->Children[0]; 956 if (Prev.Parent->is(tok::identifier) && 957 (Prev.is(tok::star) || Prev.is(tok::amp)) && 958 CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) { 959 Prev.Type = TT_BinaryOperator; 960 LookForDecls = false; 961 } 962 } 963 964 if (CurrentToken->is(tok::r_paren)) { 965 Left->MatchingParen = CurrentToken; 966 CurrentToken->MatchingParen = Left; 967 968 if (StartsObjCMethodExpr) 969 objCSelector.markEnd(*CurrentToken); 970 971 next(); 972 return true; 973 } 974 if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace)) 975 return false; 976 if (CurrentToken->is(tok::comma)) 977 ++Left->ParameterCount; 978 if (!consumeToken()) 979 return false; 980 } 981 return false; 982 } 983 984 bool parseSquare() { 985 if (!CurrentToken) 986 return false; 987 988 // A '[' could be an index subscript (after an indentifier or after 989 // ')' or ']'), or it could be the start of an Objective-C method 990 // expression. 991 AnnotatedToken *Left = CurrentToken->Parent; 992 bool StartsObjCMethodExpr = 993 !Left->Parent || Left->Parent->is(tok::colon) || 994 Left->Parent->is(tok::l_square) || Left->Parent->is(tok::l_paren) || 995 Left->Parent->is(tok::kw_return) || Left->Parent->is(tok::kw_throw) || 996 getBinOpPrecedence(Left->Parent->FormatTok.Tok.getKind(), 997 true, true) > prec::Unknown; 998 999 ObjCSelectorRAII objCSelector(*this); 1000 if (StartsObjCMethodExpr) 1001 objCSelector.markStart(*Left); 1002 1003 while (CurrentToken != NULL) { 1004 if (CurrentToken->is(tok::r_square)) { 1005 if (!CurrentToken->Children.empty() && 1006 CurrentToken->Children[0].is(tok::l_paren)) { 1007 // An ObjC method call can't be followed by an open parenthesis. 1008 // FIXME: Do we incorrectly label ":" with this? 1009 StartsObjCMethodExpr = false; 1010 Left->Type = TT_Unknown; 1011 } 1012 if (StartsObjCMethodExpr) 1013 objCSelector.markEnd(*CurrentToken); 1014 Left->MatchingParen = CurrentToken; 1015 CurrentToken->MatchingParen = Left; 1016 next(); 1017 return true; 1018 } 1019 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace)) 1020 return false; 1021 if (CurrentToken->is(tok::comma)) 1022 ++Left->ParameterCount; 1023 if (!consumeToken()) 1024 return false; 1025 } 1026 return false; 1027 } 1028 1029 bool parseBrace() { 1030 // Lines are fine to end with '{'. 1031 if (CurrentToken == NULL) 1032 return true; 1033 AnnotatedToken *Left = CurrentToken->Parent; 1034 while (CurrentToken != NULL) { 1035 if (CurrentToken->is(tok::r_brace)) { 1036 Left->MatchingParen = CurrentToken; 1037 CurrentToken->MatchingParen = Left; 1038 next(); 1039 return true; 1040 } 1041 if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square)) 1042 return false; 1043 if (!consumeToken()) 1044 return false; 1045 } 1046 return true; 1047 } 1048 1049 bool parseConditional() { 1050 while (CurrentToken != NULL) { 1051 if (CurrentToken->is(tok::colon)) { 1052 CurrentToken->Type = TT_ConditionalExpr; 1053 next(); 1054 return true; 1055 } 1056 if (!consumeToken()) 1057 return false; 1058 } 1059 return false; 1060 } 1061 1062 bool parseTemplateDeclaration() { 1063 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 1064 CurrentToken->Type = TT_TemplateOpener; 1065 next(); 1066 if (!parseAngle()) 1067 return false; 1068 CurrentToken->Parent->ClosesTemplateDeclaration = true; 1069 return true; 1070 } 1071 return false; 1072 } 1073 1074 bool consumeToken() { 1075 AnnotatedToken *Tok = CurrentToken; 1076 next(); 1077 switch (Tok->FormatTok.Tok.getKind()) { 1078 case tok::plus: 1079 case tok::minus: 1080 // At the start of the line, +/- specific ObjectiveC method 1081 // declarations. 1082 if (Tok->Parent == NULL) 1083 Tok->Type = TT_ObjCMethodSpecifier; 1084 break; 1085 case tok::colon: 1086 // Colons from ?: are handled in parseConditional(). 1087 if (Tok->Parent->is(tok::r_paren)) 1088 Tok->Type = TT_CtorInitializerColon; 1089 else if (ColonIsObjCMethodExpr) 1090 Tok->Type = TT_ObjCMethodExpr; 1091 else if (ColonIsForRangeExpr) 1092 Tok->Type = TT_RangeBasedForLoopColon; 1093 break; 1094 case tok::kw_if: 1095 case tok::kw_while: 1096 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) { 1097 next(); 1098 if (!parseParens(/*LookForDecls=*/true)) 1099 return false; 1100 } 1101 break; 1102 case tok::kw_for: 1103 ColonIsForRangeExpr = true; 1104 next(); 1105 if (!parseParens()) 1106 return false; 1107 break; 1108 case tok::l_paren: 1109 if (!parseParens()) 1110 return false; 1111 break; 1112 case tok::l_square: 1113 if (!parseSquare()) 1114 return false; 1115 break; 1116 case tok::l_brace: 1117 if (!parseBrace()) 1118 return false; 1119 break; 1120 case tok::less: 1121 if (parseAngle()) 1122 Tok->Type = TT_TemplateOpener; 1123 else { 1124 Tok->Type = TT_BinaryOperator; 1125 CurrentToken = Tok; 1126 next(); 1127 } 1128 break; 1129 case tok::r_paren: 1130 case tok::r_square: 1131 return false; 1132 case tok::r_brace: 1133 // Lines can start with '}'. 1134 if (Tok->Parent != NULL) 1135 return false; 1136 break; 1137 case tok::greater: 1138 Tok->Type = TT_BinaryOperator; 1139 break; 1140 case tok::kw_operator: 1141 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) { 1142 CurrentToken->Type = TT_OverloadedOperator; 1143 next(); 1144 if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) { 1145 CurrentToken->Type = TT_OverloadedOperator; 1146 next(); 1147 } 1148 } else { 1149 while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) { 1150 CurrentToken->Type = TT_OverloadedOperator; 1151 next(); 1152 } 1153 } 1154 break; 1155 case tok::question: 1156 parseConditional(); 1157 break; 1158 case tok::kw_template: 1159 parseTemplateDeclaration(); 1160 break; 1161 default: 1162 break; 1163 } 1164 return true; 1165 } 1166 1167 void parseIncludeDirective() { 1168 next(); 1169 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 1170 next(); 1171 while (CurrentToken != NULL) { 1172 if (CurrentToken->isNot(tok::comment) || 1173 !CurrentToken->Children.empty()) 1174 CurrentToken->Type = TT_ImplicitStringLiteral; 1175 next(); 1176 } 1177 } else { 1178 while (CurrentToken != NULL) { 1179 next(); 1180 } 1181 } 1182 } 1183 1184 void parseWarningOrError() { 1185 next(); 1186 // We still want to format the whitespace left of the first token of the 1187 // warning or error. 1188 next(); 1189 while (CurrentToken != NULL) { 1190 CurrentToken->Type = TT_ImplicitStringLiteral; 1191 next(); 1192 } 1193 } 1194 1195 void parsePreprocessorDirective() { 1196 next(); 1197 if (CurrentToken == NULL) 1198 return; 1199 // Hashes in the middle of a line can lead to any strange token 1200 // sequence. 1201 if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL) 1202 return; 1203 switch ( 1204 CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 1205 case tok::pp_include: 1206 case tok::pp_import: 1207 parseIncludeDirective(); 1208 break; 1209 case tok::pp_error: 1210 case tok::pp_warning: 1211 parseWarningOrError(); 1212 break; 1213 default: 1214 break; 1215 } 1216 } 1217 1218 LineType parseLine() { 1219 int PeriodsAndArrows = 0; 1220 bool CanBeBuilderTypeStmt = true; 1221 if (CurrentToken->is(tok::hash)) { 1222 parsePreprocessorDirective(); 1223 return LT_PreprocessorDirective; 1224 } 1225 while (CurrentToken != NULL) { 1226 1227 if (CurrentToken->is(tok::kw_virtual)) 1228 KeywordVirtualFound = true; 1229 if (CurrentToken->is(tok::period) || CurrentToken->is(tok::arrow)) 1230 ++PeriodsAndArrows; 1231 if (getPrecedence(*CurrentToken) > prec::Assignment && 1232 CurrentToken->isNot(tok::less) && CurrentToken->isNot(tok::greater)) 1233 CanBeBuilderTypeStmt = false; 1234 if (!consumeToken()) 1235 return LT_Invalid; 1236 } 1237 if (KeywordVirtualFound) 1238 return LT_VirtualFunctionDecl; 1239 1240 // Assume a builder-type call if there are 2 or more "." and "->". 1241 if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) 1242 return LT_BuilderTypeCall; 1243 1244 return LT_Other; 1245 } 1246 1247 void next() { 1248 if (CurrentToken != NULL && !CurrentToken->Children.empty()) 1249 CurrentToken = &CurrentToken->Children[0]; 1250 else 1251 CurrentToken = NULL; 1252 } 1253 1254 private: 1255 AnnotatedToken *CurrentToken; 1256 bool KeywordVirtualFound; 1257 bool ColonIsObjCMethodExpr; 1258 bool ColonIsForRangeExpr; 1259 }; 1260 1261 void calculateExtraInformation(AnnotatedToken &Current) { 1262 Current.SpaceRequiredBefore = spaceRequiredBefore(Current); 1263 1264 if (Current.FormatTok.MustBreakBefore) { 1265 Current.MustBreakBefore = true; 1266 } else { 1267 if (Current.Type == TT_LineComment) { 1268 Current.MustBreakBefore = Current.FormatTok.NewlinesBefore > 0; 1269 } else if ((Current.Parent->is(tok::comment) && 1270 Current.FormatTok.NewlinesBefore > 0) || 1271 (Current.is(tok::string_literal) && 1272 Current.Parent->is(tok::string_literal))) { 1273 Current.MustBreakBefore = true; 1274 } else { 1275 Current.MustBreakBefore = false; 1276 } 1277 } 1278 Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current); 1279 if (Current.MustBreakBefore) 1280 Current.TotalLength = Current.Parent->TotalLength + Style.ColumnLimit; 1281 else 1282 Current.TotalLength = Current.Parent->TotalLength + 1283 Current.FormatTok.TokenLength + 1284 (Current.SpaceRequiredBefore ? 1 : 0); 1285 if (!Current.Children.empty()) 1286 calculateExtraInformation(Current.Children[0]); 1287 } 1288 1289 void annotate() { 1290 AnnotatingParser Parser(Line.First); 1291 Line.Type = Parser.parseLine(); 1292 if (Line.Type == LT_Invalid) 1293 return; 1294 1295 bool LookForFunctionName = Line.MustBeDeclaration; 1296 determineTokenTypes(Line.First, /*IsExpression=*/ false, 1297 LookForFunctionName); 1298 1299 if (Line.First.Type == TT_ObjCMethodSpecifier) 1300 Line.Type = LT_ObjCMethodDecl; 1301 else if (Line.First.Type == TT_ObjCDecl) 1302 Line.Type = LT_ObjCDecl; 1303 else if (Line.First.Type == TT_ObjCProperty) 1304 Line.Type = LT_ObjCProperty; 1305 1306 Line.First.SpaceRequiredBefore = true; 1307 Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore; 1308 Line.First.CanBreakBefore = Line.First.MustBreakBefore; 1309 1310 Line.First.TotalLength = Line.First.FormatTok.TokenLength; 1311 if (!Line.First.Children.empty()) 1312 calculateExtraInformation(Line.First.Children[0]); 1313 } 1314 1315 private: 1316 void determineTokenTypes(AnnotatedToken &Current, bool IsExpression, 1317 bool LookForFunctionName) { 1318 if (getPrecedence(Current) == prec::Assignment) { 1319 IsExpression = true; 1320 AnnotatedToken *Previous = Current.Parent; 1321 while (Previous != NULL) { 1322 if (Previous->Type == TT_BinaryOperator && 1323 (Previous->is(tok::star) || Previous->is(tok::amp))) { 1324 Previous->Type = TT_PointerOrReference; 1325 } 1326 Previous = Previous->Parent; 1327 } 1328 } 1329 if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) || 1330 (Current.is(tok::l_paren) && !Line.MustBeDeclaration && 1331 (Current.Parent == NULL || Current.Parent->isNot(tok::kw_for)))) 1332 IsExpression = true; 1333 1334 if (Current.Type == TT_Unknown) { 1335 if (LookForFunctionName && Current.is(tok::l_paren)) { 1336 findFunctionName(&Current); 1337 LookForFunctionName = false; 1338 } else if (Current.is(tok::star) || Current.is(tok::amp)) { 1339 Current.Type = determineStarAmpUsage(Current, IsExpression); 1340 } else if (Current.is(tok::minus) || Current.is(tok::plus) || 1341 Current.is(tok::caret)) { 1342 Current.Type = determinePlusMinusCaretUsage(Current); 1343 } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) { 1344 Current.Type = determineIncrementUsage(Current); 1345 } else if (Current.is(tok::exclaim)) { 1346 Current.Type = TT_UnaryOperator; 1347 } else if (isBinaryOperator(Current)) { 1348 Current.Type = TT_BinaryOperator; 1349 } else if (Current.is(tok::comment)) { 1350 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr, 1351 Lex.getLangOpts())); 1352 if (StringRef(Data).startswith("//")) 1353 Current.Type = TT_LineComment; 1354 else 1355 Current.Type = TT_BlockComment; 1356 } else if (Current.is(tok::r_paren) && 1357 (Current.Parent->Type == TT_PointerOrReference || 1358 Current.Parent->Type == TT_TemplateCloser) && 1359 (Current.Children.empty() || 1360 (Current.Children[0].isNot(tok::equal) && 1361 Current.Children[0].isNot(tok::semi) && 1362 Current.Children[0].isNot(tok::l_brace)))) { 1363 // FIXME: We need to get smarter and understand more cases of casts. 1364 Current.Type = TT_CastRParen; 1365 } else if (Current.is(tok::at) && Current.Children.size()) { 1366 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) { 1367 case tok::objc_interface: 1368 case tok::objc_implementation: 1369 case tok::objc_protocol: 1370 Current.Type = TT_ObjCDecl; 1371 break; 1372 case tok::objc_property: 1373 Current.Type = TT_ObjCProperty; 1374 break; 1375 default: 1376 break; 1377 } 1378 } 1379 } 1380 1381 if (!Current.Children.empty()) 1382 determineTokenTypes(Current.Children[0], IsExpression, 1383 LookForFunctionName); 1384 } 1385 1386 /// \brief Starting from \p Current, this searches backwards for an 1387 /// identifier which could be the start of a function name and marks it. 1388 void findFunctionName(AnnotatedToken *Current) { 1389 AnnotatedToken *Parent = Current->Parent; 1390 while (Parent != NULL && Parent->Parent != NULL) { 1391 if (Parent->is(tok::identifier) && 1392 (Parent->Parent->is(tok::identifier) || 1393 Parent->Parent->Type == TT_PointerOrReference || 1394 Parent->Parent->Type == TT_TemplateCloser)) { 1395 Parent->Type = TT_StartOfName; 1396 break; 1397 } 1398 Parent = Parent->Parent; 1399 } 1400 } 1401 1402 /// \brief Returns the previous token ignoring comments. 1403 const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) { 1404 const AnnotatedToken *PrevToken = Tok.Parent; 1405 while (PrevToken != NULL && PrevToken->is(tok::comment)) 1406 PrevToken = PrevToken->Parent; 1407 return PrevToken; 1408 } 1409 1410 /// \brief Returns the next token ignoring comments. 1411 const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) { 1412 if (Tok.Children.empty()) 1413 return NULL; 1414 const AnnotatedToken *NextToken = &Tok.Children[0]; 1415 while (NextToken->is(tok::comment)) { 1416 if (NextToken->Children.empty()) 1417 return NULL; 1418 NextToken = &NextToken->Children[0]; 1419 } 1420 return NextToken; 1421 } 1422 1423 /// \brief Return the type of the given token assuming it is * or &. 1424 TokenType determineStarAmpUsage(const AnnotatedToken &Tok, 1425 bool IsExpression) { 1426 const AnnotatedToken *PrevToken = getPreviousToken(Tok); 1427 if (PrevToken == NULL) 1428 return TT_UnaryOperator; 1429 1430 const AnnotatedToken *NextToken = getNextToken(Tok); 1431 if (NextToken == NULL) 1432 return TT_Unknown; 1433 1434 if (NextToken->is(tok::l_square) && NextToken->Type != TT_ObjCMethodExpr) 1435 return TT_PointerOrReference; 1436 1437 if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) || 1438 PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) || 1439 PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) || 1440 PrevToken->Type == TT_BinaryOperator || 1441 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen) 1442 return TT_UnaryOperator; 1443 1444 if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) || 1445 PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() || 1446 NextToken->is(tok::plus) || NextToken->is(tok::minus) || 1447 NextToken->is(tok::plusplus) || NextToken->is(tok::minusminus) || 1448 NextToken->is(tok::tilde) || NextToken->is(tok::exclaim) || 1449 NextToken->is(tok::l_paren) || NextToken->is(tok::l_square) || 1450 NextToken->is(tok::kw_alignof) || NextToken->is(tok::kw_sizeof)) 1451 return TT_BinaryOperator; 1452 1453 if (NextToken->is(tok::comma) || NextToken->is(tok::r_paren) || 1454 NextToken->is(tok::greater)) 1455 return TT_PointerOrReference; 1456 1457 // It is very unlikely that we are going to find a pointer or reference type 1458 // definition on the RHS of an assignment. 1459 if (IsExpression) 1460 return TT_BinaryOperator; 1461 1462 return TT_PointerOrReference; 1463 } 1464 1465 TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) { 1466 const AnnotatedToken *PrevToken = getPreviousToken(Tok); 1467 if (PrevToken == NULL) 1468 return TT_UnaryOperator; 1469 1470 // Use heuristics to recognize unary operators. 1471 if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) || 1472 PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) || 1473 PrevToken->is(tok::question) || PrevToken->is(tok::colon) || 1474 PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) || 1475 PrevToken->is(tok::at) || PrevToken->is(tok::l_brace)) 1476 return TT_UnaryOperator; 1477 1478 // There can't be to consecutive binary operators. 1479 if (PrevToken->Type == TT_BinaryOperator) 1480 return TT_UnaryOperator; 1481 1482 // Fall back to marking the token as binary operator. 1483 return TT_BinaryOperator; 1484 } 1485 1486 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 1487 TokenType determineIncrementUsage(const AnnotatedToken &Tok) { 1488 const AnnotatedToken *PrevToken = getPreviousToken(Tok); 1489 if (PrevToken == NULL) 1490 return TT_UnaryOperator; 1491 if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) || 1492 PrevToken->is(tok::identifier)) 1493 return TT_TrailingUnaryOperator; 1494 1495 return TT_UnaryOperator; 1496 } 1497 1498 bool spaceRequiredBetween(const AnnotatedToken &Left, 1499 const AnnotatedToken &Right) { 1500 if (Right.is(tok::hashhash)) 1501 return Left.is(tok::hash); 1502 if (Left.is(tok::hashhash) || Left.is(tok::hash)) 1503 return Right.is(tok::hash); 1504 if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma)) 1505 return false; 1506 if (Right.is(tok::less) && 1507 (Left.is(tok::kw_template) || 1508 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) 1509 return true; 1510 if (Left.is(tok::arrow) || Right.is(tok::arrow)) 1511 return false; 1512 if (Left.is(tok::exclaim) || Left.is(tok::tilde)) 1513 return false; 1514 if (Left.is(tok::at) && 1515 (Right.is(tok::identifier) || Right.is(tok::string_literal) || 1516 Right.is(tok::char_constant) || Right.is(tok::numeric_constant) || 1517 Right.is(tok::l_paren) || Right.is(tok::l_brace) || 1518 Right.is(tok::kw_true) || Right.is(tok::kw_false))) 1519 return false; 1520 if (Left.is(tok::coloncolon)) 1521 return false; 1522 if (Right.is(tok::coloncolon)) 1523 return Left.isNot(tok::identifier) && Left.isNot(tok::greater); 1524 if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less)) 1525 return false; 1526 if (Right.is(tok::amp) || Right.is(tok::star)) 1527 return Left.FormatTok.Tok.isLiteral() || 1528 (Left.isNot(tok::star) && Left.isNot(tok::amp) && 1529 !Style.PointerAndReferenceBindToType); 1530 if (Left.is(tok::amp) || Left.is(tok::star)) 1531 return Right.FormatTok.Tok.isLiteral() || 1532 Style.PointerAndReferenceBindToType; 1533 if (Right.is(tok::star) && Left.is(tok::l_paren)) 1534 return false; 1535 if (Left.is(tok::l_square) || Right.is(tok::r_square)) 1536 return false; 1537 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr) 1538 return false; 1539 if (Left.is(tok::period) || Right.is(tok::period)) 1540 return false; 1541 if (Left.is(tok::colon)) 1542 return Left.Type != TT_ObjCMethodExpr; 1543 if (Right.is(tok::colon)) 1544 return Right.Type != TT_ObjCMethodExpr; 1545 if (Left.is(tok::l_paren)) 1546 return false; 1547 if (Right.is(tok::l_paren)) { 1548 return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) || 1549 Left.is(tok::kw_for) || Left.is(tok::kw_while) || 1550 Left.is(tok::kw_switch) || Left.is(tok::kw_return) || 1551 Left.is(tok::kw_catch) || Left.is(tok::kw_new) || 1552 Left.is(tok::kw_delete); 1553 } 1554 if (Left.is(tok::at) && 1555 Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword) 1556 return false; 1557 if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) 1558 return false; 1559 return true; 1560 } 1561 1562 bool spaceRequiredBefore(const AnnotatedToken &Tok) { 1563 if (Line.Type == LT_ObjCMethodDecl) { 1564 if (Tok.is(tok::identifier) && !Tok.Children.empty() && 1565 Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier)) 1566 return true; 1567 if (Tok.is(tok::colon)) 1568 return false; 1569 if (Tok.Parent->Type == TT_ObjCMethodSpecifier) 1570 return true; 1571 if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier)) 1572 // Don't space between ')' and <id> 1573 return false; 1574 if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren)) 1575 // Don't space between ':' and '(' 1576 return false; 1577 } 1578 if (Line.Type == LT_ObjCProperty && 1579 (Tok.is(tok::equal) || Tok.Parent->is(tok::equal))) 1580 return false; 1581 1582 if (Tok.Parent->is(tok::comma)) 1583 return true; 1584 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen) 1585 return true; 1586 if (Tok.Type == TT_OverloadedOperator) 1587 return Tok.is(tok::identifier) || Tok.is(tok::kw_new) || 1588 Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool); 1589 if (Tok.Parent->Type == TT_OverloadedOperator) 1590 return false; 1591 if (Tok.is(tok::colon)) 1592 return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() && 1593 Tok.Type != TT_ObjCMethodExpr; 1594 if (Tok.Parent->Type == TT_UnaryOperator || 1595 Tok.Parent->Type == TT_CastRParen) 1596 return false; 1597 if (Tok.Type == TT_UnaryOperator) 1598 return Tok.Parent->isNot(tok::l_paren) && 1599 Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) && 1600 (Tok.Parent->isNot(tok::colon) || 1601 Tok.Parent->Type != TT_ObjCMethodExpr); 1602 if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) { 1603 return Tok.Type == TT_TemplateCloser && Tok.Parent->Type == 1604 TT_TemplateCloser && Style.SplitTemplateClosingGreater; 1605 } 1606 if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator) 1607 return true; 1608 if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) 1609 return false; 1610 if (Tok.is(tok::less) && Line.First.is(tok::hash)) 1611 return true; 1612 if (Tok.Type == TT_TrailingUnaryOperator) 1613 return false; 1614 return spaceRequiredBetween(*Tok.Parent, Tok); 1615 } 1616 1617 bool canBreakBefore(const AnnotatedToken &Right) { 1618 const AnnotatedToken &Left = *Right.Parent; 1619 if (Line.Type == LT_ObjCMethodDecl) { 1620 if (Right.is(tok::identifier) && !Right.Children.empty() && 1621 Right.Children[0].is(tok::colon) && Left.is(tok::identifier)) 1622 return true; 1623 if (Right.is(tok::identifier) && Left.is(tok::l_paren) && 1624 Left.Parent->is(tok::colon)) 1625 // Don't break this identifier as ':' or identifier 1626 // before it will break. 1627 return false; 1628 if (Right.is(tok::colon) && Left.is(tok::identifier) && 1629 Left.CanBreakBefore) 1630 // Don't break at ':' if identifier before it can beak. 1631 return false; 1632 } 1633 if (Right.Type == TT_StartOfName && Style.AllowReturnTypeOnItsOwnLine) 1634 return true; 1635 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr) 1636 return false; 1637 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) 1638 return true; 1639 if (isObjCSelectorName(Right)) 1640 return true; 1641 if (Left.ClosesTemplateDeclaration) 1642 return true; 1643 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question)) 1644 return true; 1645 if (Left.Type == TT_RangeBasedForLoopColon) 1646 return true; 1647 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || 1648 Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr || 1649 Left.is(tok::question)) 1650 return false; 1651 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) 1652 return false; 1653 1654 if (Right.Type == TT_LineComment) 1655 // We rely on MustBreakBefore being set correctly here as we should not 1656 // change the "binding" behavior of a comment. 1657 return false; 1658 1659 // Allow breaking after a trailing 'const', e.g. after a method declaration, 1660 // unless it is follow by ';', '{' or '='. 1661 if (Left.is(tok::kw_const) && Left.Parent != NULL && 1662 Left.Parent->is(tok::r_paren)) 1663 return Right.isNot(tok::l_brace) && Right.isNot(tok::semi) && 1664 Right.isNot(tok::equal); 1665 1666 // We only break before r_brace if there was a corresponding break before 1667 // the l_brace, which is tracked by BreakBeforeClosingBrace. 1668 if (Right.is(tok::r_brace)) 1669 return false; 1670 1671 if (Right.is(tok::r_paren) || Right.is(tok::greater)) 1672 return false; 1673 return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) || 1674 Left.is(tok::comma) || Right.is(tok::lessless) || 1675 Right.is(tok::arrow) || Right.is(tok::period) || 1676 Right.is(tok::colon) || Left.is(tok::coloncolon) || 1677 Left.is(tok::semi) || Left.is(tok::l_brace) || 1678 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen && 1679 Right.is(tok::identifier)) || 1680 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || 1681 (Left.is(tok::l_square) && !Right.is(tok::r_square)); 1682 } 1683 1684 FormatStyle Style; 1685 SourceManager &SourceMgr; 1686 Lexer &Lex; 1687 AnnotatedLine &Line; 1688 }; 1689 1690 class LexerBasedFormatTokenSource : public FormatTokenSource { 1691 public: 1692 LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr) 1693 : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr), 1694 IdentTable(Lex.getLangOpts()) { 1695 Lex.SetKeepWhitespaceMode(true); 1696 } 1697 1698 virtual FormatToken getNextToken() { 1699 if (GreaterStashed) { 1700 FormatTok.NewlinesBefore = 0; 1701 FormatTok.WhiteSpaceStart = 1702 FormatTok.Tok.getLocation().getLocWithOffset(1); 1703 FormatTok.WhiteSpaceLength = 0; 1704 GreaterStashed = false; 1705 return FormatTok; 1706 } 1707 1708 FormatTok = FormatToken(); 1709 Lex.LexFromRawLexer(FormatTok.Tok); 1710 StringRef Text = rawTokenText(FormatTok.Tok); 1711 FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation(); 1712 if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0) 1713 FormatTok.IsFirst = true; 1714 1715 // Consume and record whitespace until we find a significant token. 1716 while (FormatTok.Tok.is(tok::unknown)) { 1717 FormatTok.NewlinesBefore += Text.count('\n'); 1718 FormatTok.HasUnescapedNewline = Text.count("\\\n") != 1719 FormatTok.NewlinesBefore; 1720 FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength(); 1721 1722 if (FormatTok.Tok.is(tok::eof)) 1723 return FormatTok; 1724 Lex.LexFromRawLexer(FormatTok.Tok); 1725 Text = rawTokenText(FormatTok.Tok); 1726 } 1727 1728 // Now FormatTok is the next non-whitespace token. 1729 FormatTok.TokenLength = Text.size(); 1730 1731 // In case the token starts with escaped newlines, we want to 1732 // take them into account as whitespace - this pattern is quite frequent 1733 // in macro definitions. 1734 // FIXME: What do we want to do with other escaped spaces, and escaped 1735 // spaces or newlines in the middle of tokens? 1736 // FIXME: Add a more explicit test. 1737 unsigned i = 0; 1738 while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') { 1739 // FIXME: ++FormatTok.NewlinesBefore is missing... 1740 FormatTok.WhiteSpaceLength += 2; 1741 FormatTok.TokenLength -= 2; 1742 i += 2; 1743 } 1744 1745 if (FormatTok.Tok.is(tok::raw_identifier)) { 1746 IdentifierInfo &Info = IdentTable.get(Text); 1747 FormatTok.Tok.setIdentifierInfo(&Info); 1748 FormatTok.Tok.setKind(Info.getTokenID()); 1749 } 1750 1751 if (FormatTok.Tok.is(tok::greatergreater)) { 1752 FormatTok.Tok.setKind(tok::greater); 1753 GreaterStashed = true; 1754 } 1755 1756 return FormatTok; 1757 } 1758 1759 private: 1760 FormatToken FormatTok; 1761 bool GreaterStashed; 1762 Lexer &Lex; 1763 SourceManager &SourceMgr; 1764 IdentifierTable IdentTable; 1765 1766 /// Returns the text of \c FormatTok. 1767 StringRef rawTokenText(Token &Tok) { 1768 return StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 1769 Tok.getLength()); 1770 } 1771 }; 1772 1773 class Formatter : public UnwrappedLineConsumer { 1774 public: 1775 Formatter(DiagnosticsEngine &Diag, const FormatStyle &Style, Lexer &Lex, 1776 SourceManager &SourceMgr, 1777 const std::vector<CharSourceRange> &Ranges) 1778 : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr), 1779 Whitespaces(SourceMgr), Ranges(Ranges) {} 1780 1781 virtual ~Formatter() {} 1782 1783 tooling::Replacements format() { 1784 LexerBasedFormatTokenSource Tokens(Lex, SourceMgr); 1785 UnwrappedLineParser Parser(Diag, Style, Tokens, *this); 1786 StructuralError = Parser.parse(); 1787 unsigned PreviousEndOfLineColumn = 0; 1788 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1789 TokenAnnotator Annotator(Style, SourceMgr, Lex, AnnotatedLines[i]); 1790 Annotator.annotate(); 1791 } 1792 for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(), 1793 E = AnnotatedLines.end(); 1794 I != E; ++I) { 1795 const AnnotatedLine &TheLine = *I; 1796 if (touchesRanges(TheLine) && TheLine.Type != LT_Invalid) { 1797 unsigned Indent = formatFirstToken(TheLine.First, TheLine.Level, 1798 TheLine.InPPDirective, 1799 PreviousEndOfLineColumn); 1800 tryFitMultipleLinesInOne(Indent, I, E); 1801 UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent, 1802 TheLine.First, Whitespaces, 1803 StructuralError); 1804 PreviousEndOfLineColumn = Formatter.format(); 1805 } else { 1806 // If we did not reformat this unwrapped line, the column at the end of 1807 // the last token is unchanged - thus, we can calculate the end of the 1808 // last token, and return the result. 1809 PreviousEndOfLineColumn = 1810 SourceMgr.getSpellingColumnNumber( 1811 TheLine.Last->FormatTok.Tok.getLocation()) + 1812 Lex.MeasureTokenLength(TheLine.Last->FormatTok.Tok.getLocation(), 1813 SourceMgr, Lex.getLangOpts()) - 1814 1; 1815 } 1816 } 1817 return Whitespaces.generateReplacements(); 1818 } 1819 1820 private: 1821 /// \brief Tries to merge lines into one. 1822 /// 1823 /// This will change \c Line and \c AnnotatedLine to contain the merged line, 1824 /// if possible; note that \c I will be incremented when lines are merged. 1825 /// 1826 /// Returns whether the resulting \c Line can fit in a single line. 1827 void tryFitMultipleLinesInOne(unsigned Indent, 1828 std::vector<AnnotatedLine>::iterator &I, 1829 std::vector<AnnotatedLine>::iterator E) { 1830 unsigned Limit = Style.ColumnLimit - (I->InPPDirective ? 1 : 0) - Indent; 1831 1832 // We can never merge stuff if there are trailing line comments. 1833 if (I->Last->Type == TT_LineComment) 1834 return; 1835 1836 // Check whether the UnwrappedLine can be put onto a single line. If 1837 // so, this is bound to be the optimal solution (by definition) and we 1838 // don't need to analyze the entire solution space. 1839 if (I->Last->TotalLength > Limit) 1840 return; 1841 Limit -= I->Last->TotalLength; 1842 1843 if (I + 1 == E || (I + 1)->Type == LT_Invalid) 1844 return; 1845 1846 if (I->Last->is(tok::l_brace)) { 1847 tryMergeSimpleBlock(I, E, Limit); 1848 } else if (I->First.is(tok::kw_if)) { 1849 tryMergeSimpleIf(I, E, Limit); 1850 } else if (I->InPPDirective && (I->First.FormatTok.HasUnescapedNewline || 1851 I->First.FormatTok.IsFirst)) { 1852 tryMergeSimplePPDirective(I, E, Limit); 1853 } 1854 return; 1855 } 1856 1857 void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I, 1858 std::vector<AnnotatedLine>::iterator E, 1859 unsigned Limit) { 1860 AnnotatedLine &Line = *I; 1861 if (!(I + 1)->InPPDirective || (I + 1)->First.FormatTok.HasUnescapedNewline) 1862 return; 1863 if (I + 2 != E && (I + 2)->InPPDirective && 1864 !(I + 2)->First.FormatTok.HasUnescapedNewline) 1865 return; 1866 if (1 + (I + 1)->Last->TotalLength > Limit) 1867 return; 1868 join(Line, *(++I)); 1869 } 1870 1871 void tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator &I, 1872 std::vector<AnnotatedLine>::iterator E, 1873 unsigned Limit) { 1874 if (!Style.AllowShortIfStatementsOnASingleLine) 1875 return; 1876 if ((I + 1)->InPPDirective != I->InPPDirective || 1877 ((I + 1)->InPPDirective && 1878 (I + 1)->First.FormatTok.HasUnescapedNewline)) 1879 return; 1880 AnnotatedLine &Line = *I; 1881 if (Line.Last->isNot(tok::r_paren)) 1882 return; 1883 if (1 + (I + 1)->Last->TotalLength > Limit) 1884 return; 1885 if ((I + 1)->First.is(tok::kw_if) || (I + 1)->First.Type == TT_LineComment) 1886 return; 1887 // Only inline simple if's (no nested if or else). 1888 if (I + 2 != E && (I + 2)->First.is(tok::kw_else)) 1889 return; 1890 join(Line, *(++I)); 1891 } 1892 1893 void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I, 1894 std::vector<AnnotatedLine>::iterator E, 1895 unsigned Limit){ 1896 // First, check that the current line allows merging. This is the case if 1897 // we're not in a control flow statement and the last token is an opening 1898 // brace. 1899 AnnotatedLine &Line = *I; 1900 bool AllowedTokens = 1901 Line.First.isNot(tok::kw_if) && Line.First.isNot(tok::kw_while) && 1902 Line.First.isNot(tok::kw_do) && Line.First.isNot(tok::r_brace) && 1903 Line.First.isNot(tok::kw_else) && Line.First.isNot(tok::kw_try) && 1904 Line.First.isNot(tok::kw_catch) && Line.First.isNot(tok::kw_for) && 1905 // This gets rid of all ObjC @ keywords and methods. 1906 Line.First.isNot(tok::at) && Line.First.isNot(tok::minus) && 1907 Line.First.isNot(tok::plus); 1908 if (!AllowedTokens) 1909 return; 1910 1911 AnnotatedToken *Tok = &(I + 1)->First; 1912 if (Tok->Children.empty() && Tok->is(tok::r_brace) && 1913 !Tok->MustBreakBefore && Tok->TotalLength <= Limit) { 1914 Tok->SpaceRequiredBefore = false; 1915 join(Line, *(I + 1)); 1916 I += 1; 1917 } else { 1918 // Check that we still have three lines and they fit into the limit. 1919 if (I + 2 == E || (I + 2)->Type == LT_Invalid || 1920 !nextTwoLinesFitInto(I, Limit)) 1921 return; 1922 1923 // Second, check that the next line does not contain any braces - if it 1924 // does, readability declines when putting it into a single line. 1925 if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore) 1926 return; 1927 do { 1928 if (Tok->is(tok::l_brace) || Tok->is(tok::r_brace)) 1929 return; 1930 Tok = Tok->Children.empty() ? NULL : &Tok->Children.back(); 1931 } while (Tok != NULL); 1932 1933 // Last, check that the third line contains a single closing brace. 1934 Tok = &(I + 2)->First; 1935 if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) || 1936 Tok->MustBreakBefore) 1937 return; 1938 1939 join(Line, *(I + 1)); 1940 join(Line, *(I + 2)); 1941 I += 2; 1942 } 1943 } 1944 1945 bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I, 1946 unsigned Limit) { 1947 return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <= 1948 Limit; 1949 } 1950 1951 void join(AnnotatedLine &A, const AnnotatedLine &B) { 1952 A.Last->Children.push_back(B.First); 1953 while (!A.Last->Children.empty()) { 1954 A.Last->Children[0].Parent = A.Last; 1955 A.Last = &A.Last->Children[0]; 1956 } 1957 } 1958 1959 bool touchesRanges(const AnnotatedLine &TheLine) { 1960 const FormatToken *First = &TheLine.First.FormatTok; 1961 const FormatToken *Last = &TheLine.Last->FormatTok; 1962 CharSourceRange LineRange = CharSourceRange::getTokenRange( 1963 First->Tok.getLocation(), 1964 Last->Tok.getLocation()); 1965 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1966 if (!SourceMgr.isBeforeInTranslationUnit(LineRange.getEnd(), 1967 Ranges[i].getBegin()) && 1968 !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(), 1969 LineRange.getBegin())) 1970 return true; 1971 } 1972 return false; 1973 } 1974 1975 virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { 1976 AnnotatedLines.push_back(AnnotatedLine(TheLine)); 1977 } 1978 1979 /// \brief Add a new line and the required indent before the first Token 1980 /// of the \c UnwrappedLine if there was no structural parsing error. 1981 /// Returns the indent level of the \c UnwrappedLine. 1982 unsigned formatFirstToken(const AnnotatedToken &RootToken, unsigned Level, 1983 bool InPPDirective, 1984 unsigned PreviousEndOfLineColumn) { 1985 const FormatToken &Tok = RootToken.FormatTok; 1986 if (!Tok.WhiteSpaceStart.isValid() || StructuralError) 1987 return SourceMgr.getSpellingColumnNumber(Tok.Tok.getLocation()) - 1; 1988 1989 unsigned Newlines = std::min(Tok.NewlinesBefore, 1990 Style.MaxEmptyLinesToKeep + 1); 1991 if (Newlines == 0 && !Tok.IsFirst) 1992 Newlines = 1; 1993 unsigned Indent = Level * 2; 1994 1995 bool IsAccessModifier = false; 1996 if (RootToken.is(tok::kw_public) || RootToken.is(tok::kw_protected) || 1997 RootToken.is(tok::kw_private)) 1998 IsAccessModifier = true; 1999 else if (RootToken.is(tok::at) && !RootToken.Children.empty() && 2000 (RootToken.Children[0].isObjCAtKeyword(tok::objc_public) || 2001 RootToken.Children[0].isObjCAtKeyword(tok::objc_protected) || 2002 RootToken.Children[0].isObjCAtKeyword(tok::objc_package) || 2003 RootToken.Children[0].isObjCAtKeyword(tok::objc_private))) 2004 IsAccessModifier = true; 2005 2006 if (IsAccessModifier && 2007 static_cast<int>(Indent) + Style.AccessModifierOffset >= 0) 2008 Indent += Style.AccessModifierOffset; 2009 if (!InPPDirective || Tok.HasUnescapedNewline) { 2010 Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0, Style); 2011 } else { 2012 Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent, 2013 PreviousEndOfLineColumn, Style); 2014 } 2015 return Indent; 2016 } 2017 2018 DiagnosticsEngine &Diag; 2019 FormatStyle Style; 2020 Lexer &Lex; 2021 SourceManager &SourceMgr; 2022 WhitespaceManager Whitespaces; 2023 std::vector<CharSourceRange> Ranges; 2024 std::vector<AnnotatedLine> AnnotatedLines; 2025 bool StructuralError; 2026 }; 2027 2028 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, 2029 SourceManager &SourceMgr, 2030 std::vector<CharSourceRange> Ranges, 2031 DiagnosticConsumer *DiagClient) { 2032 IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions(); 2033 OwningPtr<DiagnosticConsumer> DiagPrinter; 2034 if (DiagClient == 0) { 2035 DiagPrinter.reset(new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts)); 2036 DiagPrinter->BeginSourceFile(Lex.getLangOpts(), Lex.getPP()); 2037 DiagClient = DiagPrinter.get(); 2038 } 2039 DiagnosticsEngine Diagnostics( 2040 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts, 2041 DiagClient, false); 2042 Diagnostics.setSourceManager(&SourceMgr); 2043 Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges); 2044 return formatter.format(); 2045 } 2046 2047 LangOptions getFormattingLangOpts() { 2048 LangOptions LangOpts; 2049 LangOpts.CPlusPlus = 1; 2050 LangOpts.CPlusPlus11 = 1; 2051 LangOpts.Bool = 1; 2052 LangOpts.ObjC1 = 1; 2053 LangOpts.ObjC2 = 1; 2054 return LangOpts; 2055 } 2056 2057 } // namespace format 2058 } // namespace clang 2059