1 //===--- Format.cpp - Format C++ code -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements functions declared in Format.h. This will be 12 /// split into separate files as we go. 13 /// 14 /// This is EXPERIMENTAL code under heavy development. It is not in a state yet, 15 /// where it can be used to format real code. 16 /// 17 //===----------------------------------------------------------------------===// 18 19 #include "clang/Format/Format.h" 20 #include "UnwrappedLineParser.h" 21 #include "clang/Basic/OperatorPrecedence.h" 22 #include "clang/Basic/SourceManager.h" 23 #include "clang/Lex/Lexer.h" 24 #include <string> 25 26 namespace clang { 27 namespace format { 28 29 // FIXME: Move somewhere sane. 30 struct TokenAnnotation { 31 enum TokenType { 32 TT_Unknown, 33 TT_TemplateOpener, 34 TT_TemplateCloser, 35 TT_BinaryOperator, 36 TT_UnaryOperator, 37 TT_TrailingUnaryOperator, 38 TT_OverloadedOperator, 39 TT_PointerOrReference, 40 TT_ConditionalExpr, 41 TT_CtorInitializerColon, 42 TT_LineComment, 43 TT_BlockComment, 44 TT_DirectorySeparator, 45 TT_ObjCMethodSpecifier 46 }; 47 48 TokenType Type; 49 50 bool SpaceRequiredBefore; 51 bool CanBreakBefore; 52 bool MustBreakBefore; 53 54 bool ClosesTemplateDeclaration; 55 }; 56 57 static prec::Level getPrecedence(const FormatToken &Tok) { 58 return getBinOpPrecedence(Tok.Tok.getKind(), true, true); 59 } 60 61 using llvm::MutableArrayRef; 62 63 FormatStyle getLLVMStyle() { 64 FormatStyle LLVMStyle; 65 LLVMStyle.ColumnLimit = 80; 66 LLVMStyle.MaxEmptyLinesToKeep = 1; 67 LLVMStyle.PointerAndReferenceBindToType = false; 68 LLVMStyle.AccessModifierOffset = -2; 69 LLVMStyle.SplitTemplateClosingGreater = true; 70 LLVMStyle.IndentCaseLabels = false; 71 return LLVMStyle; 72 } 73 74 FormatStyle getGoogleStyle() { 75 FormatStyle GoogleStyle; 76 GoogleStyle.ColumnLimit = 80; 77 GoogleStyle.MaxEmptyLinesToKeep = 1; 78 GoogleStyle.PointerAndReferenceBindToType = true; 79 GoogleStyle.AccessModifierOffset = -1; 80 GoogleStyle.SplitTemplateClosingGreater = false; 81 GoogleStyle.IndentCaseLabels = true; 82 return GoogleStyle; 83 } 84 85 struct OptimizationParameters { 86 unsigned PenaltyIndentLevel; 87 unsigned PenaltyLevelDecrease; 88 }; 89 90 class UnwrappedLineFormatter { 91 public: 92 UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, 93 const UnwrappedLine &Line, 94 const std::vector<TokenAnnotation> &Annotations, 95 tooling::Replacements &Replaces, bool StructuralError) 96 : Style(Style), SourceMgr(SourceMgr), Line(Line), 97 Annotations(Annotations), Replaces(Replaces), 98 StructuralError(StructuralError) { 99 Parameters.PenaltyIndentLevel = 15; 100 Parameters.PenaltyLevelDecrease = 10; 101 } 102 103 void format() { 104 // Format first token and initialize indent. 105 unsigned Indent = formatFirstToken(); 106 107 // Initialize state dependent on indent. 108 IndentState State; 109 State.Column = Indent; 110 State.ConsumedTokens = 0; 111 State.Indent.push_back(Indent + 4); 112 State.LastSpace.push_back(Indent); 113 State.FirstLessLess.push_back(0); 114 State.ForLoopVariablePos = 0; 115 State.LineContainsContinuedForLoopSection = false; 116 State.StartOfLineLevel = 1; 117 118 // The first token has already been indented and thus consumed. 119 moveStateToNextToken(State); 120 121 // Check whether the UnwrappedLine can be put onto a single line. If so, 122 // this is bound to be the optimal solution (by definition) and we don't 123 // need to analyze the entire solution space. 124 unsigned Columns = State.Column; 125 bool FitsOnALine = true; 126 for (unsigned i = 1, n = Line.Tokens.size(); i != n; ++i) { 127 Columns += (Annotations[i].SpaceRequiredBefore ? 1 : 0) + 128 Line.Tokens[i].Tok.getLength(); 129 // A special case for the colon of a constructor initializer as this only 130 // needs to be put on a new line if the line needs to be split. 131 if (Columns > Style.ColumnLimit || 132 (Annotations[i].MustBreakBefore && 133 Annotations[i].Type != TokenAnnotation::TT_CtorInitializerColon)) { 134 FitsOnALine = false; 135 break; 136 } 137 } 138 139 // Start iterating at 1 as we have correctly formatted of Token #0 above. 140 for (unsigned i = 1, n = Line.Tokens.size(); i != n; ++i) { 141 if (FitsOnALine) { 142 addTokenToState(false, false, State); 143 } else { 144 unsigned NoBreak = calcPenalty(State, false, UINT_MAX); 145 unsigned Break = calcPenalty(State, true, NoBreak); 146 addTokenToState(Break < NoBreak, false, State); 147 } 148 } 149 } 150 151 private: 152 /// \brief The current state when indenting a unwrapped line. 153 /// 154 /// As the indenting tries different combinations this is copied by value. 155 struct IndentState { 156 /// \brief The number of used columns in the current line. 157 unsigned Column; 158 159 /// \brief The number of tokens already consumed. 160 unsigned ConsumedTokens; 161 162 /// \brief The parenthesis level of the first token on the current line. 163 unsigned StartOfLineLevel; 164 165 /// \brief The position to which a specific parenthesis level needs to be 166 /// indented. 167 std::vector<unsigned> Indent; 168 169 /// \brief The position of the last space on each level. 170 /// 171 /// Used e.g. to break like: 172 /// functionCall(Parameter, otherCall( 173 /// OtherParameter)); 174 std::vector<unsigned> LastSpace; 175 176 /// \brief The position the first "<<" operator encountered on each level. 177 /// 178 /// Used to align "<<" operators. 0 if no such operator has been encountered 179 /// on a level. 180 std::vector<unsigned> FirstLessLess; 181 182 /// \brief The column of the first variable in a for-loop declaration. 183 /// 184 /// Used to align the second variable if necessary. 185 unsigned ForLoopVariablePos; 186 187 /// \brief \c true if this line contains a continued for-loop section. 188 bool LineContainsContinuedForLoopSection; 189 190 /// \brief Comparison operator to be able to used \c IndentState in \c map. 191 bool operator<(const IndentState &Other) const { 192 if (Other.ConsumedTokens != ConsumedTokens) 193 return Other.ConsumedTokens > ConsumedTokens; 194 if (Other.Column != Column) 195 return Other.Column > Column; 196 if (Other.StartOfLineLevel != StartOfLineLevel) 197 return Other.StartOfLineLevel > StartOfLineLevel; 198 if (Other.Indent.size() != Indent.size()) 199 return Other.Indent.size() > Indent.size(); 200 for (int i = 0, e = Indent.size(); i != e; ++i) { 201 if (Other.Indent[i] != Indent[i]) 202 return Other.Indent[i] > Indent[i]; 203 } 204 if (Other.LastSpace.size() != LastSpace.size()) 205 return Other.LastSpace.size() > LastSpace.size(); 206 for (int i = 0, e = LastSpace.size(); i != e; ++i) { 207 if (Other.LastSpace[i] != LastSpace[i]) 208 return Other.LastSpace[i] > LastSpace[i]; 209 } 210 if (Other.FirstLessLess.size() != FirstLessLess.size()) 211 return Other.FirstLessLess.size() > FirstLessLess.size(); 212 for (int i = 0, e = FirstLessLess.size(); i != e; ++i) { 213 if (Other.FirstLessLess[i] != FirstLessLess[i]) 214 return Other.FirstLessLess[i] > FirstLessLess[i]; 215 } 216 if (Other.ForLoopVariablePos != ForLoopVariablePos) 217 return Other.ForLoopVariablePos < ForLoopVariablePos; 218 if (Other.LineContainsContinuedForLoopSection != 219 LineContainsContinuedForLoopSection) 220 return LineContainsContinuedForLoopSection; 221 return false; 222 } 223 }; 224 225 /// \brief Appends the next token to \p State and updates information 226 /// necessary for indentation. 227 /// 228 /// Puts the token on the current line if \p Newline is \c true and adds a 229 /// line break and necessary indentation otherwise. 230 /// 231 /// If \p DryRun is \c false, also creates and stores the required 232 /// \c Replacement. 233 void addTokenToState(bool Newline, bool DryRun, IndentState &State) { 234 unsigned Index = State.ConsumedTokens; 235 const FormatToken &Current = Line.Tokens[Index]; 236 const FormatToken &Previous = Line.Tokens[Index - 1]; 237 unsigned ParenLevel = State.Indent.size() - 1; 238 239 if (Newline) { 240 unsigned WhitespaceStartColumn = State.Column; 241 if (Current.Tok.is(tok::string_literal) && 242 Previous.Tok.is(tok::string_literal)) { 243 State.Column = State.Column - Previous.Tok.getLength(); 244 } else if (Current.Tok.is(tok::lessless) && 245 State.FirstLessLess[ParenLevel] != 0) { 246 State.Column = State.FirstLessLess[ParenLevel]; 247 } else if (ParenLevel != 0 && 248 (Previous.Tok.is(tok::equal) || Current.Tok.is(tok::arrow) || 249 Current.Tok.is(tok::period))) { 250 // Indent and extra 4 spaces after '=' as it continues an expression. 251 // Don't do that on the top level, as we already indent 4 there. 252 State.Column = State.Indent[ParenLevel] + 4; 253 } else if ( 254 Line.Tokens[0].Tok.is(tok::kw_for) && Previous.Tok.is(tok::comma)) { 255 State.Column = State.ForLoopVariablePos; 256 } else if (Annotations[Index - 1].ClosesTemplateDeclaration) { 257 State.Column = State.Indent[ParenLevel] - 4; 258 } else { 259 State.Column = State.Indent[ParenLevel]; 260 } 261 262 State.StartOfLineLevel = ParenLevel + 1; 263 264 if (Line.Tokens[0].Tok.is(tok::kw_for)) 265 State.LineContainsContinuedForLoopSection = 266 Previous.Tok.isNot(tok::semi); 267 268 if (!DryRun) { 269 if (!Line.InPPDirective) 270 replaceWhitespace(Current, 1, State.Column); 271 else 272 replacePPWhitespace(Current, 1, State.Column, WhitespaceStartColumn); 273 } 274 275 State.LastSpace[ParenLevel] = State.Indent[ParenLevel]; 276 if (Current.Tok.is(tok::colon) && 277 Annotations[Index].Type != TokenAnnotation::TT_ConditionalExpr && 278 Annotations[0].Type != TokenAnnotation::TT_ObjCMethodSpecifier) 279 State.Indent[ParenLevel] += 2; 280 } else { 281 if (Current.Tok.is(tok::equal) && Line.Tokens[0].Tok.is(tok::kw_for)) 282 State.ForLoopVariablePos = State.Column - Previous.Tok.getLength(); 283 284 unsigned Spaces = Annotations[Index].SpaceRequiredBefore ? 1 : 0; 285 if (Annotations[Index].Type == TokenAnnotation::TT_LineComment) 286 Spaces = 2; 287 288 if (!DryRun) 289 replaceWhitespace(Current, 0, Spaces); 290 291 // FIXME: Look into using this alignment at other ParenLevels. 292 if (ParenLevel == 0 && (getPrecedence(Previous) == prec::Assignment || 293 Previous.Tok.is(tok::kw_return))) 294 State.Indent[ParenLevel] = State.Column + Spaces; 295 if (Previous.Tok.is(tok::l_paren) || 296 Annotations[Index - 1].Type == TokenAnnotation::TT_TemplateOpener) 297 State.Indent[ParenLevel] = State.Column; 298 299 // Top-level spaces are exempt as that mostly leads to better results. 300 State.Column += Spaces; 301 if (Spaces > 0 && ParenLevel != 0) 302 State.LastSpace[ParenLevel] = State.Column; 303 } 304 moveStateToNextToken(State); 305 } 306 307 /// \brief Mark the next token as consumed in \p State and modify its stacks 308 /// accordingly. 309 void moveStateToNextToken(IndentState &State) { 310 unsigned Index = State.ConsumedTokens; 311 const FormatToken &Current = Line.Tokens[Index]; 312 unsigned ParenLevel = State.Indent.size() - 1; 313 314 if (Current.Tok.is(tok::lessless) && State.FirstLessLess[ParenLevel] == 0) 315 State.FirstLessLess[ParenLevel] = State.Column; 316 317 State.Column += Current.Tok.getLength(); 318 319 // If we encounter an opening (, [, { or <, we add a level to our stacks to 320 // prepare for the following tokens. 321 if (Current.Tok.is(tok::l_paren) || Current.Tok.is(tok::l_square) || 322 Current.Tok.is(tok::l_brace) || 323 Annotations[Index].Type == TokenAnnotation::TT_TemplateOpener) { 324 State.Indent.push_back(4 + State.LastSpace.back()); 325 State.LastSpace.push_back(State.LastSpace.back()); 326 State.FirstLessLess.push_back(0); 327 } 328 329 // If we encounter a closing ), ], } or >, we can remove a level from our 330 // stacks. 331 if (Current.Tok.is(tok::r_paren) || Current.Tok.is(tok::r_square) || 332 (Current.Tok.is(tok::r_brace) && State.ConsumedTokens > 0) || 333 Annotations[Index].Type == TokenAnnotation::TT_TemplateCloser) { 334 State.Indent.pop_back(); 335 State.LastSpace.pop_back(); 336 State.FirstLessLess.pop_back(); 337 } 338 339 ++State.ConsumedTokens; 340 } 341 342 /// \brief Calculate the panelty for splitting after the token at \p Index. 343 unsigned splitPenalty(unsigned Index) { 344 assert(Index < Line.Tokens.size() && 345 "Tried to calculate penalty for splitting after the last token"); 346 const FormatToken &Left = Line.Tokens[Index]; 347 const FormatToken &Right = Line.Tokens[Index + 1]; 348 349 // In for-loops, prefer breaking at ',' and ';'. 350 if (Line.Tokens[0].Tok.is(tok::kw_for) && 351 (Left.Tok.isNot(tok::comma) && Left.Tok.isNot(tok::semi))) 352 return 20; 353 354 if (Left.Tok.is(tok::semi) || Left.Tok.is(tok::comma) || 355 Annotations[Index].ClosesTemplateDeclaration) 356 return 0; 357 if (Left.Tok.is(tok::l_paren)) 358 return 20; 359 360 prec::Level Level = getPrecedence(Line.Tokens[Index]); 361 if (Level != prec::Unknown) 362 return Level; 363 364 if (Right.Tok.is(tok::arrow) || Right.Tok.is(tok::period)) 365 return 50; 366 367 return 3; 368 } 369 370 /// \brief Calculate the number of lines needed to format the remaining part 371 /// of the unwrapped line. 372 /// 373 /// Assumes the formatting so far has led to 374 /// the \c IndentState \p State. If \p NewLine is set, a new line will be 375 /// added after the previous token. 376 /// 377 /// \param StopAt is used for optimization. If we can determine that we'll 378 /// definitely need at least \p StopAt additional lines, we already know of a 379 /// better solution. 380 unsigned calcPenalty(IndentState State, bool NewLine, unsigned StopAt) { 381 // We are at the end of the unwrapped line, so we don't need any more lines. 382 if (State.ConsumedTokens >= Line.Tokens.size()) 383 return 0; 384 385 if (!NewLine && Annotations[State.ConsumedTokens].MustBreakBefore) 386 return UINT_MAX; 387 if (NewLine && !Annotations[State.ConsumedTokens].CanBreakBefore) 388 return UINT_MAX; 389 if (!NewLine && Line.Tokens[State.ConsumedTokens - 1].Tok.is(tok::semi) && 390 State.LineContainsContinuedForLoopSection) 391 return UINT_MAX; 392 393 unsigned CurrentPenalty = 0; 394 if (NewLine) { 395 CurrentPenalty += Parameters.PenaltyIndentLevel * State.Indent.size() + 396 splitPenalty(State.ConsumedTokens - 1); 397 } else { 398 if (State.Indent.size() < State.StartOfLineLevel) 399 CurrentPenalty += Parameters.PenaltyLevelDecrease * 400 (State.StartOfLineLevel - State.Indent.size()); 401 } 402 403 addTokenToState(NewLine, true, State); 404 405 // Exceeding column limit is bad. 406 if (State.Column > Style.ColumnLimit - (Line.InPPDirective ? 1 : 0)) 407 return UINT_MAX; 408 409 if (StopAt <= CurrentPenalty) 410 return UINT_MAX; 411 StopAt -= CurrentPenalty; 412 413 StateMap::iterator I = Memory.find(State); 414 if (I != Memory.end()) { 415 // If this state has already been examined, we can safely return the 416 // previous result if we 417 // - have not hit the optimatization (and thus returned UINT_MAX) OR 418 // - are now computing for a smaller or equal StopAt. 419 unsigned SavedResult = I->second.first; 420 unsigned SavedStopAt = I->second.second; 421 if (SavedResult != UINT_MAX) 422 return SavedResult + CurrentPenalty; 423 else if (StopAt <= SavedStopAt) 424 return UINT_MAX; 425 } 426 427 unsigned NoBreak = calcPenalty(State, false, StopAt); 428 unsigned WithBreak = calcPenalty(State, true, std::min(StopAt, NoBreak)); 429 unsigned Result = std::min(NoBreak, WithBreak); 430 431 // We have to store 'Result' without adding 'CurrentPenalty' as the latter 432 // can depend on 'NewLine'. 433 Memory[State] = std::pair<unsigned, unsigned>(Result, StopAt); 434 435 return Result == UINT_MAX ? UINT_MAX : Result + CurrentPenalty; 436 } 437 438 /// \brief Replaces the whitespace in front of \p Tok. Only call once for 439 /// each \c FormatToken. 440 void replaceWhitespace(const FormatToken &Tok, unsigned NewLines, 441 unsigned Spaces) { 442 Replaces.insert(tooling::Replacement( 443 SourceMgr, Tok.WhiteSpaceStart, Tok.WhiteSpaceLength, 444 std::string(NewLines, '\n') + std::string(Spaces, ' '))); 445 } 446 447 /// \brief Like \c replaceWhitespace, but additionally adds right-aligned 448 /// backslashes to escape newlines inside a preprocessor directive. 449 /// 450 /// This function and \c replaceWhitespace have the same behavior if 451 /// \c Newlines == 0. 452 void replacePPWhitespace(const FormatToken &Tok, unsigned NewLines, 453 unsigned Spaces, unsigned WhitespaceStartColumn) { 454 std::string NewLineText; 455 if (NewLines > 0) { 456 unsigned Offset = 457 std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn); 458 for (unsigned i = 0; i < NewLines; ++i) { 459 NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' '); 460 NewLineText += "\\\n"; 461 Offset = 0; 462 } 463 } 464 Replaces.insert(tooling::Replacement( 465 SourceMgr, Tok.WhiteSpaceStart, Tok.WhiteSpaceLength, 466 NewLineText + std::string(Spaces, ' '))); 467 } 468 469 /// \brief Add a new line and the required indent before the first Token 470 /// of the \c UnwrappedLine if there was no structural parsing error. 471 /// Returns the indent level of the \c UnwrappedLine. 472 unsigned formatFirstToken() { 473 const FormatToken &Token = Line.Tokens[0]; 474 if (!Token.WhiteSpaceStart.isValid() || StructuralError) 475 return SourceMgr.getSpellingColumnNumber(Token.Tok.getLocation()) - 1; 476 477 unsigned Newlines = 478 std::min(Token.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); 479 unsigned Offset = SourceMgr.getFileOffset(Token.WhiteSpaceStart); 480 if (Newlines == 0 && Offset != 0) 481 Newlines = 1; 482 unsigned Indent = Line.Level * 2; 483 if ((Token.Tok.is(tok::kw_public) || Token.Tok.is(tok::kw_protected) || 484 Token.Tok.is(tok::kw_private)) && 485 static_cast<int>(Indent) + Style.AccessModifierOffset >= 0) 486 Indent += Style.AccessModifierOffset; 487 if (!Line.InPPDirective || Token.HasUnescapedNewline) 488 replaceWhitespace(Token, Newlines, Indent); 489 else 490 // FIXME: Figure out how to get the previous end-of-line column. 491 replacePPWhitespace(Token, Newlines, Indent, 0); 492 return Indent; 493 } 494 495 FormatStyle Style; 496 SourceManager &SourceMgr; 497 const UnwrappedLine &Line; 498 const std::vector<TokenAnnotation> &Annotations; 499 tooling::Replacements &Replaces; 500 bool StructuralError; 501 502 // A map from an indent state to a pair (Result, Used-StopAt). 503 typedef std::map<IndentState, std::pair<unsigned, unsigned> > StateMap; 504 StateMap Memory; 505 506 OptimizationParameters Parameters; 507 }; 508 509 /// \brief Determines extra information about the tokens comprising an 510 /// \c UnwrappedLine. 511 class TokenAnnotator { 512 public: 513 TokenAnnotator(const UnwrappedLine &Line, const FormatStyle &Style, 514 SourceManager &SourceMgr) 515 : Line(Line), Style(Style), SourceMgr(SourceMgr) { 516 } 517 518 /// \brief A parser that gathers additional information about tokens. 519 /// 520 /// The \c TokenAnnotator tries to matches parenthesis and square brakets and 521 /// store a parenthesis levels. It also tries to resolve matching "<" and ">" 522 /// into template parameter lists. 523 class AnnotatingParser { 524 public: 525 AnnotatingParser(const SmallVector<FormatToken, 16> &Tokens, 526 std::vector<TokenAnnotation> &Annotations) 527 : Tokens(Tokens), Annotations(Annotations), Index(0) { 528 } 529 530 bool parseAngle() { 531 while (Index < Tokens.size()) { 532 if (Tokens[Index].Tok.is(tok::greater)) { 533 Annotations[Index].Type = TokenAnnotation::TT_TemplateCloser; 534 next(); 535 return true; 536 } 537 if (Tokens[Index].Tok.is(tok::r_paren) || 538 Tokens[Index].Tok.is(tok::r_square) || 539 Tokens[Index].Tok.is(tok::r_brace)) 540 return false; 541 if (Tokens[Index].Tok.is(tok::pipepipe) || 542 Tokens[Index].Tok.is(tok::ampamp) || 543 Tokens[Index].Tok.is(tok::question) || 544 Tokens[Index].Tok.is(tok::colon)) 545 return false; 546 if (!consumeToken()) 547 return false; 548 } 549 return false; 550 } 551 552 bool parseParens() { 553 while (Index < Tokens.size()) { 554 if (Tokens[Index].Tok.is(tok::r_paren)) { 555 next(); 556 return true; 557 } 558 if (Tokens[Index].Tok.is(tok::r_square) || 559 Tokens[Index].Tok.is(tok::r_brace)) 560 return false; 561 if (!consumeToken()) 562 return false; 563 } 564 return false; 565 } 566 567 bool parseSquare() { 568 while (Index < Tokens.size()) { 569 if (Tokens[Index].Tok.is(tok::r_square)) { 570 next(); 571 return true; 572 } 573 if (Tokens[Index].Tok.is(tok::r_paren) || 574 Tokens[Index].Tok.is(tok::r_brace)) 575 return false; 576 if (!consumeToken()) 577 return false; 578 } 579 return false; 580 } 581 582 bool parseConditional() { 583 while (Index < Tokens.size()) { 584 if (Tokens[Index].Tok.is(tok::colon)) { 585 Annotations[Index].Type = TokenAnnotation::TT_ConditionalExpr; 586 next(); 587 return true; 588 } 589 if (!consumeToken()) 590 return false; 591 } 592 return false; 593 } 594 595 bool parseTemplateDeclaration() { 596 if (Index < Tokens.size() && Tokens[Index].Tok.is(tok::less)) { 597 Annotations[Index].Type = TokenAnnotation::TT_TemplateOpener; 598 next(); 599 if (!parseAngle()) 600 return false; 601 Annotations[Index - 1].ClosesTemplateDeclaration = true; 602 parseLine(); 603 return true; 604 } 605 return false; 606 } 607 608 bool consumeToken() { 609 unsigned CurrentIndex = Index; 610 next(); 611 switch (Tokens[CurrentIndex].Tok.getKind()) { 612 case tok::l_paren: 613 if (!parseParens()) 614 return false; 615 if (Index < Tokens.size() && Tokens[Index].Tok.is(tok::colon)) { 616 Annotations[Index].Type = TokenAnnotation::TT_CtorInitializerColon; 617 next(); 618 } 619 break; 620 case tok::l_square: 621 if (!parseSquare()) 622 return false; 623 break; 624 case tok::less: 625 if (parseAngle()) 626 Annotations[CurrentIndex].Type = TokenAnnotation::TT_TemplateOpener; 627 else { 628 Annotations[CurrentIndex].Type = TokenAnnotation::TT_BinaryOperator; 629 Index = CurrentIndex + 1; 630 } 631 break; 632 case tok::r_paren: 633 case tok::r_square: 634 return false; 635 case tok::greater: 636 Annotations[CurrentIndex].Type = TokenAnnotation::TT_BinaryOperator; 637 break; 638 case tok::kw_operator: 639 if (Tokens[Index].Tok.is(tok::l_paren)) { 640 Annotations[Index].Type = TokenAnnotation::TT_OverloadedOperator; 641 next(); 642 if (Index < Tokens.size() && Tokens[Index].Tok.is(tok::r_paren)) { 643 Annotations[Index].Type = TokenAnnotation::TT_OverloadedOperator; 644 next(); 645 } 646 } else { 647 while (Index < Tokens.size() && !Tokens[Index].Tok.is(tok::l_paren)) { 648 Annotations[Index].Type = TokenAnnotation::TT_OverloadedOperator; 649 next(); 650 } 651 } 652 break; 653 case tok::question: 654 parseConditional(); 655 break; 656 case tok::kw_template: 657 parseTemplateDeclaration(); 658 break; 659 default: 660 break; 661 } 662 return true; 663 } 664 665 void parseIncludeDirective() { 666 while (Index < Tokens.size()) { 667 if (Tokens[Index].Tok.is(tok::slash)) 668 Annotations[Index].Type = TokenAnnotation::TT_DirectorySeparator; 669 else if (Tokens[Index].Tok.is(tok::less)) 670 Annotations[Index].Type = TokenAnnotation::TT_TemplateOpener; 671 else if (Tokens[Index].Tok.is(tok::greater)) 672 Annotations[Index].Type = TokenAnnotation::TT_TemplateCloser; 673 next(); 674 } 675 } 676 677 void parsePreprocessorDirective() { 678 next(); 679 if (Index >= Tokens.size()) 680 return; 681 // It is the responsibility of the UnwrappedLineParser to make sure 682 // this sequence is not produced inside an unwrapped line. 683 assert(Tokens[Index].Tok.getIdentifierInfo() != NULL); 684 switch (Tokens[Index].Tok.getIdentifierInfo()->getPPKeywordID()) { 685 case tok::pp_include: 686 case tok::pp_import: 687 parseIncludeDirective(); 688 break; 689 default: 690 break; 691 } 692 } 693 694 bool parseLine() { 695 if (Tokens[Index].Tok.is(tok::hash)) { 696 parsePreprocessorDirective(); 697 return true; 698 } 699 while (Index < Tokens.size()) { 700 if (!consumeToken()) 701 return false; 702 } 703 return true; 704 } 705 706 void next() { 707 ++Index; 708 } 709 710 private: 711 const SmallVector<FormatToken, 16> &Tokens; 712 std::vector<TokenAnnotation> &Annotations; 713 unsigned Index; 714 }; 715 716 bool annotate() { 717 Annotations.clear(); 718 for (int i = 0, e = Line.Tokens.size(); i != e; ++i) { 719 Annotations.push_back(TokenAnnotation()); 720 } 721 722 AnnotatingParser Parser(Line.Tokens, Annotations); 723 if (!Parser.parseLine()) 724 return false; 725 726 determineTokenTypes(); 727 bool IsObjCMethodDecl = 728 (Line.Tokens.size() > 0 && 729 (Annotations[0].Type == TokenAnnotation::TT_ObjCMethodSpecifier)); 730 for (int i = 1, e = Line.Tokens.size(); i != e; ++i) { 731 TokenAnnotation &Annotation = Annotations[i]; 732 733 Annotation.CanBreakBefore = canBreakBefore(i); 734 735 if (Annotation.Type == TokenAnnotation::TT_CtorInitializerColon) { 736 Annotation.MustBreakBefore = true; 737 Annotation.SpaceRequiredBefore = true; 738 } else if (Annotation.Type == TokenAnnotation::TT_OverloadedOperator) { 739 Annotation.SpaceRequiredBefore = 740 Line.Tokens[i].Tok.is(tok::identifier) || 741 Line.Tokens[i].Tok.is(tok::kw_new) || 742 Line.Tokens[i].Tok.is(tok::kw_delete); 743 } else if ( 744 Annotations[i - 1].Type == TokenAnnotation::TT_OverloadedOperator) { 745 Annotation.SpaceRequiredBefore = false; 746 } else if (IsObjCMethodDecl && Line.Tokens[i].Tok.is(tok::identifier) && 747 (i != e - 1) && Line.Tokens[i + 1].Tok.is(tok::colon) && 748 Line.Tokens[i - 1].Tok.is(tok::identifier)) { 749 Annotation.CanBreakBefore = true; 750 Annotation.SpaceRequiredBefore = true; 751 } else if (IsObjCMethodDecl && Line.Tokens[i].Tok.is(tok::identifier) && 752 Line.Tokens[i - 1].Tok.is(tok::l_paren) && 753 Line.Tokens[i - 2].Tok.is(tok::colon)) { 754 // Don't break this identifier as ':' or identifier 755 // before it will break. 756 Annotation.CanBreakBefore = false; 757 } else if (Line.Tokens[i].Tok.is(tok::at) && 758 Line.Tokens[i - 2].Tok.is(tok::at)) { 759 // Don't put two objc's '@' on the same line. This could happen, 760 // as in, @optional @property ... 761 Annotation.MustBreakBefore = true; 762 } else if (Line.Tokens[i].Tok.is(tok::colon)) { 763 Annotation.SpaceRequiredBefore = 764 Line.Tokens[0].Tok.isNot(tok::kw_case) && !IsObjCMethodDecl && 765 (i != e - 1); 766 // Don't break at ':' if identifier before it can beak. 767 if (IsObjCMethodDecl && Line.Tokens[i - 1].Tok.is(tok::identifier) && 768 Annotations[i - 1].CanBreakBefore) 769 Annotation.CanBreakBefore = false; 770 } else if ( 771 Annotations[i - 1].Type == TokenAnnotation::TT_ObjCMethodSpecifier) { 772 Annotation.SpaceRequiredBefore = true; 773 } else if (Annotations[i - 1].Type == TokenAnnotation::TT_UnaryOperator) { 774 Annotation.SpaceRequiredBefore = false; 775 } else if (Annotation.Type == TokenAnnotation::TT_UnaryOperator) { 776 Annotation.SpaceRequiredBefore = 777 Line.Tokens[i - 1].Tok.isNot(tok::l_paren) && 778 Line.Tokens[i - 1].Tok.isNot(tok::l_square); 779 } else if (Line.Tokens[i - 1].Tok.is(tok::greater) && 780 Line.Tokens[i].Tok.is(tok::greater)) { 781 if (Annotation.Type == TokenAnnotation::TT_TemplateCloser && 782 Annotations[i - 1].Type == TokenAnnotation::TT_TemplateCloser) 783 Annotation.SpaceRequiredBefore = Style.SplitTemplateClosingGreater; 784 else 785 Annotation.SpaceRequiredBefore = false; 786 } else if ( 787 Annotation.Type == TokenAnnotation::TT_DirectorySeparator || 788 Annotations[i - 1].Type == TokenAnnotation::TT_DirectorySeparator) { 789 Annotation.SpaceRequiredBefore = false; 790 } else if ( 791 Annotation.Type == TokenAnnotation::TT_BinaryOperator || 792 Annotations[i - 1].Type == TokenAnnotation::TT_BinaryOperator) { 793 Annotation.SpaceRequiredBefore = true; 794 } else if ( 795 Annotations[i - 1].Type == TokenAnnotation::TT_TemplateCloser && 796 Line.Tokens[i].Tok.is(tok::l_paren)) { 797 Annotation.SpaceRequiredBefore = false; 798 } else if (Line.Tokens[i].Tok.is(tok::less) && 799 Line.Tokens[0].Tok.is(tok::hash)) { 800 Annotation.SpaceRequiredBefore = true; 801 } else if (IsObjCMethodDecl && Line.Tokens[i - 1].Tok.is(tok::r_paren) && 802 Line.Tokens[i].Tok.is(tok::identifier)) { 803 // Don't space between ')' and <id> 804 Annotation.SpaceRequiredBefore = false; 805 } else if (IsObjCMethodDecl && Line.Tokens[i - 1].Tok.is(tok::colon) && 806 Line.Tokens[i].Tok.is(tok::l_paren)) { 807 // Don't space between ':' and '(' 808 Annotation.SpaceRequiredBefore = false; 809 } else if (Annotation.Type == TokenAnnotation::TT_TrailingUnaryOperator) { 810 Annotation.SpaceRequiredBefore = false; 811 } else { 812 Annotation.SpaceRequiredBefore = 813 spaceRequiredBetween(Line.Tokens[i - 1].Tok, Line.Tokens[i].Tok); 814 } 815 816 if (Annotations[i - 1].Type == TokenAnnotation::TT_LineComment || 817 (Line.Tokens[i].Tok.is(tok::string_literal) && 818 Line.Tokens[i - 1].Tok.is(tok::string_literal))) { 819 Annotation.MustBreakBefore = true; 820 } 821 822 if (Annotation.MustBreakBefore) 823 Annotation.CanBreakBefore = true; 824 } 825 return true; 826 } 827 828 const std::vector<TokenAnnotation> &getAnnotations() { 829 return Annotations; 830 } 831 832 private: 833 void determineTokenTypes() { 834 bool IsRHS = false; 835 for (int i = 0, e = Line.Tokens.size(); i != e; ++i) { 836 TokenAnnotation &Annotation = Annotations[i]; 837 const FormatToken &Tok = Line.Tokens[i]; 838 839 if (getPrecedence(Tok) == prec::Assignment) 840 IsRHS = true; 841 else if (Tok.Tok.is(tok::kw_return)) 842 IsRHS = true; 843 844 if (Annotation.Type != TokenAnnotation::TT_Unknown) 845 continue; 846 847 if (Tok.Tok.is(tok::star) || Tok.Tok.is(tok::amp)) { 848 Annotation.Type = determineStarAmpUsage(i, IsRHS); 849 } else if (Tok.Tok.is(tok::minus) || Tok.Tok.is(tok::plus)) { 850 Annotation.Type = determinePlusMinusUsage(i); 851 } else if (Tok.Tok.is(tok::minusminus) || Tok.Tok.is(tok::plusplus)) { 852 Annotation.Type = determineIncrementUsage(i); 853 } else if (Tok.Tok.is(tok::exclaim)) { 854 Annotation.Type = TokenAnnotation::TT_UnaryOperator; 855 } else if (isBinaryOperator(Line.Tokens[i])) { 856 Annotation.Type = TokenAnnotation::TT_BinaryOperator; 857 } else if (Tok.Tok.is(tok::comment)) { 858 StringRef Data(SourceMgr.getCharacterData(Tok.Tok.getLocation()), 859 Tok.Tok.getLength()); 860 if (Data.startswith("//")) 861 Annotation.Type = TokenAnnotation::TT_LineComment; 862 else 863 Annotation.Type = TokenAnnotation::TT_BlockComment; 864 } 865 } 866 } 867 868 bool isBinaryOperator(const FormatToken &Tok) { 869 // Comma is a binary operator, but does not behave as such wrt. formatting. 870 return getPrecedence(Tok) > prec::Comma; 871 } 872 873 TokenAnnotation::TokenType determineStarAmpUsage(unsigned Index, bool IsRHS) { 874 if (Index == 0) 875 return TokenAnnotation::TT_UnaryOperator; 876 if (Index == Annotations.size()) 877 return TokenAnnotation::TT_Unknown; 878 const FormatToken &PrevToken = Line.Tokens[Index - 1]; 879 const FormatToken &NextToken = Line.Tokens[Index + 1]; 880 881 if (PrevToken.Tok.is(tok::l_paren) || PrevToken.Tok.is(tok::comma) || 882 PrevToken.Tok.is(tok::kw_return) || PrevToken.Tok.is(tok::colon) || 883 Annotations[Index - 1].Type == TokenAnnotation::TT_BinaryOperator) 884 return TokenAnnotation::TT_UnaryOperator; 885 886 if (PrevToken.Tok.isLiteral() || NextToken.Tok.isLiteral() || 887 NextToken.Tok.is(tok::plus) || NextToken.Tok.is(tok::minus) || 888 NextToken.Tok.is(tok::plusplus) || NextToken.Tok.is(tok::minusminus) || 889 NextToken.Tok.is(tok::tilde) || NextToken.Tok.is(tok::exclaim) || 890 NextToken.Tok.is(tok::kw_alignof) || NextToken.Tok.is(tok::kw_sizeof)) 891 return TokenAnnotation::TT_BinaryOperator; 892 893 if (NextToken.Tok.is(tok::comma) || NextToken.Tok.is(tok::r_paren) || 894 NextToken.Tok.is(tok::greater)) 895 return TokenAnnotation::TT_PointerOrReference; 896 897 // It is very unlikely that we are going to find a pointer or reference type 898 // definition on the RHS of an assignment. 899 if (IsRHS) 900 return TokenAnnotation::TT_BinaryOperator; 901 902 return TokenAnnotation::TT_PointerOrReference; 903 } 904 905 TokenAnnotation::TokenType determinePlusMinusUsage(unsigned Index) { 906 // At the start of the line, +/- specific ObjectiveC method declarations. 907 if (Index == 0) 908 return TokenAnnotation::TT_ObjCMethodSpecifier; 909 910 // Use heuristics to recognize unary operators. 911 const Token &PreviousTok = Line.Tokens[Index - 1].Tok; 912 if (PreviousTok.is(tok::equal) || PreviousTok.is(tok::l_paren) || 913 PreviousTok.is(tok::comma) || PreviousTok.is(tok::l_square) || 914 PreviousTok.is(tok::question) || PreviousTok.is(tok::colon) || 915 PreviousTok.is(tok::kw_return) || PreviousTok.is(tok::kw_case)) 916 return TokenAnnotation::TT_UnaryOperator; 917 918 // There can't be to consecutive binary operators. 919 if (Annotations[Index - 1].Type == TokenAnnotation::TT_BinaryOperator) 920 return TokenAnnotation::TT_UnaryOperator; 921 922 // Fall back to marking the token as binary operator. 923 return TokenAnnotation::TT_BinaryOperator; 924 } 925 926 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 927 TokenAnnotation::TokenType determineIncrementUsage(unsigned Index) { 928 if (Index != 0 && Line.Tokens[Index - 1].Tok.is(tok::identifier)) 929 return TokenAnnotation::TT_TrailingUnaryOperator; 930 931 return TokenAnnotation::TT_UnaryOperator; 932 } 933 934 bool spaceRequiredBetween(Token Left, Token Right) { 935 if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma)) 936 return false; 937 if (Left.is(tok::kw_template) && Right.is(tok::less)) 938 return true; 939 if (Left.is(tok::arrow) || Right.is(tok::arrow)) 940 return false; 941 if (Left.is(tok::exclaim) || Left.is(tok::tilde)) 942 return false; 943 if (Left.is(tok::at) && Right.is(tok::identifier)) 944 return false; 945 if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less)) 946 return false; 947 if (Right.is(tok::amp) || Right.is(tok::star)) 948 return Left.isLiteral() || 949 (Left.isNot(tok::star) && Left.isNot(tok::amp) && 950 !Style.PointerAndReferenceBindToType); 951 if (Left.is(tok::amp) || Left.is(tok::star)) 952 return Right.isLiteral() || Style.PointerAndReferenceBindToType; 953 if (Right.is(tok::star) && Left.is(tok::l_paren)) 954 return false; 955 if (Left.is(tok::l_square) || Right.is(tok::l_square) || 956 Right.is(tok::r_square)) 957 return false; 958 if (Left.is(tok::coloncolon) || 959 (Right.is(tok::coloncolon) && 960 (Left.is(tok::identifier) || Left.is(tok::greater)))) 961 return false; 962 if (Left.is(tok::period) || Right.is(tok::period)) 963 return false; 964 if (Left.is(tok::colon) || Right.is(tok::colon)) 965 return true; 966 if (Left.is(tok::l_paren)) 967 return false; 968 if (Left.is(tok::hash)) 969 return false; 970 if (Right.is(tok::l_paren)) { 971 return Left.is(tok::kw_if) || Left.is(tok::kw_for) || 972 Left.is(tok::kw_while) || Left.is(tok::kw_switch) || 973 (Left.isNot(tok::identifier) && Left.isNot(tok::kw_sizeof) && 974 Left.isNot(tok::kw_typeof) && Left.isNot(tok::kw_alignof)); 975 } 976 return true; 977 } 978 979 bool canBreakBefore(unsigned i) { 980 if (Annotations[i - 1].ClosesTemplateDeclaration) 981 return true; 982 if (Annotations[i - 1].Type == TokenAnnotation::TT_PointerOrReference || 983 Annotations[i - 1].Type == TokenAnnotation::TT_TemplateCloser || 984 Annotations[i].Type == TokenAnnotation::TT_ConditionalExpr) { 985 return false; 986 } 987 const FormatToken &Left = Line.Tokens[i - 1]; 988 const FormatToken &Right = Line.Tokens[i]; 989 if (Right.Tok.is(tok::r_paren) || Right.Tok.is(tok::l_brace) || 990 Right.Tok.is(tok::comment) || Right.Tok.is(tok::greater)) 991 return false; 992 return (isBinaryOperator(Left) && Left.Tok.isNot(tok::lessless)) || 993 Left.Tok.is(tok::comma) || Right.Tok.is(tok::lessless) || 994 Right.Tok.is(tok::arrow) || Right.Tok.is(tok::period) || 995 Right.Tok.is(tok::colon) || Left.Tok.is(tok::semi) || 996 Left.Tok.is(tok::l_brace) || 997 (Left.Tok.is(tok::l_paren) && !Right.Tok.is(tok::r_paren)); 998 } 999 1000 const UnwrappedLine &Line; 1001 FormatStyle Style; 1002 SourceManager &SourceMgr; 1003 std::vector<TokenAnnotation> Annotations; 1004 }; 1005 1006 class LexerBasedFormatTokenSource : public FormatTokenSource { 1007 public: 1008 LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr) 1009 : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr), 1010 IdentTable(Lex.getLangOpts()) { 1011 Lex.SetKeepWhitespaceMode(true); 1012 } 1013 1014 virtual FormatToken getNextToken() { 1015 if (GreaterStashed) { 1016 FormatTok.NewlinesBefore = 0; 1017 FormatTok.WhiteSpaceStart = 1018 FormatTok.Tok.getLocation().getLocWithOffset(1); 1019 FormatTok.WhiteSpaceLength = 0; 1020 GreaterStashed = false; 1021 return FormatTok; 1022 } 1023 1024 FormatTok = FormatToken(); 1025 Lex.LexFromRawLexer(FormatTok.Tok); 1026 FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation(); 1027 1028 // Consume and record whitespace until we find a significant token. 1029 while (FormatTok.Tok.is(tok::unknown)) { 1030 StringRef Text = tokenText(FormatTok.Tok); 1031 FormatTok.NewlinesBefore += Text.count('\n'); 1032 FormatTok.HasUnescapedNewline = 1033 Text.count("\\\n") != FormatTok.NewlinesBefore; 1034 FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength(); 1035 1036 if (FormatTok.Tok.is(tok::eof)) 1037 return FormatTok; 1038 Lex.LexFromRawLexer(FormatTok.Tok); 1039 } 1040 1041 if (FormatTok.Tok.is(tok::raw_identifier)) { 1042 IdentifierInfo &Info = IdentTable.get(tokenText(FormatTok.Tok)); 1043 FormatTok.Tok.setIdentifierInfo(&Info); 1044 FormatTok.Tok.setKind(Info.getTokenID()); 1045 } 1046 1047 if (FormatTok.Tok.is(tok::greatergreater)) { 1048 FormatTok.Tok.setKind(tok::greater); 1049 GreaterStashed = true; 1050 } 1051 1052 return FormatTok; 1053 } 1054 1055 private: 1056 FormatToken FormatTok; 1057 bool GreaterStashed; 1058 Lexer &Lex; 1059 SourceManager &SourceMgr; 1060 IdentifierTable IdentTable; 1061 1062 /// Returns the text of \c FormatTok. 1063 StringRef tokenText(Token &Tok) { 1064 return StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 1065 Tok.getLength()); 1066 } 1067 }; 1068 1069 class Formatter : public UnwrappedLineConsumer { 1070 public: 1071 Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, 1072 const std::vector<CharSourceRange> &Ranges) 1073 : Style(Style), Lex(Lex), SourceMgr(SourceMgr), Ranges(Ranges), 1074 StructuralError(false) { 1075 } 1076 1077 virtual ~Formatter() { 1078 } 1079 1080 tooling::Replacements format() { 1081 LexerBasedFormatTokenSource Tokens(Lex, SourceMgr); 1082 UnwrappedLineParser Parser(Style, Tokens, *this); 1083 StructuralError = Parser.parse(); 1084 for (std::vector<UnwrappedLine>::iterator I = UnwrappedLines.begin(), 1085 E = UnwrappedLines.end(); 1086 I != E; ++I) 1087 formatUnwrappedLine(*I); 1088 return Replaces; 1089 } 1090 1091 private: 1092 virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { 1093 UnwrappedLines.push_back(TheLine); 1094 } 1095 1096 void formatUnwrappedLine(const UnwrappedLine &TheLine) { 1097 if (TheLine.Tokens.size() == 0) 1098 return; 1099 1100 CharSourceRange LineRange = 1101 CharSourceRange::getTokenRange(TheLine.Tokens.front().Tok.getLocation(), 1102 TheLine.Tokens.back().Tok.getLocation()); 1103 1104 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1105 if (SourceMgr.isBeforeInTranslationUnit(LineRange.getEnd(), 1106 Ranges[i].getBegin()) || 1107 SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(), 1108 LineRange.getBegin())) 1109 continue; 1110 1111 TokenAnnotator Annotator(TheLine, Style, SourceMgr); 1112 if (!Annotator.annotate()) 1113 return; 1114 UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, 1115 Annotator.getAnnotations(), Replaces, 1116 StructuralError); 1117 Formatter.format(); 1118 return; 1119 } 1120 } 1121 1122 FormatStyle Style; 1123 Lexer &Lex; 1124 SourceManager &SourceMgr; 1125 tooling::Replacements Replaces; 1126 std::vector<CharSourceRange> Ranges; 1127 std::vector<UnwrappedLine> UnwrappedLines; 1128 bool StructuralError; 1129 }; 1130 1131 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, 1132 SourceManager &SourceMgr, 1133 std::vector<CharSourceRange> Ranges) { 1134 Formatter formatter(Style, Lex, SourceMgr, Ranges); 1135 return formatter.format(); 1136 } 1137 1138 } // namespace format 1139 } // namespace clang 1140