1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements a token annotator, i.e. creates 12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "TokenAnnotator.h" 17 #include "clang/Basic/SourceManager.h" 18 #include "llvm/Support/Debug.h" 19 20 namespace clang { 21 namespace format { 22 23 namespace { 24 25 /// \brief A parser that gathers additional information about tokens. 26 /// 27 /// The \c TokenAnnotator tries to match parenthesis and square brakets and 28 /// store a parenthesis levels. It also tries to resolve matching "<" and ">" 29 /// into template parameter lists. 30 class AnnotatingParser { 31 public: 32 AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line, 33 IdentifierInfo &Ident_in) 34 : Style(Style), Line(Line), CurrentToken(Line.First), 35 KeywordVirtualFound(false), AutoFound(false), Ident_in(Ident_in) { 36 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false)); 37 resetTokenMetadata(CurrentToken); 38 } 39 40 private: 41 bool parseAngle() { 42 if (CurrentToken == NULL) 43 return false; 44 ScopedContextCreator ContextCreator(*this, tok::less, 10); 45 FormatToken *Left = CurrentToken->Previous; 46 Contexts.back().IsExpression = false; 47 // If there's a template keyword before the opening angle bracket, this is a 48 // template parameter, not an argument. 49 Contexts.back().InTemplateArgument = 50 Left->Previous != NULL && Left->Previous->Tok.isNot(tok::kw_template); 51 52 while (CurrentToken != NULL) { 53 if (CurrentToken->is(tok::greater)) { 54 Left->MatchingParen = CurrentToken; 55 CurrentToken->MatchingParen = Left; 56 CurrentToken->Type = TT_TemplateCloser; 57 next(); 58 return true; 59 } 60 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace, 61 tok::question, tok::colon)) 62 return false; 63 // If a && or || is found and interpreted as a binary operator, this set 64 // of angles is likely part of something like "a < b && c > d". If the 65 // angles are inside an expression, the ||/&& might also be a binary 66 // operator that was misinterpreted because we are parsing template 67 // parameters. 68 // FIXME: This is getting out of hand, write a decent parser. 69 if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) && 70 ((CurrentToken->Previous->Type == TT_BinaryOperator && 71 // Toplevel bool expressions do not make lots of sense; 72 // If we're on the top level, it contains only the base context and 73 // the context for the current opening angle bracket. 74 Contexts.size() > 2) || 75 Contexts[Contexts.size() - 2].IsExpression) && 76 Line.First->isNot(tok::kw_template)) 77 return false; 78 updateParameterCount(Left, CurrentToken); 79 if (!consumeToken()) 80 return false; 81 } 82 return false; 83 } 84 85 bool parseParens(bool LookForDecls = false) { 86 if (CurrentToken == NULL) 87 return false; 88 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1); 89 90 // FIXME: This is a bit of a hack. Do better. 91 Contexts.back().ColonIsForRangeExpr = 92 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; 93 94 bool StartsObjCMethodExpr = false; 95 FormatToken *Left = CurrentToken->Previous; 96 if (CurrentToken->is(tok::caret)) { 97 // (^ can start a block type. 98 Left->Type = TT_ObjCBlockLParen; 99 } else if (FormatToken *MaybeSel = Left->Previous) { 100 // @selector( starts a selector. 101 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous && 102 MaybeSel->Previous->is(tok::at)) { 103 StartsObjCMethodExpr = true; 104 } 105 } 106 107 if (Left->Previous && 108 (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_if, 109 tok::kw_while, tok::l_paren, tok::comma) || 110 Left->Previous->Type == TT_BinaryOperator)) { 111 // static_assert, if and while usually contain expressions. 112 Contexts.back().IsExpression = true; 113 } else if (Line.InPPDirective && 114 (!Left->Previous || 115 (Left->Previous->isNot(tok::identifier) && 116 Left->Previous->Type != TT_OverloadedOperator))) { 117 Contexts.back().IsExpression = true; 118 } else if (Left->Previous && Left->Previous->is(tok::r_square) && 119 Left->Previous->MatchingParen && 120 Left->Previous->MatchingParen->Type == TT_LambdaLSquare) { 121 // This is a parameter list of a lambda expression. 122 Contexts.back().IsExpression = false; 123 } else if (Contexts[Contexts.size() - 2].CaretFound) { 124 // This is the parameter list of an ObjC block. 125 Contexts.back().IsExpression = false; 126 } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) { 127 Left->Type = TT_AttributeParen; 128 } else if (Left->Previous && Left->Previous->IsForEachMacro) { 129 // The first argument to a foreach macro is a declaration. 130 Contexts.back().IsForEachMacro = true; 131 Contexts.back().IsExpression = false; 132 } 133 134 if (StartsObjCMethodExpr) { 135 Contexts.back().ColonIsObjCMethodExpr = true; 136 Left->Type = TT_ObjCMethodExpr; 137 } 138 139 bool MightBeFunctionType = CurrentToken->is(tok::star); 140 bool HasMultipleLines = false; 141 bool HasMultipleParametersOnALine = false; 142 while (CurrentToken != NULL) { 143 // LookForDecls is set when "if (" has been seen. Check for 144 // 'identifier' '*' 'identifier' followed by not '=' -- this 145 // '*' has to be a binary operator but determineStarAmpUsage() will 146 // categorize it as an unary operator, so set the right type here. 147 if (LookForDecls && CurrentToken->Next) { 148 FormatToken *Prev = CurrentToken->getPreviousNonComment(); 149 if (Prev) { 150 FormatToken *PrevPrev = Prev->getPreviousNonComment(); 151 FormatToken *Next = CurrentToken->Next; 152 if (PrevPrev && PrevPrev->is(tok::identifier) && 153 Prev->isOneOf(tok::star, tok::amp, tok::ampamp) && 154 CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) { 155 Prev->Type = TT_BinaryOperator; 156 LookForDecls = false; 157 } 158 } 159 } 160 161 if (CurrentToken->Previous->Type == TT_PointerOrReference && 162 CurrentToken->Previous->Previous->isOneOf(tok::l_paren, 163 tok::coloncolon)) 164 MightBeFunctionType = true; 165 if (CurrentToken->is(tok::r_paren)) { 166 if (MightBeFunctionType && CurrentToken->Next && 167 (CurrentToken->Next->is(tok::l_paren) || 168 (CurrentToken->Next->is(tok::l_square) && 169 !Contexts.back().IsExpression))) 170 Left->Type = TT_FunctionTypeLParen; 171 Left->MatchingParen = CurrentToken; 172 CurrentToken->MatchingParen = Left; 173 174 if (StartsObjCMethodExpr) { 175 CurrentToken->Type = TT_ObjCMethodExpr; 176 if (Contexts.back().FirstObjCSelectorName != NULL) { 177 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 178 Contexts.back().LongestObjCSelectorName; 179 } 180 } 181 182 if (Left->Type == TT_AttributeParen) 183 CurrentToken->Type = TT_AttributeParen; 184 185 if (!HasMultipleLines) 186 Left->PackingKind = PPK_Inconclusive; 187 else if (HasMultipleParametersOnALine) 188 Left->PackingKind = PPK_BinPacked; 189 else 190 Left->PackingKind = PPK_OnePerLine; 191 192 next(); 193 return true; 194 } 195 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace)) 196 return false; 197 else if (CurrentToken->is(tok::l_brace)) 198 Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen 199 updateParameterCount(Left, CurrentToken); 200 if (CurrentToken->is(tok::comma) && CurrentToken->Next && 201 !CurrentToken->Next->HasUnescapedNewline && 202 !CurrentToken->Next->isTrailingComment()) 203 HasMultipleParametersOnALine = true; 204 if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) || 205 CurrentToken->isSimpleTypeSpecifier()) 206 Contexts.back().IsExpression = false; 207 if (!consumeToken()) 208 return false; 209 if (CurrentToken && CurrentToken->HasUnescapedNewline) 210 HasMultipleLines = true; 211 } 212 return false; 213 } 214 215 bool parseSquare() { 216 if (!CurrentToken) 217 return false; 218 219 // A '[' could be an index subscript (after an identifier or after 220 // ')' or ']'), it could be the start of an Objective-C method 221 // expression, or it could the the start of an Objective-C array literal. 222 FormatToken *Left = CurrentToken->Previous; 223 FormatToken *Parent = Left->getPreviousNonComment(); 224 bool StartsObjCMethodExpr = 225 Contexts.back().CanBeExpression && Left->Type != TT_LambdaLSquare && 226 (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, 227 tok::kw_return, tok::kw_throw) || 228 Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn || 229 Parent->Type == TT_CastRParen || 230 getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown); 231 ScopedContextCreator ContextCreator(*this, tok::l_square, 10); 232 Contexts.back().IsExpression = true; 233 bool ColonFound = false; 234 235 if (StartsObjCMethodExpr) { 236 Contexts.back().ColonIsObjCMethodExpr = true; 237 Left->Type = TT_ObjCMethodExpr; 238 } else if (Parent && Parent->is(tok::at)) { 239 Left->Type = TT_ArrayInitializerLSquare; 240 } else if (Left->Type == TT_Unknown) { 241 Left->Type = TT_ArraySubscriptLSquare; 242 } 243 244 while (CurrentToken != NULL) { 245 if (CurrentToken->is(tok::r_square)) { 246 if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) && 247 Left->Type == TT_ObjCMethodExpr) { 248 // An ObjC method call is rarely followed by an open parenthesis. 249 // FIXME: Do we incorrectly label ":" with this? 250 StartsObjCMethodExpr = false; 251 Left->Type = TT_Unknown; 252 } 253 if (StartsObjCMethodExpr && CurrentToken->Previous != Left) { 254 CurrentToken->Type = TT_ObjCMethodExpr; 255 // determineStarAmpUsage() thinks that '*' '[' is allocating an 256 // array of pointers, but if '[' starts a selector then '*' is a 257 // binary operator. 258 if (Parent != NULL && Parent->Type == TT_PointerOrReference) 259 Parent->Type = TT_BinaryOperator; 260 } 261 Left->MatchingParen = CurrentToken; 262 CurrentToken->MatchingParen = Left; 263 if (Contexts.back().FirstObjCSelectorName != NULL) { 264 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 265 Contexts.back().LongestObjCSelectorName; 266 if (Contexts.back().NumBlockParameters > 1) 267 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0; 268 } 269 next(); 270 return true; 271 } 272 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace)) 273 return false; 274 if (CurrentToken->is(tok::colon)) 275 ColonFound = true; 276 if (CurrentToken->is(tok::comma) && 277 Style.Language != FormatStyle::LK_Proto && 278 (Left->Type == TT_ArraySubscriptLSquare || 279 (Left->Type == TT_ObjCMethodExpr && !ColonFound))) 280 Left->Type = TT_ArrayInitializerLSquare; 281 updateParameterCount(Left, CurrentToken); 282 if (!consumeToken()) 283 return false; 284 } 285 return false; 286 } 287 288 bool parseBrace() { 289 if (CurrentToken != NULL) { 290 FormatToken *Left = CurrentToken->Previous; 291 292 if (Contexts.back().CaretFound) 293 Left->Type = TT_ObjCBlockLBrace; 294 Contexts.back().CaretFound = false; 295 296 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); 297 Contexts.back().ColonIsDictLiteral = true; 298 299 while (CurrentToken != NULL) { 300 if (CurrentToken->is(tok::r_brace)) { 301 Left->MatchingParen = CurrentToken; 302 CurrentToken->MatchingParen = Left; 303 next(); 304 return true; 305 } 306 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) 307 return false; 308 updateParameterCount(Left, CurrentToken); 309 if (CurrentToken->is(tok::colon) && 310 Style.Language != FormatStyle::LK_Proto) 311 Left->Type = TT_DictLiteral; 312 if (!consumeToken()) 313 return false; 314 } 315 } 316 // No closing "}" found, this probably starts a definition. 317 Line.StartsDefinition = true; 318 return true; 319 } 320 321 void updateParameterCount(FormatToken *Left, FormatToken *Current) { 322 if (Current->is(tok::comma)) { 323 ++Left->ParameterCount; 324 if (!Left->Role) 325 Left->Role.reset(new CommaSeparatedList(Style)); 326 Left->Role->CommaFound(Current); 327 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) { 328 Left->ParameterCount = 1; 329 } 330 } 331 332 bool parseConditional() { 333 while (CurrentToken != NULL) { 334 if (CurrentToken->is(tok::colon)) { 335 CurrentToken->Type = TT_ConditionalExpr; 336 next(); 337 return true; 338 } 339 if (!consumeToken()) 340 return false; 341 } 342 return false; 343 } 344 345 bool parseTemplateDeclaration() { 346 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 347 CurrentToken->Type = TT_TemplateOpener; 348 next(); 349 if (!parseAngle()) 350 return false; 351 if (CurrentToken != NULL) 352 CurrentToken->Previous->ClosesTemplateDeclaration = true; 353 return true; 354 } 355 return false; 356 } 357 358 bool consumeToken() { 359 FormatToken *Tok = CurrentToken; 360 next(); 361 switch (Tok->Tok.getKind()) { 362 case tok::plus: 363 case tok::minus: 364 if (Tok->Previous == NULL && Line.MustBeDeclaration) 365 Tok->Type = TT_ObjCMethodSpecifier; 366 break; 367 case tok::colon: 368 if (Tok->Previous == NULL) 369 return false; 370 // Colons from ?: are handled in parseConditional(). 371 if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1) { 372 Tok->Type = TT_CtorInitializerColon; 373 } else if (Contexts.back().ColonIsDictLiteral) { 374 Tok->Type = TT_DictLiteral; 375 } else if (Contexts.back().ColonIsObjCMethodExpr || 376 Line.First->Type == TT_ObjCMethodSpecifier) { 377 Tok->Type = TT_ObjCMethodExpr; 378 Tok->Previous->Type = TT_ObjCSelectorName; 379 if (Tok->Previous->ColumnWidth > 380 Contexts.back().LongestObjCSelectorName) { 381 Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth; 382 } 383 if (Contexts.back().FirstObjCSelectorName == NULL) 384 Contexts.back().FirstObjCSelectorName = Tok->Previous; 385 } else if (Contexts.back().ColonIsForRangeExpr) { 386 Tok->Type = TT_RangeBasedForLoopColon; 387 } else if (CurrentToken != NULL && 388 CurrentToken->is(tok::numeric_constant)) { 389 Tok->Type = TT_BitFieldColon; 390 } else if (Contexts.size() == 1 && Line.First->isNot(tok::kw_enum)) { 391 Tok->Type = TT_InheritanceColon; 392 } else if (Contexts.back().ContextKind == tok::l_paren) { 393 Tok->Type = TT_InlineASMColon; 394 } 395 break; 396 case tok::kw_if: 397 case tok::kw_while: 398 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) { 399 next(); 400 if (!parseParens(/*LookForDecls=*/true)) 401 return false; 402 } 403 break; 404 case tok::kw_for: 405 Contexts.back().ColonIsForRangeExpr = true; 406 next(); 407 if (!parseParens()) 408 return false; 409 break; 410 case tok::l_paren: 411 if (!parseParens()) 412 return false; 413 if (Line.MustBeDeclaration && Contexts.size() == 1 && 414 !Contexts.back().IsExpression && Line.First->Type != TT_ObjCProperty) 415 Line.MightBeFunctionDecl = true; 416 break; 417 case tok::l_square: 418 if (!parseSquare()) 419 return false; 420 break; 421 case tok::l_brace: 422 if (!parseBrace()) 423 return false; 424 break; 425 case tok::less: 426 if (Tok->Previous && !Tok->Previous->Tok.isLiteral() && parseAngle()) 427 Tok->Type = TT_TemplateOpener; 428 else { 429 Tok->Type = TT_BinaryOperator; 430 CurrentToken = Tok; 431 next(); 432 } 433 break; 434 case tok::r_paren: 435 case tok::r_square: 436 return false; 437 case tok::r_brace: 438 // Lines can start with '}'. 439 if (Tok->Previous != NULL) 440 return false; 441 break; 442 case tok::greater: 443 Tok->Type = TT_BinaryOperator; 444 break; 445 case tok::kw_operator: 446 while (CurrentToken && 447 !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) { 448 if (CurrentToken->isOneOf(tok::star, tok::amp)) 449 CurrentToken->Type = TT_PointerOrReference; 450 consumeToken(); 451 if (CurrentToken && CurrentToken->Previous->Type == TT_BinaryOperator) 452 CurrentToken->Previous->Type = TT_OverloadedOperator; 453 } 454 if (CurrentToken) { 455 CurrentToken->Type = TT_OverloadedOperatorLParen; 456 if (CurrentToken->Previous->Type == TT_BinaryOperator) 457 CurrentToken->Previous->Type = TT_OverloadedOperator; 458 } 459 break; 460 case tok::question: 461 parseConditional(); 462 break; 463 case tok::kw_template: 464 parseTemplateDeclaration(); 465 break; 466 case tok::identifier: 467 if (Line.First->is(tok::kw_for) && 468 Tok->Tok.getIdentifierInfo() == &Ident_in) 469 Tok->Type = TT_ObjCForIn; 470 break; 471 case tok::comma: 472 if (Contexts.back().FirstStartOfName) 473 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; 474 if (Contexts.back().InCtorInitializer) 475 Tok->Type = TT_CtorInitializerComma; 476 if (Contexts.back().IsForEachMacro) 477 Contexts.back().IsExpression = true; 478 break; 479 default: 480 break; 481 } 482 return true; 483 } 484 485 void parseIncludeDirective() { 486 next(); 487 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 488 next(); 489 while (CurrentToken != NULL) { 490 if (CurrentToken->isNot(tok::comment) || CurrentToken->Next) 491 CurrentToken->Type = TT_ImplicitStringLiteral; 492 next(); 493 } 494 } else { 495 while (CurrentToken != NULL) { 496 if (CurrentToken->is(tok::string_literal)) 497 // Mark these string literals as "implicit" literals, too, so that 498 // they are not split or line-wrapped. 499 CurrentToken->Type = TT_ImplicitStringLiteral; 500 next(); 501 } 502 } 503 } 504 505 void parseWarningOrError() { 506 next(); 507 // We still want to format the whitespace left of the first token of the 508 // warning or error. 509 next(); 510 while (CurrentToken != NULL) { 511 CurrentToken->Type = TT_ImplicitStringLiteral; 512 next(); 513 } 514 } 515 516 void parsePragma() { 517 next(); // Consume "pragma". 518 if (CurrentToken && CurrentToken->TokenText == "mark") { 519 next(); // Consume "mark". 520 next(); // Consume first token (so we fix leading whitespace). 521 while (CurrentToken != NULL) { 522 CurrentToken->Type = TT_ImplicitStringLiteral; 523 next(); 524 } 525 } 526 } 527 528 void parsePreprocessorDirective() { 529 next(); 530 if (CurrentToken == NULL) 531 return; 532 if (CurrentToken->Tok.is(tok::numeric_constant)) { 533 CurrentToken->SpacesRequiredBefore = 1; 534 return; 535 } 536 // Hashes in the middle of a line can lead to any strange token 537 // sequence. 538 if (CurrentToken->Tok.getIdentifierInfo() == NULL) 539 return; 540 switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) { 541 case tok::pp_include: 542 case tok::pp_import: 543 parseIncludeDirective(); 544 break; 545 case tok::pp_error: 546 case tok::pp_warning: 547 parseWarningOrError(); 548 break; 549 case tok::pp_pragma: 550 parsePragma(); 551 break; 552 case tok::pp_if: 553 case tok::pp_elif: 554 Contexts.back().IsExpression = true; 555 parseLine(); 556 break; 557 default: 558 break; 559 } 560 while (CurrentToken != NULL) 561 next(); 562 } 563 564 public: 565 LineType parseLine() { 566 if (CurrentToken->is(tok::hash)) { 567 parsePreprocessorDirective(); 568 return LT_PreprocessorDirective; 569 } 570 571 // Directly allow to 'import <string-literal>' to support protocol buffer 572 // definitions (code.google.com/p/protobuf) or missing "#" (either way we 573 // should not break the line). 574 IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo(); 575 if (Info && Info->getPPKeywordID() == tok::pp_import && 576 CurrentToken->Next && CurrentToken->Next->is(tok::string_literal)) 577 parseIncludeDirective(); 578 579 while (CurrentToken != NULL) { 580 if (CurrentToken->is(tok::kw_virtual)) 581 KeywordVirtualFound = true; 582 if (!consumeToken()) 583 return LT_Invalid; 584 } 585 if (KeywordVirtualFound) 586 return LT_VirtualFunctionDecl; 587 588 if (Line.First->Type == TT_ObjCMethodSpecifier) { 589 if (Contexts.back().FirstObjCSelectorName != NULL) 590 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 591 Contexts.back().LongestObjCSelectorName; 592 return LT_ObjCMethodDecl; 593 } 594 595 return LT_Other; 596 } 597 598 private: 599 void resetTokenMetadata(FormatToken *Token) { 600 if (Token == nullptr) return; 601 602 // Reset token type in case we have already looked at it and then 603 // recovered from an error (e.g. failure to find the matching >). 604 if (CurrentToken->Type != TT_LambdaLSquare && 605 CurrentToken->Type != TT_FunctionLBrace && 606 CurrentToken->Type != TT_ImplicitStringLiteral && 607 CurrentToken->Type != TT_TrailingReturnArrow) 608 CurrentToken->Type = TT_Unknown; 609 if (CurrentToken->Role) 610 CurrentToken->Role.reset(NULL); 611 CurrentToken->FakeLParens.clear(); 612 CurrentToken->FakeRParens = 0; 613 } 614 615 void next() { 616 if (CurrentToken != NULL) { 617 determineTokenType(*CurrentToken); 618 CurrentToken->BindingStrength = Contexts.back().BindingStrength; 619 CurrentToken->NestingLevel = Contexts.size() - 1; 620 } 621 622 if (CurrentToken != NULL) 623 CurrentToken = CurrentToken->Next; 624 625 resetTokenMetadata(CurrentToken); 626 } 627 628 /// \brief A struct to hold information valid in a specific context, e.g. 629 /// a pair of parenthesis. 630 struct Context { 631 Context(tok::TokenKind ContextKind, unsigned BindingStrength, 632 bool IsExpression) 633 : ContextKind(ContextKind), BindingStrength(BindingStrength), 634 LongestObjCSelectorName(0), NumBlockParameters(0), 635 ColonIsForRangeExpr(false), ColonIsDictLiteral(false), 636 ColonIsObjCMethodExpr(false), FirstObjCSelectorName(NULL), 637 FirstStartOfName(NULL), IsExpression(IsExpression), 638 CanBeExpression(true), InTemplateArgument(false), 639 InCtorInitializer(false), CaretFound(false), IsForEachMacro(false) {} 640 641 tok::TokenKind ContextKind; 642 unsigned BindingStrength; 643 unsigned LongestObjCSelectorName; 644 unsigned NumBlockParameters; 645 bool ColonIsForRangeExpr; 646 bool ColonIsDictLiteral; 647 bool ColonIsObjCMethodExpr; 648 FormatToken *FirstObjCSelectorName; 649 FormatToken *FirstStartOfName; 650 bool IsExpression; 651 bool CanBeExpression; 652 bool InTemplateArgument; 653 bool InCtorInitializer; 654 bool CaretFound; 655 bool IsForEachMacro; 656 }; 657 658 /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime 659 /// of each instance. 660 struct ScopedContextCreator { 661 AnnotatingParser &P; 662 663 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind, 664 unsigned Increase) 665 : P(P) { 666 P.Contexts.push_back(Context(ContextKind, 667 P.Contexts.back().BindingStrength + Increase, 668 P.Contexts.back().IsExpression)); 669 } 670 671 ~ScopedContextCreator() { P.Contexts.pop_back(); } 672 }; 673 674 void determineTokenType(FormatToken &Current) { 675 if (Current.getPrecedence() == prec::Assignment && 676 !Line.First->isOneOf(tok::kw_template, tok::kw_using) && 677 (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) { 678 Contexts.back().IsExpression = true; 679 for (FormatToken *Previous = Current.Previous; 680 Previous && !Previous->isOneOf(tok::comma, tok::semi); 681 Previous = Previous->Previous) { 682 if (Previous->is(tok::r_square)) 683 Previous = Previous->MatchingParen; 684 if (Previous->Type == TT_BinaryOperator && 685 Previous->isOneOf(tok::star, tok::amp)) { 686 Previous->Type = TT_PointerOrReference; 687 } 688 } 689 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) { 690 Contexts.back().IsExpression = true; 691 } else if (Current.is(tok::l_paren) && !Line.MustBeDeclaration && 692 !Line.InPPDirective) { 693 bool ParametersOfFunctionType = 694 Current.Previous && Current.Previous->is(tok::r_paren) && 695 Current.Previous->MatchingParen && 696 Current.Previous->MatchingParen->Type == TT_FunctionTypeLParen; 697 bool IsForOrCatch = Current.Previous && 698 Current.Previous->isOneOf(tok::kw_for, tok::kw_catch); 699 Contexts.back().IsExpression = !ParametersOfFunctionType && !IsForOrCatch; 700 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { 701 for (FormatToken *Previous = Current.Previous; 702 Previous && Previous->isOneOf(tok::star, tok::amp); 703 Previous = Previous->Previous) 704 Previous->Type = TT_PointerOrReference; 705 } else if (Current.Previous && 706 Current.Previous->Type == TT_CtorInitializerColon) { 707 Contexts.back().IsExpression = true; 708 Contexts.back().InCtorInitializer = true; 709 } else if (Current.is(tok::kw_new)) { 710 Contexts.back().CanBeExpression = false; 711 } else if (Current.is(tok::semi) || Current.is(tok::exclaim)) { 712 // This should be the condition or increment in a for-loop. 713 Contexts.back().IsExpression = true; 714 } 715 716 if (Current.Type == TT_Unknown) { 717 // Line.MightBeFunctionDecl can only be true after the parentheses of a 718 // function declaration have been found. In this case, 'Current' is a 719 // trailing token of this declaration and thus cannot be a name. 720 if (isStartOfName(Current) && !Line.MightBeFunctionDecl) { 721 Contexts.back().FirstStartOfName = &Current; 722 Current.Type = TT_StartOfName; 723 } else if (Current.is(tok::kw_auto)) { 724 AutoFound = true; 725 } else if (Current.is(tok::arrow) && AutoFound && 726 Line.MustBeDeclaration) { 727 Current.Type = TT_TrailingReturnArrow; 728 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { 729 Current.Type = 730 determineStarAmpUsage(Current, Contexts.back().CanBeExpression && 731 Contexts.back().IsExpression, 732 Contexts.back().InTemplateArgument); 733 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { 734 Current.Type = determinePlusMinusCaretUsage(Current); 735 if (Current.Type == TT_UnaryOperator && Current.is(tok::caret)) { 736 ++Contexts.back().NumBlockParameters; 737 Contexts.back().CaretFound = true; 738 } 739 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { 740 Current.Type = determineIncrementUsage(Current); 741 } else if (Current.is(tok::exclaim)) { 742 Current.Type = TT_UnaryOperator; 743 } else if (Current.is(tok::question)) { 744 Current.Type = TT_ConditionalExpr; 745 } else if (Current.isBinaryOperator() && 746 (!Current.Previous || 747 Current.Previous->isNot(tok::l_square))) { 748 Current.Type = TT_BinaryOperator; 749 } else if (Current.is(tok::comment)) { 750 if (Current.TokenText.startswith("//")) 751 Current.Type = TT_LineComment; 752 else 753 Current.Type = TT_BlockComment; 754 } else if (Current.is(tok::r_paren)) { 755 FormatToken *LeftOfParens = NULL; 756 if (Current.MatchingParen) 757 LeftOfParens = Current.MatchingParen->getPreviousNonComment(); 758 bool IsCast = false; 759 bool ParensAreEmpty = Current.Previous == Current.MatchingParen; 760 bool ParensAreType = !Current.Previous || 761 Current.Previous->Type == TT_PointerOrReference || 762 Current.Previous->Type == TT_TemplateCloser || 763 Current.Previous->isSimpleTypeSpecifier(); 764 bool ParensCouldEndDecl = 765 Current.Next && 766 Current.Next->isOneOf(tok::equal, tok::semi, tok::l_brace); 767 bool IsSizeOfOrAlignOf = 768 LeftOfParens && 769 LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof); 770 if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf && 771 ((Contexts.size() > 1 && 772 Contexts[Contexts.size() - 2].IsExpression) || 773 (Current.Next && Current.Next->isBinaryOperator()))) 774 IsCast = true; 775 if (Current.Next && Current.Next->isNot(tok::string_literal) && 776 (Current.Next->Tok.isLiteral() || 777 Current.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) 778 IsCast = true; 779 // If there is an identifier after the (), it is likely a cast, unless 780 // there is also an identifier before the (). 781 if (LeftOfParens && (LeftOfParens->Tok.getIdentifierInfo() == NULL || 782 LeftOfParens->is(tok::kw_return)) && 783 LeftOfParens->Type != TT_OverloadedOperator && 784 LeftOfParens->isNot(tok::at) && 785 LeftOfParens->Type != TT_TemplateCloser && Current.Next && 786 Current.Next->is(tok::identifier)) 787 IsCast = true; 788 if (IsCast && !ParensAreEmpty) 789 Current.Type = TT_CastRParen; 790 } else if (Current.is(tok::at) && Current.Next) { 791 switch (Current.Next->Tok.getObjCKeywordID()) { 792 case tok::objc_interface: 793 case tok::objc_implementation: 794 case tok::objc_protocol: 795 Current.Type = TT_ObjCDecl; 796 break; 797 case tok::objc_property: 798 Current.Type = TT_ObjCProperty; 799 break; 800 default: 801 break; 802 } 803 } else if (Current.is(tok::period)) { 804 FormatToken *PreviousNoComment = Current.getPreviousNonComment(); 805 if (PreviousNoComment && 806 PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) 807 Current.Type = TT_DesignatedInitializerPeriod; 808 } else if (Current.isOneOf(tok::identifier, tok::kw_const) && 809 Current.Previous && Current.Previous->isNot(tok::equal) && 810 Line.MightBeFunctionDecl && Contexts.size() == 1) { 811 // Line.MightBeFunctionDecl can only be true after the parentheses of a 812 // function declaration have been found. 813 Current.Type = TT_TrailingAnnotation; 814 } 815 } 816 } 817 818 /// \brief Take a guess at whether \p Tok starts a name of a function or 819 /// variable declaration. 820 /// 821 /// This is a heuristic based on whether \p Tok is an identifier following 822 /// something that is likely a type. 823 bool isStartOfName(const FormatToken &Tok) { 824 if (Tok.isNot(tok::identifier) || Tok.Previous == NULL) 825 return false; 826 827 // Skip "const" as it does not have an influence on whether this is a name. 828 FormatToken *PreviousNotConst = Tok.Previous; 829 while (PreviousNotConst != NULL && PreviousNotConst->is(tok::kw_const)) 830 PreviousNotConst = PreviousNotConst->Previous; 831 832 if (PreviousNotConst == NULL) 833 return false; 834 835 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) && 836 PreviousNotConst->Previous && 837 PreviousNotConst->Previous->is(tok::hash); 838 839 if (PreviousNotConst->Type == TT_TemplateCloser) 840 return PreviousNotConst && PreviousNotConst->MatchingParen && 841 PreviousNotConst->MatchingParen->Previous && 842 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template); 843 844 return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) || 845 PreviousNotConst->Type == TT_PointerOrReference || 846 PreviousNotConst->isSimpleTypeSpecifier(); 847 } 848 849 /// \brief Return the type of the given token assuming it is * or &. 850 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression, 851 bool InTemplateArgument) { 852 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 853 if (PrevToken == NULL) 854 return TT_UnaryOperator; 855 856 const FormatToken *NextToken = Tok.getNextNonComment(); 857 if (NextToken == NULL) 858 return TT_Unknown; 859 860 if (PrevToken->is(tok::coloncolon) || 861 (PrevToken->is(tok::l_paren) && !IsExpression)) 862 return TT_PointerOrReference; 863 864 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, 865 tok::comma, tok::semi, tok::kw_return, tok::colon, 866 tok::equal, tok::kw_delete, tok::kw_sizeof) || 867 PrevToken->Type == TT_BinaryOperator || 868 PrevToken->Type == TT_ConditionalExpr || 869 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen) 870 return TT_UnaryOperator; 871 872 if (NextToken->is(tok::l_square)) 873 return TT_PointerOrReference; 874 875 if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen && 876 PrevToken->MatchingParen->Previous && 877 PrevToken->MatchingParen->Previous->is(tok::kw_typeof)) 878 return TT_PointerOrReference; 879 880 if (PrevToken->Tok.isLiteral() || 881 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true, 882 tok::kw_false) || 883 NextToken->Tok.isLiteral() || 884 NextToken->isOneOf(tok::kw_true, tok::kw_false) || 885 NextToken->isUnaryOperator() || 886 // If we know we're in a template argument, there are no named 887 // declarations. Thus, having an identifier on the right-hand side 888 // indicates a binary operator. 889 (InTemplateArgument && NextToken->Tok.isAnyIdentifier())) 890 return TT_BinaryOperator; 891 892 // It is very unlikely that we are going to find a pointer or reference type 893 // definition on the RHS of an assignment. 894 if (IsExpression) 895 return TT_BinaryOperator; 896 897 return TT_PointerOrReference; 898 } 899 900 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) { 901 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 902 if (PrevToken == NULL || PrevToken->Type == TT_CastRParen) 903 return TT_UnaryOperator; 904 905 // Use heuristics to recognize unary operators. 906 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square, 907 tok::question, tok::colon, tok::kw_return, 908 tok::kw_case, tok::at, tok::l_brace)) 909 return TT_UnaryOperator; 910 911 // There can't be two consecutive binary operators. 912 if (PrevToken->Type == TT_BinaryOperator) 913 return TT_UnaryOperator; 914 915 // Fall back to marking the token as binary operator. 916 return TT_BinaryOperator; 917 } 918 919 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 920 TokenType determineIncrementUsage(const FormatToken &Tok) { 921 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 922 if (PrevToken == NULL || PrevToken->Type == TT_CastRParen) 923 return TT_UnaryOperator; 924 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier)) 925 return TT_TrailingUnaryOperator; 926 927 return TT_UnaryOperator; 928 } 929 930 931 SmallVector<Context, 8> Contexts; 932 933 const FormatStyle &Style; 934 AnnotatedLine &Line; 935 FormatToken *CurrentToken; 936 bool KeywordVirtualFound; 937 bool AutoFound; 938 IdentifierInfo &Ident_in; 939 }; 940 941 static int PrecedenceUnaryOperator = prec::PointerToMember + 1; 942 static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2; 943 944 /// \brief Parses binary expressions by inserting fake parenthesis based on 945 /// operator precedence. 946 class ExpressionParser { 947 public: 948 ExpressionParser(AnnotatedLine &Line) : Current(Line.First) { 949 // Skip leading "}", e.g. in "} else if (...) {". 950 if (Current->is(tok::r_brace)) 951 next(); 952 } 953 954 /// \brief Parse expressions with the given operatore precedence. 955 void parse(int Precedence = 0) { 956 // Skip 'return' and ObjC selector colons as they are not part of a binary 957 // expression. 958 while (Current && 959 (Current->is(tok::kw_return) || 960 (Current->is(tok::colon) && Current->Type == TT_ObjCMethodExpr))) 961 next(); 962 963 if (Current == NULL || Precedence > PrecedenceArrowAndPeriod) 964 return; 965 966 // Conditional expressions need to be parsed separately for proper nesting. 967 if (Precedence == prec::Conditional) { 968 parseConditionalExpr(); 969 return; 970 } 971 972 // Parse unary operators, which all have a higher precedence than binary 973 // operators. 974 if (Precedence == PrecedenceUnaryOperator) { 975 parseUnaryOperator(); 976 return; 977 } 978 979 FormatToken *Start = Current; 980 FormatToken *LatestOperator = NULL; 981 982 while (Current) { 983 // Consume operators with higher precedence. 984 parse(Precedence + 1); 985 986 int CurrentPrecedence = getCurrentPrecedence(); 987 988 if (Current && Current->Type == TT_ObjCSelectorName && 989 Precedence == CurrentPrecedence) { 990 if (LatestOperator) 991 addFakeParenthesis(Start, prec::Level(Precedence)); 992 Start = Current; 993 } 994 995 // At the end of the line or when an operator with higher precedence is 996 // found, insert fake parenthesis and return. 997 if (Current == NULL || Current->closesScope() || 998 (CurrentPrecedence != -1 && CurrentPrecedence < Precedence)) { 999 if (LatestOperator) { 1000 if (Precedence == PrecedenceArrowAndPeriod) { 1001 LatestOperator->LastInChainOfCalls = true; 1002 // Call expressions don't have a binary operator precedence. 1003 addFakeParenthesis(Start, prec::Unknown); 1004 } else { 1005 addFakeParenthesis(Start, prec::Level(Precedence)); 1006 } 1007 } 1008 return; 1009 } 1010 1011 // Consume scopes: (), [], <> and {} 1012 if (Current->opensScope()) { 1013 while (Current && !Current->closesScope()) { 1014 next(); 1015 parse(); 1016 } 1017 next(); 1018 } else { 1019 // Operator found. 1020 if (CurrentPrecedence == Precedence) 1021 LatestOperator = Current; 1022 1023 next(); 1024 } 1025 } 1026 } 1027 1028 private: 1029 /// \brief Gets the precedence (+1) of the given token for binary operators 1030 /// and other tokens that we treat like binary operators. 1031 int getCurrentPrecedence() { 1032 if (Current) { 1033 if (Current->Type == TT_ConditionalExpr) 1034 return prec::Conditional; 1035 else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon || 1036 Current->Type == TT_ObjCSelectorName) 1037 return 0; 1038 else if (Current->Type == TT_RangeBasedForLoopColon) 1039 return prec::Comma; 1040 else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma)) 1041 return Current->getPrecedence(); 1042 else if (Current->isOneOf(tok::period, tok::arrow)) 1043 return PrecedenceArrowAndPeriod; 1044 } 1045 return -1; 1046 } 1047 1048 void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) { 1049 Start->FakeLParens.push_back(Precedence); 1050 if (Precedence > prec::Unknown) 1051 Start->StartsBinaryExpression = true; 1052 if (Current) { 1053 ++Current->Previous->FakeRParens; 1054 if (Precedence > prec::Unknown) 1055 Current->Previous->EndsBinaryExpression = true; 1056 } 1057 } 1058 1059 /// \brief Parse unary operator expressions and surround them with fake 1060 /// parentheses if appropriate. 1061 void parseUnaryOperator() { 1062 if (Current == NULL || Current->Type != TT_UnaryOperator) { 1063 parse(PrecedenceArrowAndPeriod); 1064 return; 1065 } 1066 1067 FormatToken *Start = Current; 1068 next(); 1069 parseUnaryOperator(); 1070 1071 // The actual precedence doesn't matter. 1072 addFakeParenthesis(Start, prec::Unknown); 1073 } 1074 1075 void parseConditionalExpr() { 1076 FormatToken *Start = Current; 1077 parse(prec::LogicalOr); 1078 if (!Current || !Current->is(tok::question)) 1079 return; 1080 next(); 1081 parse(prec::LogicalOr); 1082 if (!Current || Current->Type != TT_ConditionalExpr) 1083 return; 1084 next(); 1085 parseConditionalExpr(); 1086 addFakeParenthesis(Start, prec::Conditional); 1087 } 1088 1089 void next() { 1090 if (Current) 1091 Current = Current->Next; 1092 while (Current && Current->isTrailingComment()) 1093 Current = Current->Next; 1094 } 1095 1096 FormatToken *Current; 1097 }; 1098 1099 } // end anonymous namespace 1100 1101 void 1102 TokenAnnotator::setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) { 1103 const AnnotatedLine *NextNonCommentLine = NULL; 1104 for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(), 1105 E = Lines.rend(); 1106 I != E; ++I) { 1107 if (NextNonCommentLine && (*I)->First->is(tok::comment) && 1108 (*I)->First->Next == NULL) 1109 (*I)->Level = NextNonCommentLine->Level; 1110 else 1111 NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : NULL; 1112 1113 setCommentLineLevels((*I)->Children); 1114 } 1115 } 1116 1117 void TokenAnnotator::annotate(AnnotatedLine &Line) { 1118 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), 1119 E = Line.Children.end(); 1120 I != E; ++I) { 1121 annotate(**I); 1122 } 1123 AnnotatingParser Parser(Style, Line, Ident_in); 1124 Line.Type = Parser.parseLine(); 1125 if (Line.Type == LT_Invalid) 1126 return; 1127 1128 ExpressionParser ExprParser(Line); 1129 ExprParser.parse(); 1130 1131 if (Line.First->Type == TT_ObjCMethodSpecifier) 1132 Line.Type = LT_ObjCMethodDecl; 1133 else if (Line.First->Type == TT_ObjCDecl) 1134 Line.Type = LT_ObjCDecl; 1135 else if (Line.First->Type == TT_ObjCProperty) 1136 Line.Type = LT_ObjCProperty; 1137 1138 Line.First->SpacesRequiredBefore = 1; 1139 Line.First->CanBreakBefore = Line.First->MustBreakBefore; 1140 } 1141 1142 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { 1143 Line.First->TotalLength = 1144 Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth; 1145 if (!Line.First->Next) 1146 return; 1147 FormatToken *Current = Line.First->Next; 1148 bool InFunctionDecl = Line.MightBeFunctionDecl; 1149 while (Current != NULL) { 1150 if (Current->Type == TT_LineComment) { 1151 if (Current->Previous->BlockKind == BK_BracedInit && 1152 Current->Previous->opensScope()) 1153 Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1; 1154 else 1155 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; 1156 1157 // If we find a trailing comment, iterate backwards to determine whether 1158 // it seems to relate to a specific parameter. If so, break before that 1159 // parameter to avoid changing the comment's meaning. E.g. don't move 'b' 1160 // to the previous line in: 1161 // SomeFunction(a, 1162 // b, // comment 1163 // c); 1164 if (!Current->HasUnescapedNewline) { 1165 for (FormatToken *Parameter = Current->Previous; Parameter; 1166 Parameter = Parameter->Previous) { 1167 if (Parameter->isOneOf(tok::comment, tok::r_brace)) 1168 break; 1169 if (Parameter->Previous && Parameter->Previous->is(tok::comma)) { 1170 if (Parameter->Previous->Type != TT_CtorInitializerComma && 1171 Parameter->HasUnescapedNewline) 1172 Parameter->MustBreakBefore = true; 1173 break; 1174 } 1175 } 1176 } 1177 } else if (Current->SpacesRequiredBefore == 0 && 1178 spaceRequiredBefore(Line, *Current)) { 1179 Current->SpacesRequiredBefore = 1; 1180 } 1181 1182 Current->MustBreakBefore = 1183 Current->MustBreakBefore || mustBreakBefore(Line, *Current); 1184 1185 Current->CanBreakBefore = 1186 Current->MustBreakBefore || canBreakBefore(Line, *Current); 1187 if (Current->MustBreakBefore || !Current->Children.empty() || 1188 Current->IsMultiline) 1189 Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit; 1190 else 1191 Current->TotalLength = Current->Previous->TotalLength + 1192 Current->ColumnWidth + 1193 Current->SpacesRequiredBefore; 1194 1195 if (Current->Type == TT_CtorInitializerColon) 1196 InFunctionDecl = false; 1197 1198 // FIXME: Only calculate this if CanBreakBefore is true once static 1199 // initializers etc. are sorted out. 1200 // FIXME: Move magic numbers to a better place. 1201 Current->SplitPenalty = 20 * Current->BindingStrength + 1202 splitPenalty(Line, *Current, InFunctionDecl); 1203 1204 Current = Current->Next; 1205 } 1206 1207 calculateUnbreakableTailLengths(Line); 1208 for (Current = Line.First; Current != NULL; Current = Current->Next) { 1209 if (Current->Role) 1210 Current->Role->precomputeFormattingInfos(Current); 1211 } 1212 1213 DEBUG({ printDebugInfo(Line); }); 1214 1215 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), 1216 E = Line.Children.end(); 1217 I != E; ++I) { 1218 calculateFormattingInformation(**I); 1219 } 1220 } 1221 1222 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) { 1223 unsigned UnbreakableTailLength = 0; 1224 FormatToken *Current = Line.Last; 1225 while (Current != NULL) { 1226 Current->UnbreakableTailLength = UnbreakableTailLength; 1227 if (Current->CanBreakBefore || 1228 Current->isOneOf(tok::comment, tok::string_literal)) { 1229 UnbreakableTailLength = 0; 1230 } else { 1231 UnbreakableTailLength += 1232 Current->ColumnWidth + Current->SpacesRequiredBefore; 1233 } 1234 Current = Current->Previous; 1235 } 1236 } 1237 1238 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, 1239 const FormatToken &Tok, 1240 bool InFunctionDecl) { 1241 const FormatToken &Left = *Tok.Previous; 1242 const FormatToken &Right = Tok; 1243 1244 if (Left.is(tok::semi)) 1245 return 0; 1246 if (Left.is(tok::comma)) 1247 return 1; 1248 if (Right.is(tok::l_square)) { 1249 if (Style.Language == FormatStyle::LK_Proto) 1250 return 1; 1251 if (Right.Type != TT_ObjCMethodExpr) 1252 return 250; 1253 } 1254 if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator)) { 1255 if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt) 1256 return 3; 1257 if (Left.Type == TT_StartOfName) 1258 return 20; 1259 if (InFunctionDecl && Right.NestingLevel == 0) 1260 return Style.PenaltyReturnTypeOnItsOwnLine; 1261 return 200; 1262 } 1263 if (Left.is(tok::equal) && Right.is(tok::l_brace)) 1264 return 150; 1265 if (Left.Type == TT_CastRParen) 1266 return 100; 1267 if (Left.is(tok::coloncolon) || 1268 (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto)) 1269 return 500; 1270 if (Left.isOneOf(tok::kw_class, tok::kw_struct)) 1271 return 5000; 1272 1273 if (Left.Type == TT_RangeBasedForLoopColon || 1274 Left.Type == TT_InheritanceColon) 1275 return 2; 1276 1277 if (Right.isMemberAccess()) { 1278 if (Left.is(tok::r_paren) && Left.MatchingParen && 1279 Left.MatchingParen->ParameterCount > 0) 1280 return 20; // Should be smaller than breaking at a nested comma. 1281 return 150; 1282 } 1283 1284 if (Right.Type == TT_TrailingAnnotation && Right.Next && 1285 Right.Next->isNot(tok::l_paren)) { 1286 // Generally, breaking before a trailing annotation is bad unless it is 1287 // function-like. It seems to be especially preferable to keep standard 1288 // annotations (i.e. "const", "final" and "override") on the same line. 1289 // Use a slightly higher penalty after ")" so that annotations like 1290 // "const override" are kept together. 1291 bool is_short_annotation = Right.TokenText.size() < 10; 1292 return (Left.is(tok::r_paren) ? 100 : 120) + 1293 (is_short_annotation ? 50 : 0); 1294 } 1295 1296 // In for-loops, prefer breaking at ',' and ';'. 1297 if (Line.First->is(tok::kw_for) && Left.is(tok::equal)) 1298 return 4; 1299 1300 // In Objective-C method expressions, prefer breaking before "param:" over 1301 // breaking after it. 1302 if (Right.Type == TT_ObjCSelectorName) 1303 return 0; 1304 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) 1305 return Line.MightBeFunctionDecl ? 50 : 500; 1306 1307 if (Left.is(tok::l_paren) && InFunctionDecl) 1308 return 100; 1309 if (Left.is(tok::equal) && InFunctionDecl) 1310 return 110; 1311 if (Left.opensScope()) 1312 return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter 1313 : 19; 1314 1315 if (Right.is(tok::lessless)) { 1316 if (Left.is(tok::string_literal)) { 1317 StringRef Content = Left.TokenText; 1318 if (Content.startswith("\"")) 1319 Content = Content.drop_front(1); 1320 if (Content.endswith("\"")) 1321 Content = Content.drop_back(1); 1322 Content = Content.trim(); 1323 if (Content.size() > 1 && 1324 (Content.back() == ':' || Content.back() == '=')) 1325 return 25; 1326 } 1327 return 1; // Breaking at a << is really cheap. 1328 } 1329 if (Left.Type == TT_ConditionalExpr) 1330 return prec::Conditional; 1331 prec::Level Level = Left.getPrecedence(); 1332 1333 if (Level != prec::Unknown) 1334 return Level; 1335 1336 return 3; 1337 } 1338 1339 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, 1340 const FormatToken &Left, 1341 const FormatToken &Right) { 1342 if (Style.Language == FormatStyle::LK_Proto) { 1343 if (Right.is(tok::l_paren) && 1344 (Left.TokenText == "returns" || Left.TokenText == "option")) 1345 return true; 1346 } 1347 if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty && 1348 Left.Tok.getObjCKeywordID() == tok::objc_property) 1349 return true; 1350 if (Right.is(tok::hashhash)) 1351 return Left.is(tok::hash); 1352 if (Left.isOneOf(tok::hashhash, tok::hash)) 1353 return Right.is(tok::hash); 1354 if (Left.is(tok::l_paren) && Right.is(tok::r_paren)) 1355 return Style.SpaceInEmptyParentheses; 1356 if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) 1357 return (Right.Type == TT_CastRParen || 1358 (Left.MatchingParen && Left.MatchingParen->Type == TT_CastRParen)) 1359 ? Style.SpacesInCStyleCastParentheses 1360 : Style.SpacesInParentheses; 1361 if (Style.SpacesInAngles && 1362 ((Left.Type == TT_TemplateOpener) != (Right.Type == TT_TemplateCloser))) 1363 return true; 1364 if (Right.isOneOf(tok::semi, tok::comma)) 1365 return false; 1366 if (Right.is(tok::less) && 1367 (Left.is(tok::kw_template) || 1368 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) 1369 return true; 1370 if (Left.is(tok::arrow) || Right.is(tok::arrow)) 1371 return false; 1372 if (Left.isOneOf(tok::exclaim, tok::tilde)) 1373 return false; 1374 if (Left.is(tok::at) && 1375 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant, 1376 tok::numeric_constant, tok::l_paren, tok::l_brace, 1377 tok::kw_true, tok::kw_false)) 1378 return false; 1379 if (Left.is(tok::coloncolon)) 1380 return false; 1381 if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace)) 1382 return (Left.is(tok::less) && Style.Standard == FormatStyle::LS_Cpp03) || 1383 !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren, 1384 tok::r_paren, tok::less); 1385 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) 1386 return false; 1387 if (Right.is(tok::ellipsis)) 1388 return Left.Tok.isLiteral(); 1389 if (Left.is(tok::l_square) && Right.is(tok::amp)) 1390 return false; 1391 if (Right.Type == TT_PointerOrReference) 1392 return Left.Tok.isLiteral() || 1393 ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) && 1394 !Style.PointerBindsToType); 1395 if (Right.Type == TT_FunctionTypeLParen && Left.isNot(tok::l_paren) && 1396 (Left.Type != TT_PointerOrReference || Style.PointerBindsToType)) 1397 return true; 1398 if (Left.Type == TT_PointerOrReference) 1399 return Right.Tok.isLiteral() || Right.Type == TT_BlockComment || 1400 ((Right.Type != TT_PointerOrReference) && 1401 Right.isNot(tok::l_paren) && Style.PointerBindsToType && 1402 Left.Previous && 1403 !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon)); 1404 if (Right.is(tok::star) && Left.is(tok::l_paren)) 1405 return false; 1406 if (Left.is(tok::l_square)) 1407 return Left.Type == TT_ArrayInitializerLSquare && 1408 Style.SpacesInContainerLiterals && Right.isNot(tok::r_square); 1409 if (Right.is(tok::r_square)) 1410 return Right.MatchingParen && Style.SpacesInContainerLiterals && 1411 Right.MatchingParen->Type == TT_ArrayInitializerLSquare; 1412 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr && 1413 Right.Type != TT_LambdaLSquare && Left.isNot(tok::numeric_constant)) 1414 return false; 1415 if (Left.is(tok::colon)) 1416 return Left.Type != TT_ObjCMethodExpr; 1417 if (Right.is(tok::l_paren)) { 1418 if (Left.is(tok::r_paren) && Left.Type == TT_AttributeParen) 1419 return true; 1420 return Line.Type == LT_ObjCDecl || 1421 Left.isOneOf(tok::kw_return, tok::kw_new, tok::kw_delete, 1422 tok::semi) || 1423 (Style.SpaceBeforeParens != FormatStyle::SBPO_Never && 1424 (Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, 1425 tok::kw_switch, tok::kw_catch) || 1426 Left.IsForEachMacro)) || 1427 (Style.SpaceBeforeParens == FormatStyle::SBPO_Always && 1428 Left.isOneOf(tok::identifier, tok::kw___attribute) && 1429 Line.Type != LT_PreprocessorDirective); 1430 } 1431 if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword) 1432 return false; 1433 if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) 1434 return !Left.Children.empty(); // No spaces in "{}". 1435 if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) || 1436 (Right.is(tok::r_brace) && Right.MatchingParen && 1437 Right.MatchingParen->BlockKind != BK_Block)) 1438 return !Style.Cpp11BracedListStyle; 1439 if (Left.Type == TT_BlockComment && Left.TokenText.endswith("=*/")) 1440 return false; 1441 if (Right.Type == TT_UnaryOperator) 1442 return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) && 1443 (Left.isNot(tok::colon) || Left.Type != TT_ObjCMethodExpr); 1444 if (Left.isOneOf(tok::identifier, tok::greater, tok::r_square) && 1445 Right.is(tok::l_brace) && Right.getNextNonComment() && 1446 Right.BlockKind != BK_Block) 1447 return false; 1448 if (Left.is(tok::period) || Right.is(tok::period)) 1449 return false; 1450 if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L") 1451 return false; 1452 return true; 1453 } 1454 1455 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, 1456 const FormatToken &Tok) { 1457 if (Tok.Tok.getIdentifierInfo() && Tok.Previous->Tok.getIdentifierInfo()) 1458 return true; // Never ever merge two identifiers. 1459 if (Tok.Previous->Type == TT_ImplicitStringLiteral) 1460 return Tok.WhitespaceRange.getBegin() != Tok.WhitespaceRange.getEnd(); 1461 if (Line.Type == LT_ObjCMethodDecl) { 1462 if (Tok.Previous->Type == TT_ObjCMethodSpecifier) 1463 return true; 1464 if (Tok.Previous->is(tok::r_paren) && Tok.is(tok::identifier)) 1465 // Don't space between ')' and <id> 1466 return false; 1467 } 1468 if (Line.Type == LT_ObjCProperty && 1469 (Tok.is(tok::equal) || Tok.Previous->is(tok::equal))) 1470 return false; 1471 1472 if (Tok.Type == TT_TrailingReturnArrow || 1473 Tok.Previous->Type == TT_TrailingReturnArrow) 1474 return true; 1475 if (Tok.Previous->is(tok::comma)) 1476 return true; 1477 if (Tok.is(tok::comma)) 1478 return false; 1479 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen) 1480 return true; 1481 if (Tok.Previous->Tok.is(tok::kw_operator)) 1482 return Tok.is(tok::coloncolon); 1483 if (Tok.Type == TT_OverloadedOperatorLParen) 1484 return false; 1485 if (Tok.is(tok::colon)) 1486 return !Line.First->isOneOf(tok::kw_case, tok::kw_default) && 1487 Tok.getNextNonComment() != NULL && Tok.Type != TT_ObjCMethodExpr && 1488 !Tok.Previous->is(tok::question) && 1489 (Tok.Type != TT_DictLiteral || Style.SpacesInContainerLiterals); 1490 if (Tok.Previous->Type == TT_UnaryOperator || 1491 Tok.Previous->Type == TT_CastRParen) 1492 return Tok.Type == TT_BinaryOperator; 1493 if (Tok.Previous->is(tok::greater) && Tok.is(tok::greater)) { 1494 return Tok.Type == TT_TemplateCloser && 1495 Tok.Previous->Type == TT_TemplateCloser && 1496 (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles); 1497 } 1498 if (Tok.isOneOf(tok::arrowstar, tok::periodstar) || 1499 Tok.Previous->isOneOf(tok::arrowstar, tok::periodstar)) 1500 return false; 1501 if (!Style.SpaceBeforeAssignmentOperators && 1502 Tok.getPrecedence() == prec::Assignment) 1503 return false; 1504 if ((Tok.Type == TT_BinaryOperator && !Tok.Previous->is(tok::l_paren)) || 1505 Tok.Previous->Type == TT_BinaryOperator || 1506 Tok.Previous->Type == TT_ConditionalExpr) 1507 return true; 1508 if (Tok.Previous->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) 1509 return false; 1510 if (Tok.is(tok::less) && Tok.Previous->isNot(tok::l_paren) && 1511 Line.First->is(tok::hash)) 1512 return true; 1513 if (Tok.Type == TT_TrailingUnaryOperator) 1514 return false; 1515 return spaceRequiredBetween(Line, *Tok.Previous, Tok); 1516 } 1517 1518 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, 1519 const FormatToken &Right) { 1520 const FormatToken &Left = *Right.Previous; 1521 if (Right.is(tok::comment)) { 1522 return Right.Previous->BlockKind != BK_BracedInit && 1523 Right.Previous->Type != TT_CtorInitializerColon && 1524 (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline); 1525 } else if (Right.Previous->isTrailingComment() || 1526 (Right.isStringLiteral() && Right.Previous->isStringLiteral())) { 1527 return true; 1528 } else if (Right.Previous->IsUnterminatedLiteral) { 1529 return true; 1530 } else if (Right.is(tok::lessless) && Right.Next && 1531 Right.Previous->is(tok::string_literal) && 1532 Right.Next->is(tok::string_literal)) { 1533 return true; 1534 } else if (Right.Previous->ClosesTemplateDeclaration && 1535 Right.Previous->MatchingParen && 1536 Right.Previous->MatchingParen->NestingLevel == 0 && 1537 Style.AlwaysBreakTemplateDeclarations) { 1538 return true; 1539 } else if ((Right.Type == TT_CtorInitializerComma || 1540 Right.Type == TT_CtorInitializerColon) && 1541 Style.BreakConstructorInitializersBeforeComma && 1542 !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) { 1543 return true; 1544 } else if (Right.is(tok::l_brace) && (Right.BlockKind == BK_Block)) { 1545 return Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1546 Style.BreakBeforeBraces == FormatStyle::BS_GNU; 1547 } else if (Right.is(tok::string_literal) && 1548 Right.TokenText.startswith("R\"")) { 1549 // Raw string literals are special wrt. line breaks. The author has made a 1550 // deliberate choice and might have aligned the contents of the string 1551 // literal accordingly. Thus, we try keep existing line breaks. 1552 return Right.NewlinesBefore > 0; 1553 } else if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 && 1554 Style.Language == FormatStyle::LK_Proto) { 1555 // Don't enums onto single lines in protocol buffers. 1556 return true; 1557 } 1558 1559 // If the last token before a '}' is a comma or a comment, the intention is to 1560 // insert a line break after it in order to make shuffling around entries 1561 // easier. 1562 const FormatToken *BeforeClosingBrace = nullptr; 1563 if (Left.is(tok::l_brace) && Left.MatchingParen) 1564 BeforeClosingBrace = Left.MatchingParen->Previous; 1565 else if (Right.is(tok::r_brace)) 1566 BeforeClosingBrace = Right.Previous; 1567 if (BeforeClosingBrace && 1568 BeforeClosingBrace->isOneOf(tok::comma, tok::comment)) 1569 return true; 1570 1571 return false; 1572 } 1573 1574 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, 1575 const FormatToken &Right) { 1576 const FormatToken &Left = *Right.Previous; 1577 if (Left.is(tok::at)) 1578 return false; 1579 if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator)) 1580 return true; 1581 if (Right.isTrailingComment()) 1582 // We rely on MustBreakBefore being set correctly here as we should not 1583 // change the "binding" behavior of a comment. 1584 // The first comment in a braced lists is always interpreted as belonging to 1585 // the first list element. Otherwise, it should be placed outside of the 1586 // list. 1587 return Left.BlockKind == BK_BracedInit; 1588 if (Left.is(tok::question) && Right.is(tok::colon)) 1589 return false; 1590 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question)) 1591 return Style.BreakBeforeTernaryOperators; 1592 if (Left.Type == TT_ConditionalExpr || Left.is(tok::question)) 1593 return !Style.BreakBeforeTernaryOperators; 1594 if (Right.is(tok::colon) && 1595 (Right.Type == TT_DictLiteral || Right.Type == TT_ObjCMethodExpr)) 1596 return false; 1597 if (Right.Type == TT_InheritanceColon) 1598 return true; 1599 if (Left.is(tok::colon) && 1600 (Left.Type == TT_DictLiteral || Left.Type == TT_ObjCMethodExpr)) 1601 return true; 1602 if (Right.Type == TT_ObjCSelectorName) 1603 return true; 1604 if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty) 1605 return true; 1606 if (Left.ClosesTemplateDeclaration) 1607 return true; 1608 if (Right.Type == TT_RangeBasedForLoopColon || 1609 Right.Type == TT_OverloadedOperatorLParen || 1610 Right.Type == TT_OverloadedOperator) 1611 return false; 1612 if (Left.Type == TT_RangeBasedForLoopColon) 1613 return true; 1614 if (Right.Type == TT_RangeBasedForLoopColon) 1615 return false; 1616 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || 1617 Left.Type == TT_UnaryOperator || Left.is(tok::kw_operator)) 1618 return false; 1619 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) 1620 return false; 1621 if (Left.is(tok::l_paren) && Left.Type == TT_AttributeParen) 1622 return false; 1623 if (Left.is(tok::l_paren) && Left.Previous && 1624 (Left.Previous->Type == TT_BinaryOperator || 1625 Left.Previous->Type == TT_CastRParen)) 1626 return false; 1627 if (Right.Type == TT_ImplicitStringLiteral) 1628 return false; 1629 1630 if (Right.is(tok::r_paren) || Right.Type == TT_TemplateCloser) 1631 return false; 1632 1633 // We only break before r_brace if there was a corresponding break before 1634 // the l_brace, which is tracked by BreakBeforeClosingBrace. 1635 if (Right.is(tok::r_brace)) 1636 return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block; 1637 1638 // Allow breaking after a trailing annotation, e.g. after a method 1639 // declaration. 1640 if (Left.Type == TT_TrailingAnnotation) 1641 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren, 1642 tok::less, tok::coloncolon); 1643 1644 if (Right.is(tok::kw___attribute)) 1645 return true; 1646 1647 if (Left.is(tok::identifier) && Right.is(tok::string_literal)) 1648 return true; 1649 1650 if (Left.Type == TT_CtorInitializerComma && 1651 Style.BreakConstructorInitializersBeforeComma) 1652 return false; 1653 if (Right.Type == TT_CtorInitializerComma && 1654 Style.BreakConstructorInitializersBeforeComma) 1655 return true; 1656 if (Right.Type == TT_BinaryOperator && Style.BreakBeforeBinaryOperators) 1657 return true; 1658 if (Left.is(tok::greater) && Right.is(tok::greater) && 1659 Left.Type != TT_TemplateCloser) 1660 return false; 1661 if (Left.Type == TT_ArrayInitializerLSquare) 1662 return true; 1663 return (Left.isBinaryOperator() && Left.isNot(tok::lessless) && 1664 !Style.BreakBeforeBinaryOperators) || 1665 Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, 1666 tok::kw_class, tok::kw_struct) || 1667 Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon, 1668 tok::l_square, tok::at) || 1669 (Left.is(tok::r_paren) && 1670 Right.isOneOf(tok::identifier, tok::kw_const)) || 1671 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)); 1672 } 1673 1674 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { 1675 llvm::errs() << "AnnotatedTokens:\n"; 1676 const FormatToken *Tok = Line.First; 1677 while (Tok) { 1678 llvm::errs() << " M=" << Tok->MustBreakBefore 1679 << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type 1680 << " S=" << Tok->SpacesRequiredBefore 1681 << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName() 1682 << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind 1683 << " FakeLParens="; 1684 for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) 1685 llvm::errs() << Tok->FakeLParens[i] << "/"; 1686 llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n"; 1687 if (Tok->Next == NULL) 1688 assert(Tok == Line.Last); 1689 Tok = Tok->Next; 1690 } 1691 llvm::errs() << "----\n"; 1692 } 1693 1694 } // namespace format 1695 } // namespace clang 1696