1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements a token annotator, i.e. creates 12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "TokenAnnotator.h" 17 #include "clang/Basic/SourceManager.h" 18 #include "llvm/Support/Debug.h" 19 20 namespace clang { 21 namespace format { 22 23 namespace { 24 25 /// \brief A parser that gathers additional information about tokens. 26 /// 27 /// The \c TokenAnnotator tries to match parenthesis and square brakets and 28 /// store a parenthesis levels. It also tries to resolve matching "<" and ">" 29 /// into template parameter lists. 30 class AnnotatingParser { 31 public: 32 AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line, 33 IdentifierInfo &Ident_in) 34 : Style(Style), Line(Line), CurrentToken(Line.First), 35 KeywordVirtualFound(false), AutoFound(false), Ident_in(Ident_in) { 36 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false)); 37 resetTokenMetadata(CurrentToken); 38 } 39 40 private: 41 bool parseAngle() { 42 if (CurrentToken == NULL) 43 return false; 44 ScopedContextCreator ContextCreator(*this, tok::less, 10); 45 FormatToken *Left = CurrentToken->Previous; 46 Contexts.back().IsExpression = false; 47 // If there's a template keyword before the opening angle bracket, this is a 48 // template parameter, not an argument. 49 Contexts.back().InTemplateArgument = 50 Left->Previous != NULL && Left->Previous->Tok.isNot(tok::kw_template); 51 52 while (CurrentToken != NULL) { 53 if (CurrentToken->is(tok::greater)) { 54 Left->MatchingParen = CurrentToken; 55 CurrentToken->MatchingParen = Left; 56 CurrentToken->Type = TT_TemplateCloser; 57 next(); 58 return true; 59 } 60 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace, 61 tok::question, tok::colon)) 62 return false; 63 // If a && or || is found and interpreted as a binary operator, this set 64 // of angles is likely part of something like "a < b && c > d". If the 65 // angles are inside an expression, the ||/&& might also be a binary 66 // operator that was misinterpreted because we are parsing template 67 // parameters. 68 // FIXME: This is getting out of hand, write a decent parser. 69 if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) && 70 ((CurrentToken->Previous->Type == TT_BinaryOperator && 71 // Toplevel bool expressions do not make lots of sense; 72 // If we're on the top level, it contains only the base context and 73 // the context for the current opening angle bracket. 74 Contexts.size() > 2) || 75 Contexts[Contexts.size() - 2].IsExpression) && 76 Line.First->isNot(tok::kw_template)) 77 return false; 78 updateParameterCount(Left, CurrentToken); 79 if (!consumeToken()) 80 return false; 81 } 82 return false; 83 } 84 85 bool parseParens(bool LookForDecls = false) { 86 if (CurrentToken == NULL) 87 return false; 88 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1); 89 90 // FIXME: This is a bit of a hack. Do better. 91 Contexts.back().ColonIsForRangeExpr = 92 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; 93 94 bool StartsObjCMethodExpr = false; 95 FormatToken *Left = CurrentToken->Previous; 96 if (CurrentToken->is(tok::caret)) { 97 // (^ can start a block type. 98 Left->Type = TT_ObjCBlockLParen; 99 } else if (FormatToken *MaybeSel = Left->Previous) { 100 // @selector( starts a selector. 101 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous && 102 MaybeSel->Previous->is(tok::at)) { 103 StartsObjCMethodExpr = true; 104 } 105 } 106 107 if (Left->Previous && 108 (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_if, 109 tok::kw_while, tok::l_paren, tok::comma) || 110 Left->Previous->Type == TT_BinaryOperator)) { 111 // static_assert, if and while usually contain expressions. 112 Contexts.back().IsExpression = true; 113 } else if (Left->Previous && Left->Previous->is(tok::r_square) && 114 Left->Previous->MatchingParen && 115 Left->Previous->MatchingParen->Type == TT_LambdaLSquare) { 116 // This is a parameter list of a lambda expression. 117 Contexts.back().IsExpression = false; 118 } else if (Contexts[Contexts.size() - 2].CaretFound) { 119 // This is the parameter list of an ObjC block. 120 Contexts.back().IsExpression = false; 121 } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) { 122 Left->Type = TT_AttributeParen; 123 } else if (Left->Previous && Left->Previous->IsForEachMacro) { 124 // The first argument to a foreach macro is a declaration. 125 Contexts.back().IsForEachMacro = true; 126 Contexts.back().IsExpression = false; 127 } 128 129 if (StartsObjCMethodExpr) { 130 Contexts.back().ColonIsObjCMethodExpr = true; 131 Left->Type = TT_ObjCMethodExpr; 132 } 133 134 bool MightBeFunctionType = CurrentToken->is(tok::star); 135 bool HasMultipleLines = false; 136 bool HasMultipleParametersOnALine = false; 137 while (CurrentToken != NULL) { 138 // LookForDecls is set when "if (" has been seen. Check for 139 // 'identifier' '*' 'identifier' followed by not '=' -- this 140 // '*' has to be a binary operator but determineStarAmpUsage() will 141 // categorize it as an unary operator, so set the right type here. 142 if (LookForDecls && CurrentToken->Next) { 143 FormatToken *Prev = CurrentToken->getPreviousNonComment(); 144 if (Prev) { 145 FormatToken *PrevPrev = Prev->getPreviousNonComment(); 146 FormatToken *Next = CurrentToken->Next; 147 if (PrevPrev && PrevPrev->is(tok::identifier) && 148 Prev->isOneOf(tok::star, tok::amp, tok::ampamp) && 149 CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) { 150 Prev->Type = TT_BinaryOperator; 151 LookForDecls = false; 152 } 153 } 154 } 155 156 if (CurrentToken->Previous->Type == TT_PointerOrReference && 157 CurrentToken->Previous->Previous->isOneOf(tok::l_paren, 158 tok::coloncolon)) 159 MightBeFunctionType = true; 160 if (CurrentToken->is(tok::r_paren)) { 161 if (MightBeFunctionType && CurrentToken->Next && 162 (CurrentToken->Next->is(tok::l_paren) || 163 (CurrentToken->Next->is(tok::l_square) && 164 !Contexts.back().IsExpression))) 165 Left->Type = TT_FunctionTypeLParen; 166 Left->MatchingParen = CurrentToken; 167 CurrentToken->MatchingParen = Left; 168 169 if (StartsObjCMethodExpr) { 170 CurrentToken->Type = TT_ObjCMethodExpr; 171 if (Contexts.back().FirstObjCSelectorName != NULL) { 172 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 173 Contexts.back().LongestObjCSelectorName; 174 } 175 } 176 177 if (Left->Type == TT_AttributeParen) 178 CurrentToken->Type = TT_AttributeParen; 179 180 if (!HasMultipleLines) 181 Left->PackingKind = PPK_Inconclusive; 182 else if (HasMultipleParametersOnALine) 183 Left->PackingKind = PPK_BinPacked; 184 else 185 Left->PackingKind = PPK_OnePerLine; 186 187 next(); 188 return true; 189 } 190 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace)) 191 return false; 192 else if (CurrentToken->is(tok::l_brace)) 193 Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen 194 updateParameterCount(Left, CurrentToken); 195 if (CurrentToken->is(tok::comma) && CurrentToken->Next && 196 !CurrentToken->Next->HasUnescapedNewline && 197 !CurrentToken->Next->isTrailingComment()) 198 HasMultipleParametersOnALine = true; 199 if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) || 200 CurrentToken->isSimpleTypeSpecifier()) 201 Contexts.back().IsExpression = false; 202 if (!consumeToken()) 203 return false; 204 if (CurrentToken && CurrentToken->HasUnescapedNewline) 205 HasMultipleLines = true; 206 } 207 return false; 208 } 209 210 bool parseSquare() { 211 if (!CurrentToken) 212 return false; 213 214 // A '[' could be an index subscript (after an identifier or after 215 // ')' or ']'), it could be the start of an Objective-C method 216 // expression, or it could the the start of an Objective-C array literal. 217 FormatToken *Left = CurrentToken->Previous; 218 FormatToken *Parent = Left->getPreviousNonComment(); 219 bool StartsObjCMethodExpr = 220 Contexts.back().CanBeExpression && Left->Type != TT_LambdaLSquare && 221 (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, 222 tok::kw_return, tok::kw_throw) || 223 Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn || 224 Parent->Type == TT_CastRParen || 225 getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown); 226 ScopedContextCreator ContextCreator(*this, tok::l_square, 10); 227 Contexts.back().IsExpression = true; 228 bool ColonFound = false; 229 230 if (StartsObjCMethodExpr) { 231 Contexts.back().ColonIsObjCMethodExpr = true; 232 Left->Type = TT_ObjCMethodExpr; 233 } else if (Parent && Parent->is(tok::at)) { 234 Left->Type = TT_ArrayInitializerLSquare; 235 } else if (Left->Type == TT_Unknown) { 236 Left->Type = TT_ArraySubscriptLSquare; 237 } 238 239 while (CurrentToken != NULL) { 240 if (CurrentToken->is(tok::r_square)) { 241 if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) && 242 Left->Type == TT_ObjCMethodExpr) { 243 // An ObjC method call is rarely followed by an open parenthesis. 244 // FIXME: Do we incorrectly label ":" with this? 245 StartsObjCMethodExpr = false; 246 Left->Type = TT_Unknown; 247 } 248 if (StartsObjCMethodExpr && CurrentToken->Previous != Left) { 249 CurrentToken->Type = TT_ObjCMethodExpr; 250 // determineStarAmpUsage() thinks that '*' '[' is allocating an 251 // array of pointers, but if '[' starts a selector then '*' is a 252 // binary operator. 253 if (Parent != NULL && Parent->Type == TT_PointerOrReference) 254 Parent->Type = TT_BinaryOperator; 255 } 256 Left->MatchingParen = CurrentToken; 257 CurrentToken->MatchingParen = Left; 258 if (Contexts.back().FirstObjCSelectorName != NULL) { 259 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 260 Contexts.back().LongestObjCSelectorName; 261 if (Contexts.back().NumBlockParameters > 1) 262 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0; 263 } 264 next(); 265 return true; 266 } 267 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace)) 268 return false; 269 if (CurrentToken->is(tok::colon)) 270 ColonFound = true; 271 if (CurrentToken->is(tok::comma) && 272 Style.Language != FormatStyle::LK_Proto && 273 (Left->Type == TT_ArraySubscriptLSquare || 274 (Left->Type == TT_ObjCMethodExpr && !ColonFound))) 275 Left->Type = TT_ArrayInitializerLSquare; 276 updateParameterCount(Left, CurrentToken); 277 if (!consumeToken()) 278 return false; 279 } 280 return false; 281 } 282 283 bool parseBrace() { 284 if (CurrentToken != NULL) { 285 FormatToken *Left = CurrentToken->Previous; 286 287 if (Contexts.back().CaretFound) 288 Left->Type = TT_ObjCBlockLBrace; 289 Contexts.back().CaretFound = false; 290 291 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); 292 Contexts.back().ColonIsDictLiteral = true; 293 294 while (CurrentToken != NULL) { 295 if (CurrentToken->is(tok::r_brace)) { 296 Left->MatchingParen = CurrentToken; 297 CurrentToken->MatchingParen = Left; 298 next(); 299 return true; 300 } 301 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) 302 return false; 303 updateParameterCount(Left, CurrentToken); 304 if (CurrentToken->is(tok::colon) && 305 Style.Language != FormatStyle::LK_Proto) 306 Left->Type = TT_DictLiteral; 307 if (!consumeToken()) 308 return false; 309 } 310 } 311 // No closing "}" found, this probably starts a definition. 312 Line.StartsDefinition = true; 313 return true; 314 } 315 316 void updateParameterCount(FormatToken *Left, FormatToken *Current) { 317 if (Current->is(tok::comma)) { 318 ++Left->ParameterCount; 319 if (!Left->Role) 320 Left->Role.reset(new CommaSeparatedList(Style)); 321 Left->Role->CommaFound(Current); 322 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) { 323 Left->ParameterCount = 1; 324 } 325 } 326 327 bool parseConditional() { 328 while (CurrentToken != NULL) { 329 if (CurrentToken->is(tok::colon)) { 330 CurrentToken->Type = TT_ConditionalExpr; 331 next(); 332 return true; 333 } 334 if (!consumeToken()) 335 return false; 336 } 337 return false; 338 } 339 340 bool parseTemplateDeclaration() { 341 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 342 CurrentToken->Type = TT_TemplateOpener; 343 next(); 344 if (!parseAngle()) 345 return false; 346 if (CurrentToken != NULL) 347 CurrentToken->Previous->ClosesTemplateDeclaration = true; 348 return true; 349 } 350 return false; 351 } 352 353 bool consumeToken() { 354 FormatToken *Tok = CurrentToken; 355 next(); 356 switch (Tok->Tok.getKind()) { 357 case tok::plus: 358 case tok::minus: 359 if (Tok->Previous == NULL && Line.MustBeDeclaration) 360 Tok->Type = TT_ObjCMethodSpecifier; 361 break; 362 case tok::colon: 363 if (Tok->Previous == NULL) 364 return false; 365 // Colons from ?: are handled in parseConditional(). 366 if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1) { 367 Tok->Type = TT_CtorInitializerColon; 368 } else if (Contexts.back().ColonIsDictLiteral) { 369 Tok->Type = TT_DictLiteral; 370 } else if (Contexts.back().ColonIsObjCMethodExpr || 371 Line.First->Type == TT_ObjCMethodSpecifier) { 372 Tok->Type = TT_ObjCMethodExpr; 373 Tok->Previous->Type = TT_ObjCSelectorName; 374 if (Tok->Previous->ColumnWidth > 375 Contexts.back().LongestObjCSelectorName) { 376 Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth; 377 } 378 if (Contexts.back().FirstObjCSelectorName == NULL) 379 Contexts.back().FirstObjCSelectorName = Tok->Previous; 380 } else if (Contexts.back().ColonIsForRangeExpr) { 381 Tok->Type = TT_RangeBasedForLoopColon; 382 } else if (CurrentToken != NULL && 383 CurrentToken->is(tok::numeric_constant)) { 384 Tok->Type = TT_BitFieldColon; 385 } else if (Contexts.size() == 1 && Line.First->isNot(tok::kw_enum)) { 386 Tok->Type = TT_InheritanceColon; 387 } else if (Contexts.back().ContextKind == tok::l_paren) { 388 Tok->Type = TT_InlineASMColon; 389 } 390 break; 391 case tok::kw_if: 392 case tok::kw_while: 393 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) { 394 next(); 395 if (!parseParens(/*LookForDecls=*/true)) 396 return false; 397 } 398 break; 399 case tok::kw_for: 400 Contexts.back().ColonIsForRangeExpr = true; 401 next(); 402 if (!parseParens()) 403 return false; 404 break; 405 case tok::l_paren: 406 if (!parseParens()) 407 return false; 408 if (Line.MustBeDeclaration && Contexts.size() == 1 && 409 !Contexts.back().IsExpression && Line.First->Type != TT_ObjCProperty) 410 Line.MightBeFunctionDecl = true; 411 break; 412 case tok::l_square: 413 if (!parseSquare()) 414 return false; 415 break; 416 case tok::l_brace: 417 if (!parseBrace()) 418 return false; 419 break; 420 case tok::less: 421 if (Tok->Previous && !Tok->Previous->Tok.isLiteral() && parseAngle()) 422 Tok->Type = TT_TemplateOpener; 423 else { 424 Tok->Type = TT_BinaryOperator; 425 CurrentToken = Tok; 426 next(); 427 } 428 break; 429 case tok::r_paren: 430 case tok::r_square: 431 return false; 432 case tok::r_brace: 433 // Lines can start with '}'. 434 if (Tok->Previous != NULL) 435 return false; 436 break; 437 case tok::greater: 438 Tok->Type = TT_BinaryOperator; 439 break; 440 case tok::kw_operator: 441 while (CurrentToken && 442 !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) { 443 if (CurrentToken->isOneOf(tok::star, tok::amp)) 444 CurrentToken->Type = TT_PointerOrReference; 445 consumeToken(); 446 if (CurrentToken && CurrentToken->Previous->Type == TT_BinaryOperator) 447 CurrentToken->Previous->Type = TT_OverloadedOperator; 448 } 449 if (CurrentToken) { 450 CurrentToken->Type = TT_OverloadedOperatorLParen; 451 if (CurrentToken->Previous->Type == TT_BinaryOperator) 452 CurrentToken->Previous->Type = TT_OverloadedOperator; 453 } 454 break; 455 case tok::question: 456 parseConditional(); 457 break; 458 case tok::kw_template: 459 parseTemplateDeclaration(); 460 break; 461 case tok::identifier: 462 if (Line.First->is(tok::kw_for) && 463 Tok->Tok.getIdentifierInfo() == &Ident_in) 464 Tok->Type = TT_ObjCForIn; 465 break; 466 case tok::comma: 467 if (Contexts.back().FirstStartOfName) 468 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; 469 if (Contexts.back().InCtorInitializer) 470 Tok->Type = TT_CtorInitializerComma; 471 if (Contexts.back().IsForEachMacro) 472 Contexts.back().IsExpression = true; 473 break; 474 default: 475 break; 476 } 477 return true; 478 } 479 480 void parseIncludeDirective() { 481 next(); 482 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 483 next(); 484 while (CurrentToken != NULL) { 485 if (CurrentToken->isNot(tok::comment) || CurrentToken->Next) 486 CurrentToken->Type = TT_ImplicitStringLiteral; 487 next(); 488 } 489 } else { 490 while (CurrentToken != NULL) { 491 if (CurrentToken->is(tok::string_literal)) 492 // Mark these string literals as "implicit" literals, too, so that 493 // they are not split or line-wrapped. 494 CurrentToken->Type = TT_ImplicitStringLiteral; 495 next(); 496 } 497 } 498 } 499 500 void parseWarningOrError() { 501 next(); 502 // We still want to format the whitespace left of the first token of the 503 // warning or error. 504 next(); 505 while (CurrentToken != NULL) { 506 CurrentToken->Type = TT_ImplicitStringLiteral; 507 next(); 508 } 509 } 510 511 void parsePragma() { 512 next(); // Consume "pragma". 513 if (CurrentToken && CurrentToken->TokenText == "mark") { 514 next(); // Consume "mark". 515 next(); // Consume first token (so we fix leading whitespace). 516 while (CurrentToken != NULL) { 517 CurrentToken->Type = TT_ImplicitStringLiteral; 518 next(); 519 } 520 } 521 } 522 523 void parsePreprocessorDirective() { 524 next(); 525 if (CurrentToken == NULL) 526 return; 527 if (CurrentToken->Tok.is(tok::numeric_constant)) { 528 CurrentToken->SpacesRequiredBefore = 1; 529 return; 530 } 531 // Hashes in the middle of a line can lead to any strange token 532 // sequence. 533 if (CurrentToken->Tok.getIdentifierInfo() == NULL) 534 return; 535 switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) { 536 case tok::pp_include: 537 case tok::pp_import: 538 parseIncludeDirective(); 539 break; 540 case tok::pp_error: 541 case tok::pp_warning: 542 parseWarningOrError(); 543 break; 544 case tok::pp_pragma: 545 parsePragma(); 546 break; 547 case tok::pp_if: 548 case tok::pp_elif: 549 Contexts.back().IsExpression = true; 550 parseLine(); 551 break; 552 default: 553 break; 554 } 555 while (CurrentToken != NULL) 556 next(); 557 } 558 559 public: 560 LineType parseLine() { 561 if (CurrentToken->is(tok::hash)) { 562 parsePreprocessorDirective(); 563 return LT_PreprocessorDirective; 564 } 565 566 // Directly allow to 'import <string-literal>' to support protocol buffer 567 // definitions (code.google.com/p/protobuf) or missing "#" (either way we 568 // should not break the line). 569 IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo(); 570 if (Info && Info->getPPKeywordID() == tok::pp_import && 571 CurrentToken->Next && CurrentToken->Next->is(tok::string_literal)) 572 parseIncludeDirective(); 573 574 while (CurrentToken != NULL) { 575 if (CurrentToken->is(tok::kw_virtual)) 576 KeywordVirtualFound = true; 577 if (!consumeToken()) 578 return LT_Invalid; 579 } 580 if (KeywordVirtualFound) 581 return LT_VirtualFunctionDecl; 582 583 if (Line.First->Type == TT_ObjCMethodSpecifier) { 584 if (Contexts.back().FirstObjCSelectorName != NULL) 585 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 586 Contexts.back().LongestObjCSelectorName; 587 return LT_ObjCMethodDecl; 588 } 589 590 return LT_Other; 591 } 592 593 private: 594 void resetTokenMetadata(FormatToken *Token) { 595 if (Token == nullptr) return; 596 597 // Reset token type in case we have already looked at it and then 598 // recovered from an error (e.g. failure to find the matching >). 599 if (CurrentToken->Type != TT_LambdaLSquare && 600 CurrentToken->Type != TT_FunctionLBrace && 601 CurrentToken->Type != TT_ImplicitStringLiteral && 602 CurrentToken->Type != TT_TrailingReturnArrow) 603 CurrentToken->Type = TT_Unknown; 604 if (CurrentToken->Role) 605 CurrentToken->Role.reset(NULL); 606 CurrentToken->FakeLParens.clear(); 607 CurrentToken->FakeRParens = 0; 608 } 609 610 void next() { 611 if (CurrentToken != NULL) { 612 determineTokenType(*CurrentToken); 613 CurrentToken->BindingStrength = Contexts.back().BindingStrength; 614 CurrentToken->NestingLevel = Contexts.size() - 1; 615 } 616 617 if (CurrentToken != NULL) 618 CurrentToken = CurrentToken->Next; 619 620 resetTokenMetadata(CurrentToken); 621 } 622 623 /// \brief A struct to hold information valid in a specific context, e.g. 624 /// a pair of parenthesis. 625 struct Context { 626 Context(tok::TokenKind ContextKind, unsigned BindingStrength, 627 bool IsExpression) 628 : ContextKind(ContextKind), BindingStrength(BindingStrength), 629 LongestObjCSelectorName(0), NumBlockParameters(0), 630 ColonIsForRangeExpr(false), ColonIsDictLiteral(false), 631 ColonIsObjCMethodExpr(false), FirstObjCSelectorName(NULL), 632 FirstStartOfName(NULL), IsExpression(IsExpression), 633 CanBeExpression(true), InTemplateArgument(false), 634 InCtorInitializer(false), CaretFound(false), IsForEachMacro(false) {} 635 636 tok::TokenKind ContextKind; 637 unsigned BindingStrength; 638 unsigned LongestObjCSelectorName; 639 unsigned NumBlockParameters; 640 bool ColonIsForRangeExpr; 641 bool ColonIsDictLiteral; 642 bool ColonIsObjCMethodExpr; 643 FormatToken *FirstObjCSelectorName; 644 FormatToken *FirstStartOfName; 645 bool IsExpression; 646 bool CanBeExpression; 647 bool InTemplateArgument; 648 bool InCtorInitializer; 649 bool CaretFound; 650 bool IsForEachMacro; 651 }; 652 653 /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime 654 /// of each instance. 655 struct ScopedContextCreator { 656 AnnotatingParser &P; 657 658 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind, 659 unsigned Increase) 660 : P(P) { 661 P.Contexts.push_back(Context(ContextKind, 662 P.Contexts.back().BindingStrength + Increase, 663 P.Contexts.back().IsExpression)); 664 } 665 666 ~ScopedContextCreator() { P.Contexts.pop_back(); } 667 }; 668 669 void determineTokenType(FormatToken &Current) { 670 if (Current.getPrecedence() == prec::Assignment && 671 !Line.First->isOneOf(tok::kw_template, tok::kw_using) && 672 (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) { 673 Contexts.back().IsExpression = true; 674 for (FormatToken *Previous = Current.Previous; 675 Previous && !Previous->isOneOf(tok::comma, tok::semi); 676 Previous = Previous->Previous) { 677 if (Previous->is(tok::r_square)) 678 Previous = Previous->MatchingParen; 679 if (Previous->Type == TT_BinaryOperator && 680 Previous->isOneOf(tok::star, tok::amp)) { 681 Previous->Type = TT_PointerOrReference; 682 } 683 } 684 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) { 685 Contexts.back().IsExpression = true; 686 } else if (Current.is(tok::l_paren) && !Line.MustBeDeclaration && 687 !Line.InPPDirective) { 688 bool ParametersOfFunctionType = 689 Current.Previous && Current.Previous->is(tok::r_paren) && 690 Current.Previous->MatchingParen && 691 Current.Previous->MatchingParen->Type == TT_FunctionTypeLParen; 692 bool IsForOrCatch = Current.Previous && 693 Current.Previous->isOneOf(tok::kw_for, tok::kw_catch); 694 Contexts.back().IsExpression = !ParametersOfFunctionType && !IsForOrCatch; 695 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { 696 for (FormatToken *Previous = Current.Previous; 697 Previous && Previous->isOneOf(tok::star, tok::amp); 698 Previous = Previous->Previous) 699 Previous->Type = TT_PointerOrReference; 700 } else if (Current.Previous && 701 Current.Previous->Type == TT_CtorInitializerColon) { 702 Contexts.back().IsExpression = true; 703 Contexts.back().InCtorInitializer = true; 704 } else if (Current.is(tok::kw_new)) { 705 Contexts.back().CanBeExpression = false; 706 } else if (Current.is(tok::semi) || Current.is(tok::exclaim)) { 707 // This should be the condition or increment in a for-loop. 708 Contexts.back().IsExpression = true; 709 } 710 711 if (Current.Type == TT_Unknown) { 712 // Line.MightBeFunctionDecl can only be true after the parentheses of a 713 // function declaration have been found. In this case, 'Current' is a 714 // trailing token of this declaration and thus cannot be a name. 715 if (isStartOfName(Current) && !Line.MightBeFunctionDecl) { 716 Contexts.back().FirstStartOfName = &Current; 717 Current.Type = TT_StartOfName; 718 } else if (Current.is(tok::kw_auto)) { 719 AutoFound = true; 720 } else if (Current.is(tok::arrow) && AutoFound && 721 Line.MustBeDeclaration) { 722 Current.Type = TT_TrailingReturnArrow; 723 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { 724 Current.Type = 725 determineStarAmpUsage(Current, Contexts.back().CanBeExpression && 726 Contexts.back().IsExpression, 727 Contexts.back().InTemplateArgument); 728 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { 729 Current.Type = determinePlusMinusCaretUsage(Current); 730 if (Current.Type == TT_UnaryOperator) { 731 ++Contexts.back().NumBlockParameters; 732 if (Current.is(tok::caret)) 733 Contexts.back().CaretFound = true; 734 } 735 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { 736 Current.Type = determineIncrementUsage(Current); 737 } else if (Current.is(tok::exclaim)) { 738 Current.Type = TT_UnaryOperator; 739 } else if (Current.isBinaryOperator() && 740 (!Current.Previous || 741 Current.Previous->isNot(tok::l_square))) { 742 Current.Type = TT_BinaryOperator; 743 } else if (Current.is(tok::comment)) { 744 if (Current.TokenText.startswith("//")) 745 Current.Type = TT_LineComment; 746 else 747 Current.Type = TT_BlockComment; 748 } else if (Current.is(tok::r_paren)) { 749 FormatToken *LeftOfParens = NULL; 750 if (Current.MatchingParen) 751 LeftOfParens = Current.MatchingParen->getPreviousNonComment(); 752 bool IsCast = false; 753 bool ParensAreEmpty = Current.Previous == Current.MatchingParen; 754 bool ParensAreType = !Current.Previous || 755 Current.Previous->Type == TT_PointerOrReference || 756 Current.Previous->Type == TT_TemplateCloser || 757 Current.Previous->isSimpleTypeSpecifier(); 758 bool ParensCouldEndDecl = 759 Current.Next && 760 Current.Next->isOneOf(tok::equal, tok::semi, tok::l_brace); 761 bool IsSizeOfOrAlignOf = 762 LeftOfParens && 763 LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof); 764 if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf && 765 ((Contexts.size() > 1 && 766 Contexts[Contexts.size() - 2].IsExpression) || 767 (Current.Next && Current.Next->isBinaryOperator()))) 768 IsCast = true; 769 if (Current.Next && Current.Next->isNot(tok::string_literal) && 770 (Current.Next->Tok.isLiteral() || 771 Current.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) 772 IsCast = true; 773 // If there is an identifier after the (), it is likely a cast, unless 774 // there is also an identifier before the (). 775 if (LeftOfParens && (LeftOfParens->Tok.getIdentifierInfo() == NULL || 776 LeftOfParens->is(tok::kw_return)) && 777 LeftOfParens->Type != TT_OverloadedOperator && 778 LeftOfParens->isNot(tok::at) && 779 LeftOfParens->Type != TT_TemplateCloser && Current.Next && 780 Current.Next->is(tok::identifier)) 781 IsCast = true; 782 if (IsCast && !ParensAreEmpty) 783 Current.Type = TT_CastRParen; 784 } else if (Current.is(tok::at) && Current.Next) { 785 switch (Current.Next->Tok.getObjCKeywordID()) { 786 case tok::objc_interface: 787 case tok::objc_implementation: 788 case tok::objc_protocol: 789 Current.Type = TT_ObjCDecl; 790 break; 791 case tok::objc_property: 792 Current.Type = TT_ObjCProperty; 793 break; 794 default: 795 break; 796 } 797 } else if (Current.is(tok::period)) { 798 FormatToken *PreviousNoComment = Current.getPreviousNonComment(); 799 if (PreviousNoComment && 800 PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) 801 Current.Type = TT_DesignatedInitializerPeriod; 802 } else if (Current.isOneOf(tok::identifier, tok::kw_const) && 803 Line.MightBeFunctionDecl && Contexts.size() == 1) { 804 // Line.MightBeFunctionDecl can only be true after the parentheses of a 805 // function declaration have been found. 806 Current.Type = TT_TrailingAnnotation; 807 } 808 } 809 } 810 811 /// \brief Take a guess at whether \p Tok starts a name of a function or 812 /// variable declaration. 813 /// 814 /// This is a heuristic based on whether \p Tok is an identifier following 815 /// something that is likely a type. 816 bool isStartOfName(const FormatToken &Tok) { 817 if (Tok.isNot(tok::identifier) || Tok.Previous == NULL) 818 return false; 819 820 // Skip "const" as it does not have an influence on whether this is a name. 821 FormatToken *PreviousNotConst = Tok.Previous; 822 while (PreviousNotConst != NULL && PreviousNotConst->is(tok::kw_const)) 823 PreviousNotConst = PreviousNotConst->Previous; 824 825 if (PreviousNotConst == NULL) 826 return false; 827 828 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) && 829 PreviousNotConst->Previous && 830 PreviousNotConst->Previous->is(tok::hash); 831 832 if (PreviousNotConst->Type == TT_TemplateCloser) 833 return PreviousNotConst && PreviousNotConst->MatchingParen && 834 PreviousNotConst->MatchingParen->Previous && 835 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template); 836 837 return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) || 838 PreviousNotConst->Type == TT_PointerOrReference || 839 PreviousNotConst->isSimpleTypeSpecifier(); 840 } 841 842 /// \brief Return the type of the given token assuming it is * or &. 843 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression, 844 bool InTemplateArgument) { 845 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 846 if (PrevToken == NULL) 847 return TT_UnaryOperator; 848 849 const FormatToken *NextToken = Tok.getNextNonComment(); 850 if (NextToken == NULL) 851 return TT_Unknown; 852 853 if (PrevToken->is(tok::coloncolon) || 854 (PrevToken->is(tok::l_paren) && !IsExpression)) 855 return TT_PointerOrReference; 856 857 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, 858 tok::comma, tok::semi, tok::kw_return, tok::colon, 859 tok::equal, tok::kw_delete, tok::kw_sizeof) || 860 PrevToken->Type == TT_BinaryOperator || 861 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen) 862 return TT_UnaryOperator; 863 864 if (NextToken->is(tok::l_square)) 865 return TT_PointerOrReference; 866 867 if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen && 868 PrevToken->MatchingParen->Previous && 869 PrevToken->MatchingParen->Previous->is(tok::kw_typeof)) 870 return TT_PointerOrReference; 871 872 if (PrevToken->Tok.isLiteral() || 873 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true, 874 tok::kw_false) || 875 NextToken->Tok.isLiteral() || 876 NextToken->isOneOf(tok::kw_true, tok::kw_false) || 877 NextToken->isUnaryOperator() || 878 // If we know we're in a template argument, there are no named 879 // declarations. Thus, having an identifier on the right-hand side 880 // indicates a binary operator. 881 (InTemplateArgument && NextToken->Tok.isAnyIdentifier())) 882 return TT_BinaryOperator; 883 884 // It is very unlikely that we are going to find a pointer or reference type 885 // definition on the RHS of an assignment. 886 if (IsExpression) 887 return TT_BinaryOperator; 888 889 return TT_PointerOrReference; 890 } 891 892 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) { 893 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 894 if (PrevToken == NULL || PrevToken->Type == TT_CastRParen) 895 return TT_UnaryOperator; 896 897 // Use heuristics to recognize unary operators. 898 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square, 899 tok::question, tok::colon, tok::kw_return, 900 tok::kw_case, tok::at, tok::l_brace)) 901 return TT_UnaryOperator; 902 903 // There can't be two consecutive binary operators. 904 if (PrevToken->Type == TT_BinaryOperator) 905 return TT_UnaryOperator; 906 907 // Fall back to marking the token as binary operator. 908 return TT_BinaryOperator; 909 } 910 911 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 912 TokenType determineIncrementUsage(const FormatToken &Tok) { 913 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 914 if (PrevToken == NULL || PrevToken->Type == TT_CastRParen) 915 return TT_UnaryOperator; 916 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier)) 917 return TT_TrailingUnaryOperator; 918 919 return TT_UnaryOperator; 920 } 921 922 923 SmallVector<Context, 8> Contexts; 924 925 const FormatStyle &Style; 926 AnnotatedLine &Line; 927 FormatToken *CurrentToken; 928 bool KeywordVirtualFound; 929 bool AutoFound; 930 IdentifierInfo &Ident_in; 931 }; 932 933 static int PrecedenceUnaryOperator = prec::PointerToMember + 1; 934 static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2; 935 936 /// \brief Parses binary expressions by inserting fake parenthesis based on 937 /// operator precedence. 938 class ExpressionParser { 939 public: 940 ExpressionParser(AnnotatedLine &Line) : Current(Line.First) { 941 // Skip leading "}", e.g. in "} else if (...) {". 942 if (Current->is(tok::r_brace)) 943 next(); 944 } 945 946 /// \brief Parse expressions with the given operatore precedence. 947 void parse(int Precedence = 0) { 948 // Skip 'return' and ObjC selector colons as they are not part of a binary 949 // expression. 950 while (Current && 951 (Current->is(tok::kw_return) || 952 (Current->is(tok::colon) && Current->Type == TT_ObjCMethodExpr))) 953 next(); 954 955 if (Current == NULL || Precedence > PrecedenceArrowAndPeriod) 956 return; 957 958 // Conditional expressions need to be parsed separately for proper nesting. 959 if (Precedence == prec::Conditional) { 960 parseConditionalExpr(); 961 return; 962 } 963 964 // Parse unary operators, which all have a higher precedence than binary 965 // operators. 966 if (Precedence == PrecedenceUnaryOperator) { 967 parseUnaryOperator(); 968 return; 969 } 970 971 FormatToken *Start = Current; 972 FormatToken *LatestOperator = NULL; 973 974 while (Current) { 975 // Consume operators with higher precedence. 976 parse(Precedence + 1); 977 978 int CurrentPrecedence = getCurrentPrecedence(); 979 980 if (Current && Current->Type == TT_ObjCSelectorName && 981 Precedence == CurrentPrecedence) { 982 if (LatestOperator) 983 addFakeParenthesis(Start, prec::Level(Precedence)); 984 Start = Current; 985 } 986 987 // At the end of the line or when an operator with higher precedence is 988 // found, insert fake parenthesis and return. 989 if (Current == NULL || Current->closesScope() || 990 (CurrentPrecedence != -1 && CurrentPrecedence < Precedence)) { 991 if (LatestOperator) { 992 if (Precedence == PrecedenceArrowAndPeriod) { 993 LatestOperator->LastInChainOfCalls = true; 994 // Call expressions don't have a binary operator precedence. 995 addFakeParenthesis(Start, prec::Unknown); 996 } else { 997 addFakeParenthesis(Start, prec::Level(Precedence)); 998 } 999 } 1000 return; 1001 } 1002 1003 // Consume scopes: (), [], <> and {} 1004 if (Current->opensScope()) { 1005 while (Current && !Current->closesScope()) { 1006 next(); 1007 parse(); 1008 } 1009 next(); 1010 } else { 1011 // Operator found. 1012 if (CurrentPrecedence == Precedence) 1013 LatestOperator = Current; 1014 1015 next(); 1016 } 1017 } 1018 } 1019 1020 private: 1021 /// \brief Gets the precedence (+1) of the given token for binary operators 1022 /// and other tokens that we treat like binary operators. 1023 int getCurrentPrecedence() { 1024 if (Current) { 1025 if (Current->Type == TT_ConditionalExpr) 1026 return prec::Conditional; 1027 else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon || 1028 Current->Type == TT_ObjCSelectorName) 1029 return 0; 1030 else if (Current->Type == TT_RangeBasedForLoopColon) 1031 return prec::Comma; 1032 else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma)) 1033 return Current->getPrecedence(); 1034 else if (Current->isOneOf(tok::period, tok::arrow)) 1035 return PrecedenceArrowAndPeriod; 1036 } 1037 return -1; 1038 } 1039 1040 void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) { 1041 Start->FakeLParens.push_back(Precedence); 1042 if (Precedence > prec::Unknown) 1043 Start->StartsBinaryExpression = true; 1044 if (Current) { 1045 ++Current->Previous->FakeRParens; 1046 if (Precedence > prec::Unknown) 1047 Current->Previous->EndsBinaryExpression = true; 1048 } 1049 } 1050 1051 /// \brief Parse unary operator expressions and surround them with fake 1052 /// parentheses if appropriate. 1053 void parseUnaryOperator() { 1054 if (Current == NULL || Current->Type != TT_UnaryOperator) { 1055 parse(PrecedenceArrowAndPeriod); 1056 return; 1057 } 1058 1059 FormatToken *Start = Current; 1060 next(); 1061 parseUnaryOperator(); 1062 1063 // The actual precedence doesn't matter. 1064 addFakeParenthesis(Start, prec::Unknown); 1065 } 1066 1067 void parseConditionalExpr() { 1068 FormatToken *Start = Current; 1069 parse(prec::LogicalOr); 1070 if (!Current || !Current->is(tok::question)) 1071 return; 1072 next(); 1073 parse(prec::LogicalOr); 1074 if (!Current || Current->Type != TT_ConditionalExpr) 1075 return; 1076 next(); 1077 parseConditionalExpr(); 1078 addFakeParenthesis(Start, prec::Conditional); 1079 } 1080 1081 void next() { 1082 if (Current) 1083 Current = Current->Next; 1084 while (Current && Current->isTrailingComment()) 1085 Current = Current->Next; 1086 } 1087 1088 FormatToken *Current; 1089 }; 1090 1091 } // end anonymous namespace 1092 1093 void 1094 TokenAnnotator::setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) { 1095 const AnnotatedLine *NextNonCommentLine = NULL; 1096 for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(), 1097 E = Lines.rend(); 1098 I != E; ++I) { 1099 if (NextNonCommentLine && (*I)->First->is(tok::comment) && 1100 (*I)->First->Next == NULL) 1101 (*I)->Level = NextNonCommentLine->Level; 1102 else 1103 NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : NULL; 1104 1105 setCommentLineLevels((*I)->Children); 1106 } 1107 } 1108 1109 void TokenAnnotator::annotate(AnnotatedLine &Line) { 1110 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), 1111 E = Line.Children.end(); 1112 I != E; ++I) { 1113 annotate(**I); 1114 } 1115 AnnotatingParser Parser(Style, Line, Ident_in); 1116 Line.Type = Parser.parseLine(); 1117 if (Line.Type == LT_Invalid) 1118 return; 1119 1120 ExpressionParser ExprParser(Line); 1121 ExprParser.parse(); 1122 1123 if (Line.First->Type == TT_ObjCMethodSpecifier) 1124 Line.Type = LT_ObjCMethodDecl; 1125 else if (Line.First->Type == TT_ObjCDecl) 1126 Line.Type = LT_ObjCDecl; 1127 else if (Line.First->Type == TT_ObjCProperty) 1128 Line.Type = LT_ObjCProperty; 1129 1130 Line.First->SpacesRequiredBefore = 1; 1131 Line.First->CanBreakBefore = Line.First->MustBreakBefore; 1132 } 1133 1134 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { 1135 Line.First->TotalLength = 1136 Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth; 1137 if (!Line.First->Next) 1138 return; 1139 FormatToken *Current = Line.First->Next; 1140 bool InFunctionDecl = Line.MightBeFunctionDecl; 1141 while (Current != NULL) { 1142 if (Current->Type == TT_LineComment) { 1143 if (Current->Previous->BlockKind == BK_BracedInit && 1144 Current->Previous->opensScope()) 1145 Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1; 1146 else 1147 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; 1148 1149 // If we find a trailing comment, iterate backwards to determine whether 1150 // it seems to relate to a specific parameter. If so, break before that 1151 // parameter to avoid changing the comment's meaning. E.g. don't move 'b' 1152 // to the previous line in: 1153 // SomeFunction(a, 1154 // b, // comment 1155 // c); 1156 if (!Current->HasUnescapedNewline) { 1157 for (FormatToken *Parameter = Current->Previous; Parameter; 1158 Parameter = Parameter->Previous) { 1159 if (Parameter->isOneOf(tok::comment, tok::r_brace)) 1160 break; 1161 if (Parameter->Previous && Parameter->Previous->is(tok::comma)) { 1162 if (Parameter->Previous->Type != TT_CtorInitializerComma && 1163 Parameter->HasUnescapedNewline) 1164 Parameter->MustBreakBefore = true; 1165 break; 1166 } 1167 } 1168 } 1169 } else if (Current->SpacesRequiredBefore == 0 && 1170 spaceRequiredBefore(Line, *Current)) { 1171 Current->SpacesRequiredBefore = 1; 1172 } 1173 1174 Current->MustBreakBefore = 1175 Current->MustBreakBefore || mustBreakBefore(Line, *Current); 1176 1177 Current->CanBreakBefore = 1178 Current->MustBreakBefore || canBreakBefore(Line, *Current); 1179 if (Current->MustBreakBefore || !Current->Children.empty() || 1180 Current->IsMultiline) 1181 Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit; 1182 else 1183 Current->TotalLength = Current->Previous->TotalLength + 1184 Current->ColumnWidth + 1185 Current->SpacesRequiredBefore; 1186 1187 if (Current->Type == TT_CtorInitializerColon) 1188 InFunctionDecl = false; 1189 1190 // FIXME: Only calculate this if CanBreakBefore is true once static 1191 // initializers etc. are sorted out. 1192 // FIXME: Move magic numbers to a better place. 1193 Current->SplitPenalty = 20 * Current->BindingStrength + 1194 splitPenalty(Line, *Current, InFunctionDecl); 1195 1196 Current = Current->Next; 1197 } 1198 1199 calculateUnbreakableTailLengths(Line); 1200 for (Current = Line.First; Current != NULL; Current = Current->Next) { 1201 if (Current->Role) 1202 Current->Role->precomputeFormattingInfos(Current); 1203 } 1204 1205 DEBUG({ printDebugInfo(Line); }); 1206 1207 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), 1208 E = Line.Children.end(); 1209 I != E; ++I) { 1210 calculateFormattingInformation(**I); 1211 } 1212 } 1213 1214 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) { 1215 unsigned UnbreakableTailLength = 0; 1216 FormatToken *Current = Line.Last; 1217 while (Current != NULL) { 1218 Current->UnbreakableTailLength = UnbreakableTailLength; 1219 if (Current->CanBreakBefore || 1220 Current->isOneOf(tok::comment, tok::string_literal)) { 1221 UnbreakableTailLength = 0; 1222 } else { 1223 UnbreakableTailLength += 1224 Current->ColumnWidth + Current->SpacesRequiredBefore; 1225 } 1226 Current = Current->Previous; 1227 } 1228 } 1229 1230 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, 1231 const FormatToken &Tok, 1232 bool InFunctionDecl) { 1233 const FormatToken &Left = *Tok.Previous; 1234 const FormatToken &Right = Tok; 1235 1236 if (Left.is(tok::semi)) 1237 return 0; 1238 if (Left.is(tok::comma)) 1239 return 1; 1240 if (Right.is(tok::l_square)) { 1241 if (Style.Language == FormatStyle::LK_Proto) 1242 return 1; 1243 if (Right.Type != TT_ObjCMethodExpr) 1244 return 250; 1245 } 1246 if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator)) { 1247 if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt) 1248 return 3; 1249 if (Left.Type == TT_StartOfName) 1250 return 20; 1251 if (InFunctionDecl && Right.NestingLevel == 0) 1252 return Style.PenaltyReturnTypeOnItsOwnLine; 1253 return 200; 1254 } 1255 if (Left.is(tok::equal) && Right.is(tok::l_brace)) 1256 return 150; 1257 if (Left.Type == TT_CastRParen) 1258 return 100; 1259 if (Left.is(tok::coloncolon) || 1260 (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto)) 1261 return 500; 1262 if (Left.isOneOf(tok::kw_class, tok::kw_struct)) 1263 return 5000; 1264 1265 if (Left.Type == TT_RangeBasedForLoopColon || 1266 Left.Type == TT_InheritanceColon) 1267 return 2; 1268 1269 if (Right.isMemberAccess()) { 1270 if (Left.is(tok::r_paren) && Left.MatchingParen && 1271 Left.MatchingParen->ParameterCount > 0) 1272 return 20; // Should be smaller than breaking at a nested comma. 1273 return 150; 1274 } 1275 1276 if (Right.Type == TT_TrailingAnnotation && Right.Next && 1277 Right.Next->isNot(tok::l_paren)) { 1278 // Generally, breaking before a trailing annotation is bad unless it is 1279 // function-like. It seems to be especially preferable to keep standard 1280 // annotations (i.e. "const", "final" and "override") on the same line. 1281 // Use a slightly higher penalty after ")" so that annotations like 1282 // "const override" are kept together. 1283 bool is_standard_annotation = Right.is(tok::kw_const) || 1284 Right.TokenText == "override" || 1285 Right.TokenText == "final"; 1286 return (Left.is(tok::r_paren) ? 100 : 120) + 1287 (is_standard_annotation ? 50 : 0); 1288 } 1289 1290 // In for-loops, prefer breaking at ',' and ';'. 1291 if (Line.First->is(tok::kw_for) && Left.is(tok::equal)) 1292 return 4; 1293 1294 // In Objective-C method expressions, prefer breaking before "param:" over 1295 // breaking after it. 1296 if (Right.Type == TT_ObjCSelectorName) 1297 return 0; 1298 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) 1299 return Line.MightBeFunctionDecl ? 50 : 500; 1300 1301 if (Left.is(tok::l_paren) && InFunctionDecl) 1302 return 100; 1303 if (Left.is(tok::equal) && InFunctionDecl) 1304 return 110; 1305 if (Left.opensScope()) 1306 return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter 1307 : 19; 1308 1309 if (Right.is(tok::lessless)) { 1310 if (Left.is(tok::string_literal)) { 1311 StringRef Content = Left.TokenText; 1312 if (Content.startswith("\"")) 1313 Content = Content.drop_front(1); 1314 if (Content.endswith("\"")) 1315 Content = Content.drop_back(1); 1316 Content = Content.trim(); 1317 if (Content.size() > 1 && 1318 (Content.back() == ':' || Content.back() == '=')) 1319 return 25; 1320 } 1321 return 1; // Breaking at a << is really cheap. 1322 } 1323 if (Left.Type == TT_ConditionalExpr) 1324 return prec::Conditional; 1325 prec::Level Level = Left.getPrecedence(); 1326 1327 if (Level != prec::Unknown) 1328 return Level; 1329 1330 return 3; 1331 } 1332 1333 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, 1334 const FormatToken &Left, 1335 const FormatToken &Right) { 1336 if (Style.Language == FormatStyle::LK_Proto) { 1337 if (Right.is(tok::l_paren) && 1338 (Left.TokenText == "returns" || Left.TokenText == "option")) 1339 return true; 1340 } 1341 if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty && 1342 Left.Tok.getObjCKeywordID() == tok::objc_property) 1343 return true; 1344 if (Right.is(tok::hashhash)) 1345 return Left.is(tok::hash); 1346 if (Left.isOneOf(tok::hashhash, tok::hash)) 1347 return Right.is(tok::hash); 1348 if (Left.is(tok::l_paren) && Right.is(tok::r_paren)) 1349 return Style.SpaceInEmptyParentheses; 1350 if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) 1351 return (Right.Type == TT_CastRParen || 1352 (Left.MatchingParen && Left.MatchingParen->Type == TT_CastRParen)) 1353 ? Style.SpacesInCStyleCastParentheses 1354 : Style.SpacesInParentheses; 1355 if (Style.SpacesInAngles && 1356 ((Left.Type == TT_TemplateOpener) != (Right.Type == TT_TemplateCloser))) 1357 return true; 1358 if (Right.isOneOf(tok::semi, tok::comma)) 1359 return false; 1360 if (Right.is(tok::less) && 1361 (Left.is(tok::kw_template) || 1362 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) 1363 return true; 1364 if (Left.is(tok::arrow) || Right.is(tok::arrow)) 1365 return false; 1366 if (Left.isOneOf(tok::exclaim, tok::tilde)) 1367 return false; 1368 if (Left.is(tok::at) && 1369 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant, 1370 tok::numeric_constant, tok::l_paren, tok::l_brace, 1371 tok::kw_true, tok::kw_false)) 1372 return false; 1373 if (Left.is(tok::coloncolon)) 1374 return false; 1375 if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace)) 1376 return (Left.is(tok::less) && Style.Standard == FormatStyle::LS_Cpp03) || 1377 !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren, 1378 tok::r_paren, tok::less); 1379 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) 1380 return false; 1381 if (Right.is(tok::ellipsis)) 1382 return Left.Tok.isLiteral(); 1383 if (Left.is(tok::l_square) && Right.is(tok::amp)) 1384 return false; 1385 if (Right.Type == TT_PointerOrReference) 1386 return Left.Tok.isLiteral() || 1387 ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) && 1388 !Style.PointerBindsToType); 1389 if (Right.Type == TT_FunctionTypeLParen && Left.isNot(tok::l_paren) && 1390 (Left.Type != TT_PointerOrReference || Style.PointerBindsToType)) 1391 return true; 1392 if (Left.Type == TT_PointerOrReference) 1393 return Right.Tok.isLiteral() || Right.Type == TT_BlockComment || 1394 ((Right.Type != TT_PointerOrReference) && 1395 Right.isNot(tok::l_paren) && Style.PointerBindsToType && 1396 Left.Previous && 1397 !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon)); 1398 if (Right.is(tok::star) && Left.is(tok::l_paren)) 1399 return false; 1400 if (Left.is(tok::l_square)) 1401 return Left.Type == TT_ArrayInitializerLSquare && 1402 Style.SpacesInContainerLiterals && Right.isNot(tok::r_square); 1403 if (Right.is(tok::r_square)) 1404 return Right.MatchingParen && Style.SpacesInContainerLiterals && 1405 Right.MatchingParen->Type == TT_ArrayInitializerLSquare; 1406 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr && 1407 Right.Type != TT_LambdaLSquare && Left.isNot(tok::numeric_constant)) 1408 return false; 1409 if (Left.is(tok::colon)) 1410 return Left.Type != TT_ObjCMethodExpr; 1411 if (Right.is(tok::l_paren)) { 1412 if (Left.is(tok::r_paren) && Left.Type == TT_AttributeParen) 1413 return true; 1414 return Line.Type == LT_ObjCDecl || 1415 Left.isOneOf(tok::kw_return, tok::kw_new, tok::kw_delete, 1416 tok::semi) || 1417 (Style.SpaceBeforeParens != FormatStyle::SBPO_Never && 1418 (Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, 1419 tok::kw_switch, tok::kw_catch) || 1420 Left.IsForEachMacro)) || 1421 (Style.SpaceBeforeParens == FormatStyle::SBPO_Always && 1422 Left.isOneOf(tok::identifier, tok::kw___attribute) && 1423 Line.Type != LT_PreprocessorDirective); 1424 } 1425 if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword) 1426 return false; 1427 if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) 1428 return !Left.Children.empty(); // No spaces in "{}". 1429 if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) || 1430 (Right.is(tok::r_brace) && Right.MatchingParen && 1431 Right.MatchingParen->BlockKind != BK_Block)) 1432 return !Style.Cpp11BracedListStyle; 1433 if (Left.Type == TT_BlockComment && Left.TokenText.endswith("=*/")) 1434 return false; 1435 if (Right.Type == TT_UnaryOperator) 1436 return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) && 1437 (Left.isNot(tok::colon) || Left.Type != TT_ObjCMethodExpr); 1438 if (Left.isOneOf(tok::identifier, tok::greater, tok::r_square) && 1439 Right.is(tok::l_brace) && Right.getNextNonComment() && 1440 Right.BlockKind != BK_Block) 1441 return false; 1442 if (Left.is(tok::period) || Right.is(tok::period)) 1443 return false; 1444 if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L") 1445 return false; 1446 return true; 1447 } 1448 1449 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, 1450 const FormatToken &Tok) { 1451 if (Tok.Tok.getIdentifierInfo() && Tok.Previous->Tok.getIdentifierInfo()) 1452 return true; // Never ever merge two identifiers. 1453 if (Tok.Previous->Type == TT_ImplicitStringLiteral) 1454 return Tok.WhitespaceRange.getBegin() != Tok.WhitespaceRange.getEnd(); 1455 if (Line.Type == LT_ObjCMethodDecl) { 1456 if (Tok.Previous->Type == TT_ObjCMethodSpecifier) 1457 return true; 1458 if (Tok.Previous->is(tok::r_paren) && Tok.is(tok::identifier)) 1459 // Don't space between ')' and <id> 1460 return false; 1461 } 1462 if (Line.Type == LT_ObjCProperty && 1463 (Tok.is(tok::equal) || Tok.Previous->is(tok::equal))) 1464 return false; 1465 1466 if (Tok.Type == TT_TrailingReturnArrow || 1467 Tok.Previous->Type == TT_TrailingReturnArrow) 1468 return true; 1469 if (Tok.Previous->is(tok::comma)) 1470 return true; 1471 if (Tok.is(tok::comma)) 1472 return false; 1473 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen) 1474 return true; 1475 if (Tok.Previous->Tok.is(tok::kw_operator)) 1476 return Tok.is(tok::coloncolon); 1477 if (Tok.Type == TT_OverloadedOperatorLParen) 1478 return false; 1479 if (Tok.is(tok::colon)) 1480 return !Line.First->isOneOf(tok::kw_case, tok::kw_default) && 1481 Tok.getNextNonComment() != NULL && Tok.Type != TT_ObjCMethodExpr && 1482 !Tok.Previous->is(tok::question) && 1483 (Tok.Type != TT_DictLiteral || Style.SpacesInContainerLiterals); 1484 if (Tok.Previous->Type == TT_UnaryOperator || 1485 Tok.Previous->Type == TT_CastRParen) 1486 return Tok.Type == TT_BinaryOperator; 1487 if (Tok.Previous->is(tok::greater) && Tok.is(tok::greater)) { 1488 return Tok.Type == TT_TemplateCloser && 1489 Tok.Previous->Type == TT_TemplateCloser && 1490 (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles); 1491 } 1492 if (Tok.isOneOf(tok::arrowstar, tok::periodstar) || 1493 Tok.Previous->isOneOf(tok::arrowstar, tok::periodstar)) 1494 return false; 1495 if (!Style.SpaceBeforeAssignmentOperators && 1496 Tok.getPrecedence() == prec::Assignment) 1497 return false; 1498 if ((Tok.Type == TT_BinaryOperator && !Tok.Previous->is(tok::l_paren)) || 1499 Tok.Previous->Type == TT_BinaryOperator) 1500 return true; 1501 if (Tok.Previous->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) 1502 return false; 1503 if (Tok.is(tok::less) && Tok.Previous->isNot(tok::l_paren) && 1504 Line.First->is(tok::hash)) 1505 return true; 1506 if (Tok.Type == TT_TrailingUnaryOperator) 1507 return false; 1508 return spaceRequiredBetween(Line, *Tok.Previous, Tok); 1509 } 1510 1511 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, 1512 const FormatToken &Right) { 1513 const FormatToken &Left = *Right.Previous; 1514 if (Right.is(tok::comment)) { 1515 return Right.Previous->BlockKind != BK_BracedInit && 1516 Right.Previous->Type != TT_CtorInitializerColon && 1517 Right.NewlinesBefore > 0; 1518 } else if (Right.Previous->isTrailingComment() || 1519 (Right.isStringLiteral() && Right.Previous->isStringLiteral())) { 1520 return true; 1521 } else if (Right.Previous->IsUnterminatedLiteral) { 1522 return true; 1523 } else if (Right.is(tok::lessless) && Right.Next && 1524 Right.Previous->is(tok::string_literal) && 1525 Right.Next->is(tok::string_literal)) { 1526 return true; 1527 } else if (Right.Previous->ClosesTemplateDeclaration && 1528 Right.Previous->MatchingParen && 1529 Right.Previous->MatchingParen->NestingLevel == 0 && 1530 Style.AlwaysBreakTemplateDeclarations) { 1531 return true; 1532 } else if ((Right.Type == TT_CtorInitializerComma || 1533 Right.Type == TT_CtorInitializerColon) && 1534 Style.BreakConstructorInitializersBeforeComma && 1535 !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) { 1536 return true; 1537 } else if (Right.is(tok::l_brace) && (Right.BlockKind == BK_Block)) { 1538 return Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1539 Style.BreakBeforeBraces == FormatStyle::BS_GNU; 1540 } else if (Right.is(tok::string_literal) && 1541 Right.TokenText.startswith("R\"")) { 1542 // Raw string literals are special wrt. line breaks. The author has made a 1543 // deliberate choice and might have aligned the contents of the string 1544 // literal accordingly. Thus, we try keep existing line breaks. 1545 return Right.NewlinesBefore > 0; 1546 } else if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 && 1547 Style.Language == FormatStyle::LK_Proto) { 1548 // Don't enums onto single lines in protocol buffers. 1549 return true; 1550 } else if ((Left.is(tok::l_brace) && Left.MatchingParen && 1551 Left.MatchingParen->Previous && 1552 Left.MatchingParen->Previous->is(tok::comma)) || 1553 (Right.is(tok::r_brace) && Left.is(tok::comma))) { 1554 // If the last token before a '}' is a comma, the intention is to insert a 1555 // line break after it in order to make shuffling around entries easier. 1556 return true; 1557 } 1558 return false; 1559 } 1560 1561 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, 1562 const FormatToken &Right) { 1563 const FormatToken &Left = *Right.Previous; 1564 if (Left.is(tok::at)) 1565 return false; 1566 if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator)) 1567 return true; 1568 if (Right.isTrailingComment()) 1569 // We rely on MustBreakBefore being set correctly here as we should not 1570 // change the "binding" behavior of a comment. 1571 // The first comment in a braced lists is always interpreted as belonging to 1572 // the first list element. Otherwise, it should be placed outside of the 1573 // list. 1574 return Left.BlockKind == BK_BracedInit; 1575 if (Left.is(tok::question) && Right.is(tok::colon)) 1576 return false; 1577 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question)) 1578 return Style.BreakBeforeTernaryOperators; 1579 if (Left.Type == TT_ConditionalExpr || Left.is(tok::question)) 1580 return !Style.BreakBeforeTernaryOperators; 1581 if (Right.is(tok::colon) && 1582 (Right.Type == TT_DictLiteral || Right.Type == TT_ObjCMethodExpr)) 1583 return false; 1584 if (Right.Type == TT_InheritanceColon) 1585 return true; 1586 if (Left.is(tok::colon) && 1587 (Left.Type == TT_DictLiteral || Left.Type == TT_ObjCMethodExpr)) 1588 return true; 1589 if (Right.Type == TT_ObjCSelectorName) 1590 return true; 1591 if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty) 1592 return true; 1593 if (Left.ClosesTemplateDeclaration) 1594 return true; 1595 if (Right.Type == TT_RangeBasedForLoopColon || 1596 Right.Type == TT_OverloadedOperatorLParen || 1597 Right.Type == TT_OverloadedOperator) 1598 return false; 1599 if (Left.Type == TT_RangeBasedForLoopColon) 1600 return true; 1601 if (Right.Type == TT_RangeBasedForLoopColon) 1602 return false; 1603 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || 1604 Left.Type == TT_UnaryOperator || Left.is(tok::kw_operator)) 1605 return false; 1606 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) 1607 return false; 1608 if (Left.is(tok::l_paren) && Left.Type == TT_AttributeParen) 1609 return false; 1610 if (Left.is(tok::l_paren) && Left.Previous && 1611 (Left.Previous->Type == TT_BinaryOperator || 1612 Left.Previous->Type == TT_CastRParen)) 1613 return false; 1614 if (Right.Type == TT_ImplicitStringLiteral) 1615 return false; 1616 1617 if (Right.is(tok::r_paren) || Right.Type == TT_TemplateCloser) 1618 return false; 1619 1620 // We only break before r_brace if there was a corresponding break before 1621 // the l_brace, which is tracked by BreakBeforeClosingBrace. 1622 if (Right.is(tok::r_brace)) 1623 return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block; 1624 1625 // Allow breaking after a trailing 'const', e.g. after a method declaration, 1626 // unless it is follow by ';', '{' or '='. 1627 if (Left.is(tok::kw_const) && Left.Previous != NULL && 1628 Left.Previous->is(tok::r_paren)) 1629 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal); 1630 1631 if (Right.is(tok::kw___attribute)) 1632 return true; 1633 1634 if (Left.is(tok::identifier) && Right.is(tok::string_literal)) 1635 return true; 1636 1637 if (Left.Type == TT_CtorInitializerComma && 1638 Style.BreakConstructorInitializersBeforeComma) 1639 return false; 1640 if (Right.Type == TT_CtorInitializerComma && 1641 Style.BreakConstructorInitializersBeforeComma) 1642 return true; 1643 if (Right.Type == TT_BinaryOperator && Style.BreakBeforeBinaryOperators) 1644 return true; 1645 if (Left.is(tok::greater) && Right.is(tok::greater) && 1646 Left.Type != TT_TemplateCloser) 1647 return false; 1648 if (Left.Type == TT_ArrayInitializerLSquare) 1649 return true; 1650 return (Left.isBinaryOperator() && Left.isNot(tok::lessless) && 1651 !Style.BreakBeforeBinaryOperators) || 1652 Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, 1653 tok::kw_class, tok::kw_struct) || 1654 Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon, 1655 tok::l_square, tok::at) || 1656 (Left.is(tok::r_paren) && 1657 Right.isOneOf(tok::identifier, tok::kw_const)) || 1658 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)); 1659 } 1660 1661 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { 1662 llvm::errs() << "AnnotatedTokens:\n"; 1663 const FormatToken *Tok = Line.First; 1664 while (Tok) { 1665 llvm::errs() << " M=" << Tok->MustBreakBefore 1666 << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type 1667 << " S=" << Tok->SpacesRequiredBefore 1668 << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName() 1669 << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind 1670 << " FakeLParens="; 1671 for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) 1672 llvm::errs() << Tok->FakeLParens[i] << "/"; 1673 llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n"; 1674 if (Tok->Next == NULL) 1675 assert(Tok == Line.Last); 1676 Tok = Tok->Next; 1677 } 1678 llvm::errs() << "----\n"; 1679 } 1680 1681 } // namespace format 1682 } // namespace clang 1683