1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements a token annotator, i.e. creates 12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "TokenAnnotator.h" 17 #include "clang/Basic/SourceManager.h" 18 #include "llvm/Support/Debug.h" 19 20 #define DEBUG_TYPE "format-token-annotator" 21 22 namespace clang { 23 namespace format { 24 25 namespace { 26 27 /// \brief A parser that gathers additional information about tokens. 28 /// 29 /// The \c TokenAnnotator tries to match parenthesis and square brakets and 30 /// store a parenthesis levels. It also tries to resolve matching "<" and ">" 31 /// into template parameter lists. 32 class AnnotatingParser { 33 public: 34 AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line, 35 IdentifierInfo &Ident_in) 36 : Style(Style), Line(Line), CurrentToken(Line.First), 37 KeywordVirtualFound(false), AutoFound(false), Ident_in(Ident_in) { 38 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false)); 39 resetTokenMetadata(CurrentToken); 40 } 41 42 private: 43 bool parseAngle() { 44 if (!CurrentToken) 45 return false; 46 ScopedContextCreator ContextCreator(*this, tok::less, 10); 47 FormatToken *Left = CurrentToken->Previous; 48 Contexts.back().IsExpression = false; 49 // If there's a template keyword before the opening angle bracket, this is a 50 // template parameter, not an argument. 
51 Contexts.back().InTemplateArgument = 52 Left->Previous && Left->Previous->Tok.isNot(tok::kw_template); 53 54 while (CurrentToken) { 55 if (CurrentToken->is(tok::greater)) { 56 Left->MatchingParen = CurrentToken; 57 CurrentToken->MatchingParen = Left; 58 CurrentToken->Type = TT_TemplateCloser; 59 next(); 60 return true; 61 } 62 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace, 63 tok::question, tok::colon)) 64 return false; 65 // If a && or || is found and interpreted as a binary operator, this set 66 // of angles is likely part of something like "a < b && c > d". If the 67 // angles are inside an expression, the ||/&& might also be a binary 68 // operator that was misinterpreted because we are parsing template 69 // parameters. 70 // FIXME: This is getting out of hand, write a decent parser. 71 if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) && 72 CurrentToken->Previous->Type == TT_BinaryOperator && 73 Contexts[Contexts.size() - 2].IsExpression && 74 Line.First->isNot(tok::kw_template)) 75 return false; 76 updateParameterCount(Left, CurrentToken); 77 if (!consumeToken()) 78 return false; 79 } 80 return false; 81 } 82 83 bool parseParens(bool LookForDecls = false) { 84 if (!CurrentToken) 85 return false; 86 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1); 87 88 // FIXME: This is a bit of a hack. Do better. 89 Contexts.back().ColonIsForRangeExpr = 90 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; 91 92 bool StartsObjCMethodExpr = false; 93 FormatToken *Left = CurrentToken->Previous; 94 if (CurrentToken->is(tok::caret)) { 95 // (^ can start a block type. 96 Left->Type = TT_ObjCBlockLParen; 97 } else if (FormatToken *MaybeSel = Left->Previous) { 98 // @selector( starts a selector. 
99 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous && 100 MaybeSel->Previous->is(tok::at)) { 101 StartsObjCMethodExpr = true; 102 } 103 } 104 105 if (Left->Previous && 106 (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_if, 107 tok::kw_while, tok::l_paren, tok::comma) || 108 Left->Previous->Type == TT_BinaryOperator)) { 109 // static_assert, if and while usually contain expressions. 110 Contexts.back().IsExpression = true; 111 } else if (Line.InPPDirective && 112 (!Left->Previous || 113 (Left->Previous->isNot(tok::identifier) && 114 Left->Previous->Type != TT_OverloadedOperator))) { 115 Contexts.back().IsExpression = true; 116 } else if (Left->Previous && Left->Previous->is(tok::r_square) && 117 Left->Previous->MatchingParen && 118 Left->Previous->MatchingParen->Type == TT_LambdaLSquare) { 119 // This is a parameter list of a lambda expression. 120 Contexts.back().IsExpression = false; 121 } else if (Contexts[Contexts.size() - 2].CaretFound) { 122 // This is the parameter list of an ObjC block. 123 Contexts.back().IsExpression = false; 124 } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) { 125 Left->Type = TT_AttributeParen; 126 } else if (Left->Previous && Left->Previous->IsForEachMacro) { 127 // The first argument to a foreach macro is a declaration. 128 Contexts.back().IsForEachMacro = true; 129 Contexts.back().IsExpression = false; 130 } 131 132 if (StartsObjCMethodExpr) { 133 Contexts.back().ColonIsObjCMethodExpr = true; 134 Left->Type = TT_ObjCMethodExpr; 135 } 136 137 bool MightBeFunctionType = CurrentToken->is(tok::star); 138 bool HasMultipleLines = false; 139 bool HasMultipleParametersOnALine = false; 140 while (CurrentToken) { 141 // LookForDecls is set when "if (" has been seen. Check for 142 // 'identifier' '*' 'identifier' followed by not '=' -- this 143 // '*' has to be a binary operator but determineStarAmpUsage() will 144 // categorize it as an unary operator, so set the right type here. 
145 if (LookForDecls && CurrentToken->Next) { 146 FormatToken *Prev = CurrentToken->getPreviousNonComment(); 147 if (Prev) { 148 FormatToken *PrevPrev = Prev->getPreviousNonComment(); 149 FormatToken *Next = CurrentToken->Next; 150 if (PrevPrev && PrevPrev->is(tok::identifier) && 151 Prev->isOneOf(tok::star, tok::amp, tok::ampamp) && 152 CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) { 153 Prev->Type = TT_BinaryOperator; 154 LookForDecls = false; 155 } 156 } 157 } 158 159 if (CurrentToken->Previous->Type == TT_PointerOrReference && 160 CurrentToken->Previous->Previous->isOneOf(tok::l_paren, 161 tok::coloncolon)) 162 MightBeFunctionType = true; 163 if (CurrentToken->Previous->Type == TT_BinaryOperator) 164 Contexts.back().IsExpression = true; 165 if (CurrentToken->is(tok::r_paren)) { 166 if (MightBeFunctionType && CurrentToken->Next && 167 (CurrentToken->Next->is(tok::l_paren) || 168 (CurrentToken->Next->is(tok::l_square) && 169 !Contexts.back().IsExpression))) 170 Left->Type = TT_FunctionTypeLParen; 171 Left->MatchingParen = CurrentToken; 172 CurrentToken->MatchingParen = Left; 173 174 if (StartsObjCMethodExpr) { 175 CurrentToken->Type = TT_ObjCMethodExpr; 176 if (Contexts.back().FirstObjCSelectorName) { 177 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 178 Contexts.back().LongestObjCSelectorName; 179 } 180 } 181 182 if (Left->Type == TT_AttributeParen) 183 CurrentToken->Type = TT_AttributeParen; 184 185 if (!HasMultipleLines) 186 Left->PackingKind = PPK_Inconclusive; 187 else if (HasMultipleParametersOnALine) 188 Left->PackingKind = PPK_BinPacked; 189 else 190 Left->PackingKind = PPK_OnePerLine; 191 192 next(); 193 return true; 194 } 195 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace)) 196 return false; 197 else if (CurrentToken->is(tok::l_brace)) 198 Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen 199 if (CurrentToken->is(tok::comma) && CurrentToken->Next && 200 !CurrentToken->Next->HasUnescapedNewline && 201 
!CurrentToken->Next->isTrailingComment()) 202 HasMultipleParametersOnALine = true; 203 if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) || 204 CurrentToken->isSimpleTypeSpecifier()) 205 Contexts.back().IsExpression = false; 206 FormatToken *Tok = CurrentToken; 207 if (!consumeToken()) 208 return false; 209 updateParameterCount(Left, Tok); 210 if (CurrentToken && CurrentToken->HasUnescapedNewline) 211 HasMultipleLines = true; 212 } 213 return false; 214 } 215 216 bool parseSquare() { 217 if (!CurrentToken) 218 return false; 219 220 // A '[' could be an index subscript (after an identifier or after 221 // ')' or ']'), it could be the start of an Objective-C method 222 // expression, or it could the the start of an Objective-C array literal. 223 FormatToken *Left = CurrentToken->Previous; 224 FormatToken *Parent = Left->getPreviousNonComment(); 225 bool StartsObjCMethodExpr = 226 Contexts.back().CanBeExpression && Left->Type != TT_LambdaLSquare && 227 CurrentToken->isNot(tok::l_brace) && 228 (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, 229 tok::kw_return, tok::kw_throw) || 230 Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn || 231 Parent->Type == TT_CastRParen || 232 getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown); 233 ScopedContextCreator ContextCreator(*this, tok::l_square, 10); 234 Contexts.back().IsExpression = true; 235 bool ColonFound = false; 236 237 if (StartsObjCMethodExpr) { 238 Contexts.back().ColonIsObjCMethodExpr = true; 239 Left->Type = TT_ObjCMethodExpr; 240 } else if (Parent && Parent->is(tok::at)) { 241 Left->Type = TT_ArrayInitializerLSquare; 242 } else if (Left->Type == TT_Unknown) { 243 Left->Type = TT_ArraySubscriptLSquare; 244 } 245 246 while (CurrentToken) { 247 if (CurrentToken->is(tok::r_square)) { 248 if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) && 249 Left->Type == TT_ObjCMethodExpr) { 250 // An ObjC method call is rarely followed by an open parenthesis. 
251 // FIXME: Do we incorrectly label ":" with this? 252 StartsObjCMethodExpr = false; 253 Left->Type = TT_Unknown; 254 } 255 if (StartsObjCMethodExpr && CurrentToken->Previous != Left) { 256 CurrentToken->Type = TT_ObjCMethodExpr; 257 // determineStarAmpUsage() thinks that '*' '[' is allocating an 258 // array of pointers, but if '[' starts a selector then '*' is a 259 // binary operator. 260 if (Parent && Parent->Type == TT_PointerOrReference) 261 Parent->Type = TT_BinaryOperator; 262 } 263 Left->MatchingParen = CurrentToken; 264 CurrentToken->MatchingParen = Left; 265 if (Contexts.back().FirstObjCSelectorName) { 266 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 267 Contexts.back().LongestObjCSelectorName; 268 if (Left->BlockParameterCount > 1) 269 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0; 270 } 271 next(); 272 return true; 273 } 274 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace)) 275 return false; 276 if (CurrentToken->is(tok::colon)) 277 ColonFound = true; 278 if (CurrentToken->is(tok::comma) && 279 Style.Language != FormatStyle::LK_Proto && 280 (Left->Type == TT_ArraySubscriptLSquare || 281 (Left->Type == TT_ObjCMethodExpr && !ColonFound))) 282 Left->Type = TT_ArrayInitializerLSquare; 283 FormatToken* Tok = CurrentToken; 284 if (!consumeToken()) 285 return false; 286 updateParameterCount(Left, Tok); 287 } 288 return false; 289 } 290 291 bool parseBrace() { 292 if (CurrentToken) { 293 FormatToken *Left = CurrentToken->Previous; 294 295 if (Contexts.back().CaretFound) 296 Left->Type = TT_ObjCBlockLBrace; 297 Contexts.back().CaretFound = false; 298 299 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); 300 Contexts.back().ColonIsDictLiteral = true; 301 if (Left->BlockKind == BK_BracedInit) 302 Contexts.back().IsExpression = true; 303 304 while (CurrentToken) { 305 if (CurrentToken->is(tok::r_brace)) { 306 Left->MatchingParen = CurrentToken; 307 CurrentToken->MatchingParen = Left; 308 next(); 309 return 
true; 310 } 311 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) 312 return false; 313 updateParameterCount(Left, CurrentToken); 314 if (CurrentToken->isOneOf(tok::colon, tok::l_brace)) { 315 FormatToken *Previous = CurrentToken->getPreviousNonComment(); 316 if ((CurrentToken->is(tok::colon) || 317 Style.Language == FormatStyle::LK_Proto) && 318 Previous->is(tok::identifier)) 319 Previous->Type = TT_SelectorName; 320 if (CurrentToken->is(tok::colon)) 321 Left->Type = TT_DictLiteral; 322 } 323 if (!consumeToken()) 324 return false; 325 } 326 } 327 return true; 328 } 329 330 void updateParameterCount(FormatToken *Left, FormatToken *Current) { 331 if (Current->Type == TT_LambdaLSquare || 332 (Current->is(tok::caret) && Current->Type == TT_UnaryOperator) || 333 (Style.Language == FormatStyle::LK_JavaScript && 334 Current->TokenText == "function")) { 335 ++Left->BlockParameterCount; 336 } 337 if (Current->is(tok::comma)) { 338 ++Left->ParameterCount; 339 if (!Left->Role) 340 Left->Role.reset(new CommaSeparatedList(Style)); 341 Left->Role->CommaFound(Current); 342 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) { 343 Left->ParameterCount = 1; 344 } 345 } 346 347 bool parseConditional() { 348 while (CurrentToken) { 349 if (CurrentToken->is(tok::colon)) { 350 CurrentToken->Type = TT_ConditionalExpr; 351 next(); 352 return true; 353 } 354 if (!consumeToken()) 355 return false; 356 } 357 return false; 358 } 359 360 bool parseTemplateDeclaration() { 361 if (CurrentToken && CurrentToken->is(tok::less)) { 362 CurrentToken->Type = TT_TemplateOpener; 363 next(); 364 if (!parseAngle()) 365 return false; 366 if (CurrentToken) 367 CurrentToken->Previous->ClosesTemplateDeclaration = true; 368 return true; 369 } 370 return false; 371 } 372 373 bool consumeToken() { 374 FormatToken *Tok = CurrentToken; 375 next(); 376 switch (Tok->Tok.getKind()) { 377 case tok::plus: 378 case tok::minus: 379 if (!Tok->Previous && Line.MustBeDeclaration) 380 Tok->Type = 
TT_ObjCMethodSpecifier; 381 break; 382 case tok::colon: 383 if (!Tok->Previous) 384 return false; 385 // Colons from ?: are handled in parseConditional(). 386 if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1 && 387 Line.First->isNot(tok::kw_case)) { 388 Tok->Type = TT_CtorInitializerColon; 389 } else if (Contexts.back().ColonIsDictLiteral) { 390 Tok->Type = TT_DictLiteral; 391 } else if (Contexts.back().ColonIsObjCMethodExpr || 392 Line.First->Type == TT_ObjCMethodSpecifier) { 393 Tok->Type = TT_ObjCMethodExpr; 394 Tok->Previous->Type = TT_SelectorName; 395 if (Tok->Previous->ColumnWidth > 396 Contexts.back().LongestObjCSelectorName) { 397 Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth; 398 } 399 if (!Contexts.back().FirstObjCSelectorName) 400 Contexts.back().FirstObjCSelectorName = Tok->Previous; 401 } else if (Contexts.back().ColonIsForRangeExpr) { 402 Tok->Type = TT_RangeBasedForLoopColon; 403 } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) { 404 Tok->Type = TT_BitFieldColon; 405 } else if (Contexts.size() == 1 && 406 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) { 407 Tok->Type = TT_InheritanceColon; 408 } else if (Contexts.back().ContextKind == tok::l_paren) { 409 Tok->Type = TT_InlineASMColon; 410 } 411 break; 412 case tok::kw_if: 413 case tok::kw_while: 414 if (CurrentToken && CurrentToken->is(tok::l_paren)) { 415 next(); 416 if (!parseParens(/*LookForDecls=*/true)) 417 return false; 418 } 419 break; 420 case tok::kw_for: 421 Contexts.back().ColonIsForRangeExpr = true; 422 next(); 423 if (!parseParens()) 424 return false; 425 break; 426 case tok::l_paren: 427 if (!parseParens()) 428 return false; 429 if (Line.MustBeDeclaration && Contexts.size() == 1 && 430 !Contexts.back().IsExpression && 431 Line.First->Type != TT_ObjCProperty && 432 (!Tok->Previous || Tok->Previous->isNot(tok::kw_decltype))) 433 Line.MightBeFunctionDecl = true; 434 break; 435 case tok::l_square: 436 if (!parseSquare()) 437 return 
false; 438 break; 439 case tok::l_brace: 440 if (!parseBrace()) 441 return false; 442 break; 443 case tok::less: 444 if (Tok->Previous && !Tok->Previous->Tok.isLiteral() && parseAngle()) 445 Tok->Type = TT_TemplateOpener; 446 else { 447 Tok->Type = TT_BinaryOperator; 448 CurrentToken = Tok; 449 next(); 450 } 451 break; 452 case tok::r_paren: 453 case tok::r_square: 454 return false; 455 case tok::r_brace: 456 // Lines can start with '}'. 457 if (Tok->Previous) 458 return false; 459 break; 460 case tok::greater: 461 Tok->Type = TT_BinaryOperator; 462 break; 463 case tok::kw_operator: 464 while (CurrentToken && 465 !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) { 466 if (CurrentToken->isOneOf(tok::star, tok::amp)) 467 CurrentToken->Type = TT_PointerOrReference; 468 consumeToken(); 469 if (CurrentToken && CurrentToken->Previous->Type == TT_BinaryOperator) 470 CurrentToken->Previous->Type = TT_OverloadedOperator; 471 } 472 if (CurrentToken) { 473 CurrentToken->Type = TT_OverloadedOperatorLParen; 474 if (CurrentToken->Previous->Type == TT_BinaryOperator) 475 CurrentToken->Previous->Type = TT_OverloadedOperator; 476 } 477 break; 478 case tok::question: 479 parseConditional(); 480 break; 481 case tok::kw_template: 482 parseTemplateDeclaration(); 483 break; 484 case tok::identifier: 485 if (Line.First->is(tok::kw_for) && 486 Tok->Tok.getIdentifierInfo() == &Ident_in) 487 Tok->Type = TT_ObjCForIn; 488 break; 489 case tok::comma: 490 if (Contexts.back().FirstStartOfName) 491 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; 492 if (Contexts.back().InCtorInitializer) 493 Tok->Type = TT_CtorInitializerComma; 494 if (Contexts.back().IsForEachMacro) 495 Contexts.back().IsExpression = true; 496 break; 497 default: 498 break; 499 } 500 return true; 501 } 502 503 void parseIncludeDirective() { 504 if (CurrentToken && CurrentToken->is(tok::less)) { 505 next(); 506 while (CurrentToken) { 507 if (CurrentToken->isNot(tok::comment) || 
CurrentToken->Next) 508 CurrentToken->Type = TT_ImplicitStringLiteral; 509 next(); 510 } 511 } else { 512 while (CurrentToken) { 513 if (CurrentToken->is(tok::string_literal)) 514 // Mark these string literals as "implicit" literals, too, so that 515 // they are not split or line-wrapped. 516 CurrentToken->Type = TT_ImplicitStringLiteral; 517 next(); 518 } 519 } 520 } 521 522 void parseWarningOrError() { 523 next(); 524 // We still want to format the whitespace left of the first token of the 525 // warning or error. 526 next(); 527 while (CurrentToken) { 528 CurrentToken->Type = TT_ImplicitStringLiteral; 529 next(); 530 } 531 } 532 533 void parsePragma() { 534 next(); // Consume "pragma". 535 if (CurrentToken && CurrentToken->TokenText == "mark") { 536 next(); // Consume "mark". 537 next(); // Consume first token (so we fix leading whitespace). 538 while (CurrentToken) { 539 CurrentToken->Type = TT_ImplicitStringLiteral; 540 next(); 541 } 542 } 543 } 544 545 void parsePreprocessorDirective() { 546 next(); 547 if (!CurrentToken) 548 return; 549 if (CurrentToken->Tok.is(tok::numeric_constant)) { 550 CurrentToken->SpacesRequiredBefore = 1; 551 return; 552 } 553 // Hashes in the middle of a line can lead to any strange token 554 // sequence. 
555 if (!CurrentToken->Tok.getIdentifierInfo()) 556 return; 557 switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) { 558 case tok::pp_include: 559 case tok::pp_import: 560 next(); 561 parseIncludeDirective(); 562 break; 563 case tok::pp_error: 564 case tok::pp_warning: 565 parseWarningOrError(); 566 break; 567 case tok::pp_pragma: 568 parsePragma(); 569 break; 570 case tok::pp_if: 571 case tok::pp_elif: 572 Contexts.back().IsExpression = true; 573 parseLine(); 574 break; 575 default: 576 break; 577 } 578 while (CurrentToken) 579 next(); 580 } 581 582 public: 583 LineType parseLine() { 584 if (CurrentToken->is(tok::hash)) { 585 parsePreprocessorDirective(); 586 return LT_PreprocessorDirective; 587 } 588 589 // Directly allow to 'import <string-literal>' to support protocol buffer 590 // definitions (code.google.com/p/protobuf) or missing "#" (either way we 591 // should not break the line). 592 IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo(); 593 if (Info && Info->getPPKeywordID() == tok::pp_import && 594 CurrentToken->Next && CurrentToken->Next->is(tok::string_literal)) { 595 next(); 596 parseIncludeDirective(); 597 return LT_Other; 598 } 599 600 // If this line starts and ends in '<' and '>', respectively, it is likely 601 // part of "#define <a/b.h>". 
602 if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) { 603 parseIncludeDirective(); 604 return LT_Other; 605 } 606 607 while (CurrentToken) { 608 if (CurrentToken->is(tok::kw_virtual)) 609 KeywordVirtualFound = true; 610 if (!consumeToken()) 611 return LT_Invalid; 612 } 613 if (KeywordVirtualFound) 614 return LT_VirtualFunctionDecl; 615 616 if (Line.First->Type == TT_ObjCMethodSpecifier) { 617 if (Contexts.back().FirstObjCSelectorName) 618 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 619 Contexts.back().LongestObjCSelectorName; 620 return LT_ObjCMethodDecl; 621 } 622 623 return LT_Other; 624 } 625 626 private: 627 void resetTokenMetadata(FormatToken *Token) { 628 if (!Token) 629 return; 630 631 // Reset token type in case we have already looked at it and then 632 // recovered from an error (e.g. failure to find the matching >). 633 if (CurrentToken->Type != TT_LambdaLSquare && 634 CurrentToken->Type != TT_FunctionLBrace && 635 CurrentToken->Type != TT_ImplicitStringLiteral && 636 CurrentToken->Type != TT_RegexLiteral && 637 CurrentToken->Type != TT_TrailingReturnArrow) 638 CurrentToken->Type = TT_Unknown; 639 CurrentToken->Role.reset(); 640 CurrentToken->FakeLParens.clear(); 641 CurrentToken->FakeRParens = 0; 642 } 643 644 void next() { 645 if (CurrentToken) { 646 CurrentToken->NestingLevel = Contexts.size() - 1; 647 CurrentToken->BindingStrength = Contexts.back().BindingStrength; 648 determineTokenType(*CurrentToken); 649 CurrentToken = CurrentToken->Next; 650 } 651 652 resetTokenMetadata(CurrentToken); 653 } 654 655 /// \brief A struct to hold information valid in a specific context, e.g. 656 /// a pair of parenthesis. 
657 struct Context { 658 Context(tok::TokenKind ContextKind, unsigned BindingStrength, 659 bool IsExpression) 660 : ContextKind(ContextKind), BindingStrength(BindingStrength), 661 LongestObjCSelectorName(0), ColonIsForRangeExpr(false), 662 ColonIsDictLiteral(false), ColonIsObjCMethodExpr(false), 663 FirstObjCSelectorName(nullptr), FirstStartOfName(nullptr), 664 IsExpression(IsExpression), CanBeExpression(true), 665 InTemplateArgument(false), InCtorInitializer(false), 666 CaretFound(false), IsForEachMacro(false) {} 667 668 tok::TokenKind ContextKind; 669 unsigned BindingStrength; 670 unsigned LongestObjCSelectorName; 671 bool ColonIsForRangeExpr; 672 bool ColonIsDictLiteral; 673 bool ColonIsObjCMethodExpr; 674 FormatToken *FirstObjCSelectorName; 675 FormatToken *FirstStartOfName; 676 bool IsExpression; 677 bool CanBeExpression; 678 bool InTemplateArgument; 679 bool InCtorInitializer; 680 bool CaretFound; 681 bool IsForEachMacro; 682 }; 683 684 /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime 685 /// of each instance. 
686 struct ScopedContextCreator { 687 AnnotatingParser &P; 688 689 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind, 690 unsigned Increase) 691 : P(P) { 692 P.Contexts.push_back(Context(ContextKind, 693 P.Contexts.back().BindingStrength + Increase, 694 P.Contexts.back().IsExpression)); 695 } 696 697 ~ScopedContextCreator() { P.Contexts.pop_back(); } 698 }; 699 700 void determineTokenType(FormatToken &Current) { 701 if (Current.getPrecedence() == prec::Assignment && 702 !Line.First->isOneOf(tok::kw_template, tok::kw_using) && 703 (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) { 704 Contexts.back().IsExpression = true; 705 for (FormatToken *Previous = Current.Previous; 706 Previous && !Previous->isOneOf(tok::comma, tok::semi); 707 Previous = Previous->Previous) { 708 if (Previous->isOneOf(tok::r_square, tok::r_paren)) { 709 Previous = Previous->MatchingParen; 710 if (!Previous) 711 break; 712 } 713 if ((Previous->Type == TT_BinaryOperator || 714 Previous->Type == TT_UnaryOperator) && 715 Previous->isOneOf(tok::star, tok::amp) && Previous->Previous && 716 Previous->Previous->isNot(tok::equal)) { 717 Previous->Type = TT_PointerOrReference; 718 } 719 } 720 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) { 721 Contexts.back().IsExpression = true; 722 } else if (Current.is(tok::l_paren) && !Line.MustBeDeclaration && 723 !Line.InPPDirective && 724 (!Current.Previous || 725 Current.Previous->isNot(tok::kw_decltype))) { 726 bool ParametersOfFunctionType = 727 Current.Previous && Current.Previous->is(tok::r_paren) && 728 Current.Previous->MatchingParen && 729 Current.Previous->MatchingParen->Type == TT_FunctionTypeLParen; 730 bool IsForOrCatch = Current.Previous && 731 Current.Previous->isOneOf(tok::kw_for, tok::kw_catch); 732 Contexts.back().IsExpression = !ParametersOfFunctionType && !IsForOrCatch; 733 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { 734 for (FormatToken *Previous = Current.Previous; 735 
Previous && Previous->isOneOf(tok::star, tok::amp); 736 Previous = Previous->Previous) 737 Previous->Type = TT_PointerOrReference; 738 } else if (Current.Previous && 739 Current.Previous->Type == TT_CtorInitializerColon) { 740 Contexts.back().IsExpression = true; 741 Contexts.back().InCtorInitializer = true; 742 } else if (Current.is(tok::kw_new)) { 743 Contexts.back().CanBeExpression = false; 744 } else if (Current.is(tok::semi) || Current.is(tok::exclaim)) { 745 // This should be the condition or increment in a for-loop. 746 Contexts.back().IsExpression = true; 747 } 748 749 if (Current.Type == TT_Unknown) { 750 // Line.MightBeFunctionDecl can only be true after the parentheses of a 751 // function declaration have been found. In this case, 'Current' is a 752 // trailing token of this declaration and thus cannot be a name. 753 if (isStartOfName(Current) && 754 (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) { 755 Contexts.back().FirstStartOfName = &Current; 756 Current.Type = TT_StartOfName; 757 } else if (Current.is(tok::kw_auto)) { 758 AutoFound = true; 759 } else if (Current.is(tok::arrow) && AutoFound && 760 Line.MustBeDeclaration) { 761 Current.Type = TT_TrailingReturnArrow; 762 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { 763 Current.Type = 764 determineStarAmpUsage(Current, Contexts.back().CanBeExpression && 765 Contexts.back().IsExpression, 766 Contexts.back().InTemplateArgument); 767 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { 768 Current.Type = determinePlusMinusCaretUsage(Current); 769 if (Current.Type == TT_UnaryOperator && Current.is(tok::caret)) 770 Contexts.back().CaretFound = true; 771 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { 772 Current.Type = determineIncrementUsage(Current); 773 } else if (Current.isOneOf(tok::exclaim, tok::tilde)) { 774 Current.Type = TT_UnaryOperator; 775 } else if (Current.is(tok::question)) { 776 Current.Type = TT_ConditionalExpr; 777 } else if 
(Current.isBinaryOperator() && 778 (!Current.Previous || 779 Current.Previous->isNot(tok::l_square))) { 780 Current.Type = TT_BinaryOperator; 781 } else if (Current.is(tok::comment)) { 782 if (Current.TokenText.startswith("//")) 783 Current.Type = TT_LineComment; 784 else 785 Current.Type = TT_BlockComment; 786 } else if (Current.is(tok::r_paren)) { 787 if (rParenEndsCast(Current)) 788 Current.Type = TT_CastRParen; 789 } else if (Current.is(tok::at) && Current.Next) { 790 switch (Current.Next->Tok.getObjCKeywordID()) { 791 case tok::objc_interface: 792 case tok::objc_implementation: 793 case tok::objc_protocol: 794 Current.Type = TT_ObjCDecl; 795 break; 796 case tok::objc_property: 797 Current.Type = TT_ObjCProperty; 798 break; 799 default: 800 break; 801 } 802 } else if (Current.is(tok::period)) { 803 FormatToken *PreviousNoComment = Current.getPreviousNonComment(); 804 if (PreviousNoComment && 805 PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) 806 Current.Type = TT_DesignatedInitializerPeriod; 807 } else if (Current.isOneOf(tok::identifier, tok::kw_const) && 808 Current.Previous && Current.Previous->isNot(tok::equal) && 809 Line.MightBeFunctionDecl && Contexts.size() == 1) { 810 // Line.MightBeFunctionDecl can only be true after the parentheses of a 811 // function declaration have been found. 812 Current.Type = TT_TrailingAnnotation; 813 } 814 } 815 } 816 817 /// \brief Take a guess at whether \p Tok starts a name of a function or 818 /// variable declaration. 819 /// 820 /// This is a heuristic based on whether \p Tok is an identifier following 821 /// something that is likely a type. 822 bool isStartOfName(const FormatToken &Tok) { 823 if (Tok.isNot(tok::identifier) || !Tok.Previous) 824 return false; 825 826 // Skip "const" as it does not have an influence on whether this is a name. 
827 FormatToken *PreviousNotConst = Tok.Previous; 828 while (PreviousNotConst && PreviousNotConst->is(tok::kw_const)) 829 PreviousNotConst = PreviousNotConst->Previous; 830 831 if (!PreviousNotConst) 832 return false; 833 834 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) && 835 PreviousNotConst->Previous && 836 PreviousNotConst->Previous->is(tok::hash); 837 838 if (PreviousNotConst->Type == TT_TemplateCloser) 839 return PreviousNotConst && PreviousNotConst->MatchingParen && 840 PreviousNotConst->MatchingParen->Previous && 841 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template); 842 843 if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen && 844 PreviousNotConst->MatchingParen->Previous && 845 PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype)) 846 return true; 847 848 return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) || 849 PreviousNotConst->Type == TT_PointerOrReference || 850 PreviousNotConst->isSimpleTypeSpecifier(); 851 } 852 853 /// \brief Determine whether ')' is ending a cast. 
854 bool rParenEndsCast(const FormatToken &Tok) { 855 FormatToken *LeftOfParens = nullptr; 856 if (Tok.MatchingParen) 857 LeftOfParens = Tok.MatchingParen->getPreviousNonComment(); 858 if (LeftOfParens && LeftOfParens->is(tok::r_paren) && 859 LeftOfParens->MatchingParen) 860 LeftOfParens = LeftOfParens->MatchingParen->Previous; 861 if (LeftOfParens && LeftOfParens->is(tok::r_square) && 862 LeftOfParens->MatchingParen && 863 LeftOfParens->MatchingParen->Type == TT_LambdaLSquare) 864 return false; 865 bool IsCast = false; 866 bool ParensAreEmpty = Tok.Previous == Tok.MatchingParen; 867 bool ParensAreType = !Tok.Previous || 868 Tok.Previous->Type == TT_PointerOrReference || 869 Tok.Previous->Type == TT_TemplateCloser || 870 Tok.Previous->isSimpleTypeSpecifier(); 871 if (Style.Language == FormatStyle::LK_JavaScript && Tok.Next && 872 Tok.Next->TokenText == "in") 873 return false; 874 bool ParensCouldEndDecl = 875 Tok.Next && Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace); 876 bool IsSizeOfOrAlignOf = 877 LeftOfParens && LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof); 878 if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf && 879 ((Contexts.size() > 1 && Contexts[Contexts.size() - 2].IsExpression) || 880 (Tok.Next && Tok.Next->isBinaryOperator()))) 881 IsCast = true; 882 else if (Tok.Next && Tok.Next->isNot(tok::string_literal) && 883 (Tok.Next->Tok.isLiteral() || 884 Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) 885 IsCast = true; 886 // If there is an identifier after the (), it is likely a cast, unless 887 // there is also an identifier before the (). 
888 else if (LeftOfParens && 889 (LeftOfParens->Tok.getIdentifierInfo() == nullptr || 890 LeftOfParens->is(tok::kw_return)) && 891 LeftOfParens->Type != TT_OverloadedOperator && 892 LeftOfParens->isNot(tok::at) && 893 LeftOfParens->Type != TT_TemplateCloser && Tok.Next) { 894 if (Tok.Next->isOneOf(tok::identifier, tok::numeric_constant)) { 895 IsCast = true; 896 } else { 897 // Use heuristics to recognize c style casting. 898 FormatToken *Prev = Tok.Previous; 899 if (Prev && Prev->isOneOf(tok::amp, tok::star)) 900 Prev = Prev->Previous; 901 902 if (Prev && Tok.Next && Tok.Next->Next) { 903 bool NextIsUnary = Tok.Next->isUnaryOperator() || 904 Tok.Next->isOneOf(tok::amp, tok::star); 905 IsCast = NextIsUnary && Tok.Next->Next->isOneOf( 906 tok::identifier, tok::numeric_constant); 907 } 908 909 for (; Prev != Tok.MatchingParen; Prev = Prev->Previous) { 910 if (!Prev || !Prev->isOneOf(tok::kw_const, tok::identifier)) { 911 IsCast = false; 912 break; 913 } 914 } 915 } 916 } 917 return IsCast && !ParensAreEmpty; 918 } 919 920 /// \brief Return the type of the given token assuming it is * or &. 
921 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression, 922 bool InTemplateArgument) { 923 if (Style.Language == FormatStyle::LK_JavaScript) 924 return TT_BinaryOperator; 925 926 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 927 if (!PrevToken) 928 return TT_UnaryOperator; 929 930 const FormatToken *NextToken = Tok.getNextNonComment(); 931 if (!NextToken || NextToken->is(tok::l_brace)) 932 return TT_Unknown; 933 934 if (PrevToken->is(tok::coloncolon)) 935 return TT_PointerOrReference; 936 937 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, 938 tok::comma, tok::semi, tok::kw_return, tok::colon, 939 tok::equal, tok::kw_delete, tok::kw_sizeof) || 940 PrevToken->Type == TT_BinaryOperator || 941 PrevToken->Type == TT_ConditionalExpr || 942 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen) 943 return TT_UnaryOperator; 944 945 if (NextToken->is(tok::l_square) && NextToken->Type != TT_LambdaLSquare) 946 return TT_PointerOrReference; 947 if (NextToken->isOneOf(tok::kw_operator, tok::comma)) 948 return TT_PointerOrReference; 949 950 if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen && 951 PrevToken->MatchingParen->Previous && 952 PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof, 953 tok::kw_decltype)) 954 return TT_PointerOrReference; 955 956 if (PrevToken->Tok.isLiteral() || 957 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true, 958 tok::kw_false) || 959 NextToken->Tok.isLiteral() || 960 NextToken->isOneOf(tok::kw_true, tok::kw_false) || 961 NextToken->isUnaryOperator() || 962 // If we know we're in a template argument, there are no named 963 // declarations. Thus, having an identifier on the right-hand side 964 // indicates a binary operator. 
965 (InTemplateArgument && NextToken->Tok.isAnyIdentifier())) 966 return TT_BinaryOperator; 967 968 // This catches some cases where evaluation order is used as control flow: 969 // aaa && aaa->f(); 970 const FormatToken *NextNextToken = NextToken->getNextNonComment(); 971 if (NextNextToken && NextNextToken->is(tok::arrow)) 972 return TT_BinaryOperator; 973 974 // It is very unlikely that we are going to find a pointer or reference type 975 // definition on the RHS of an assignment. 976 if (IsExpression) 977 return TT_BinaryOperator; 978 979 return TT_PointerOrReference; 980 } 981 982 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) { 983 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 984 if (!PrevToken || PrevToken->Type == TT_CastRParen) 985 return TT_UnaryOperator; 986 987 // Use heuristics to recognize unary operators. 988 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square, 989 tok::question, tok::colon, tok::kw_return, 990 tok::kw_case, tok::at, tok::l_brace)) 991 return TT_UnaryOperator; 992 993 // There can't be two consecutive binary operators. 994 if (PrevToken->Type == TT_BinaryOperator) 995 return TT_UnaryOperator; 996 997 // Fall back to marking the token as binary operator. 998 return TT_BinaryOperator; 999 } 1000 1001 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 
  TokenType determineIncrementUsage(const FormatToken &Tok) {
    const FormatToken *PrevToken = Tok.getPreviousNonComment();
    // With nothing in front, or directly after the ")" of a cast, the operator
    // is classified as a plain unary operator.
    if (!PrevToken || PrevToken->Type == TT_CastRParen)
      return TT_UnaryOperator;
    // After ")", "]" or an identifier it is classified as trailing.
    if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
      return TT_TrailingUnaryOperator;

    return TT_UnaryOperator;
  }

  // Stack of parsing contexts; the constructor pushes the first entry.
  SmallVector<Context, 8> Contexts;

  const FormatStyle &Style;
  AnnotatedLine &Line;
  // Current parsing position; the constructor initializes it to Line.First.
  FormatToken *CurrentToken;
  bool KeywordVirtualFound;
  bool AutoFound;
  IdentifierInfo &Ident_in;
};

// Pseudo precedence levels placed directly above prec::PointerToMember.
// ExpressionParser::parse() uses them to handle unary operators and "."/"->"
// with the same recursion as the real binary-operator levels.
static int PrecedenceUnaryOperator = prec::PointerToMember + 1;
static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;

/// \brief Parses binary expressions by inserting fake parenthesis based on
/// operator precedence.
class ExpressionParser {
public:
  /// \brief Starts parsing at \c Line.First, which is dereferenced
  /// unconditionally and therefore must not be null.
  ExpressionParser(AnnotatedLine &Line) : Current(Line.First) {
    // Skip leading "}", e.g. in "} else if (...) {".
    if (Current->is(tok::r_brace))
      next();
  }

  /// \brief Parse expressions with the given operator precedence.
  ///
  /// Every level above \p Precedence is handled first by recursion. Operators
  /// found at exactly \p Precedence are numbered, and once the expression at
  /// this level ends it is wrapped in fake parentheses starting at its first
  /// token (only if at least one such operator was seen).
  void parse(int Precedence = 0) {
    // Skip 'return' and ObjC selector colons as they are not part of a binary
    // expression.
    while (Current &&
           (Current->is(tok::kw_return) ||
            (Current->is(tok::colon) && (Current->Type == TT_ObjCMethodExpr ||
                                         Current->Type == TT_DictLiteral))))
      next();

    // PrecedenceArrowAndPeriod is the highest level handled; anything above
    // it ends the recursion.
    if (!Current || Precedence > PrecedenceArrowAndPeriod)
      return;

    // Conditional expressions need to be parsed separately for proper nesting.
    if (Precedence == prec::Conditional) {
      parseConditionalExpr();
      return;
    }

    // Parse unary operators, which all have a higher precedence than binary
    // operators.
    if (Precedence == PrecedenceUnaryOperator) {
      parseUnaryOperator();
      return;
    }

    FormatToken *Start = Current;
    FormatToken *LatestOperator = nullptr;
    unsigned OperatorIndex = 0;

    while (Current) {
      // Consume operators with higher precedence.
      parse(Precedence + 1);

      int CurrentPrecedence = getCurrentPrecedence();

      // A selector name at this level begins a new sub-expression: close the
      // one collected so far (if it contained an operator) and restart here.
      if (Current && Current->Type == TT_SelectorName &&
          Precedence == CurrentPrecedence) {
        if (LatestOperator)
          addFakeParenthesis(Start, prec::Level(Precedence));
        Start = Current;
      }

      // At the end of the line, at a closing bracket, or when the current
      // token's precedence is below the level being parsed, insert fake
      // parenthesis and return.
      if (!Current || Current->closesScope() ||
          (CurrentPrecedence != -1 && CurrentPrecedence < Precedence)) {
        if (LatestOperator) {
          LatestOperator->LastOperator = true;
          if (Precedence == PrecedenceArrowAndPeriod) {
            // Call expressions don't have a binary operator precedence.
            addFakeParenthesis(Start, prec::Unknown);
          } else {
            addFakeParenthesis(Start, prec::Level(Precedence));
          }
        }
        return;
      }

      // Consume scopes: (), [], <> and {}
      if (Current->opensScope()) {
        // Parse the scope's contents from the lowest level until a closing
        // token (or the end of the line) is reached, then step past it.
        while (Current && !Current->closesScope()) {
          next();
          parse();
        }
        next();
      } else {
        // Operator found.
        if (CurrentPrecedence == Precedence) {
          LatestOperator = Current;
          Current->OperatorIndex = OperatorIndex;
          ++OperatorIndex;
        }

        next(/*SkipPastLeadingComments=*/false);
      }
    }
  }

private:
  /// \brief Gets the precedence (+1) of the given token for binary operators
  /// and other tokens that we treat like binary operators.
  int getCurrentPrecedence() {
    // Returns -1 when there is no current token or when it matches none of
    // the cases below.
    if (Current) {
      const FormatToken *NextNonComment = Current->getNextNonComment();
      if (Current->Type == TT_ConditionalExpr)
        return prec::Conditional;
      else if (NextNonComment && NextNonComment->is(tok::colon) &&
               NextNonComment->Type == TT_DictLiteral)
        // A token directly in front of a dict-literal colon (a key).
        return prec::Comma;
      else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon ||
               Current->Type == TT_SelectorName ||
               (Current->is(tok::comment) && NextNonComment &&
                NextNonComment->Type == TT_SelectorName))
        return 0;
      else if (Current->Type == TT_RangeBasedForLoopColon)
        return prec::Comma;
      else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
        return Current->getPrecedence();
      else if (Current->isOneOf(tok::period, tok::arrow))
        return PrecedenceArrowAndPeriod;
    }
    return -1;
  }

  /// \brief Records a fake "(" of the given precedence on \p Start and a
  /// matching fake ")" on the token before \c Current.
  ///
  /// When \c Current is null, only the opening side is recorded. Levels above
  /// prec::Unknown additionally mark both ends as delimiting a binary
  /// expression.
  void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
    Start->FakeLParens.push_back(Precedence);
    if (Precedence > prec::Unknown)
      Start->StartsBinaryExpression = true;
    if (Current) {
      ++Current->Previous->FakeRParens;
      if (Precedence > prec::Unknown)
        Current->Previous->EndsBinaryExpression = true;
    }
  }

  /// \brief Parse unary operator expressions and surround them with fake
  /// parentheses if appropriate.
  void parseUnaryOperator() {
    // Not a unary operator: continue with the next higher level instead.
    if (!Current || Current->Type != TT_UnaryOperator) {
      parse(PrecedenceArrowAndPeriod);
      return;
    }

    FormatToken *Start = Current;
    next();
    parseUnaryOperator();

    // The actual precedence doesn't matter.
    addFakeParenthesis(Start, prec::Unknown);
  }

  /// \brief Parses a conditional expression.
  ///
  /// Fake parentheses of prec::Conditional are added only if both a "?" and a
  /// subsequent token of type TT_ConditionalExpr are found.
  void parseConditionalExpr() {
    FormatToken *Start = Current;
    parse(prec::LogicalOr);
    if (!Current || !Current->is(tok::question))
      return;
    next();
    parseConditionalExpr();
    if (!Current || Current->Type != TT_ConditionalExpr)
      return;
    next();
    parseConditionalExpr();
    addFakeParenthesis(Start, prec::Conditional);
  }

  /// \brief Advances \c Current by one token and then past trailing comments.
  ///
  /// With \p SkipPastLeadingComments set to false, only trailing comments
  /// without a preceding newline (NewlinesBefore == 0) are skipped.
  void next(bool SkipPastLeadingComments = true) {
    if (Current)
      Current = Current->Next;
    while (Current &&
           (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
           Current->isTrailingComment())
      Current = Current->Next;
  }

  // Current parsing position; null once the end of the line is reached.
  FormatToken *Current;
};

} // end anonymous namespace

// Walks \p Lines from back to front. A line that consists of a single comment
// token takes over the Level of the line remembered in NextNonCommentLine.
// Any other line becomes the remembered line, unless it starts with "}", in
// which case nothing is remembered. Child lines are processed recursively.
void
TokenAnnotator::setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) {
  const AnnotatedLine *NextNonCommentLine = nullptr;
  for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
                                                          E = Lines.rend();
       I != E; ++I) {
    if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
        (*I)->First->Next == nullptr)
      (*I)->Level = NextNonCommentLine->Level;
    else
      NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;

    setCommentLineLevels((*I)->Children);
  }
}

// Annotates \p Line and, before that, all of its children. The line type is
// determined by AnnotatingParser; lines of type LT_Invalid are not processed
// any further.
void TokenAnnotator::annotate(AnnotatedLine &Line) {
  for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
                                                  E = Line.Children.end();
       I != E; ++I) {
    annotate(**I);
  }
  AnnotatingParser Parser(Style, Line, Ident_in);
  Line.Type = Parser.parseLine();
  if (Line.Type == LT_Invalid)
    return;

  ExpressionParser ExprParser(Line);
  ExprParser.parse();

  // The type of the first token can override the line type for Objective-C.
  if (Line.First->Type == TT_ObjCMethodSpecifier)
    Line.Type = LT_ObjCMethodDecl;
  else if (Line.First->Type == TT_ObjCDecl)
    Line.Type = LT_ObjCDecl;
  else if (Line.First->Type == TT_ObjCProperty)
    Line.Type = LT_ObjCProperty;

  Line.First->SpacesRequiredBefore = 1;
  Line.First->CanBreakBefore = Line.First->MustBreakBefore;
}

// This function heuristically determines whether 'Current' starts the name of a
// function declaration.
static bool isFunctionDeclarationName(const FormatToken &Current) {
  // Only a top-level TT_StartOfName token that does not directly follow
  // another TT_StartOfName token qualifies. Current.Previous is dereferenced
  // without a null check.
  if (Current.Type != TT_StartOfName ||
      Current.NestingLevel != 0 ||
      Current.Previous->Type == TT_StartOfName)
    return false;
  // Skip over template argument lists and "::identifier" qualifications until
  // the opening parenthesis; any other token means this is not a declaration.
  const FormatToken *Next = Current.Next;
  for (; Next; Next = Next->Next) {
    if (Next->Type == TT_TemplateOpener) {
      // NOTE(review): assumes a TT_TemplateOpener always has MatchingParen
      // set; otherwise the loop increment would dereference null -- confirm.
      Next = Next->MatchingParen;
    } else if (Next->is(tok::coloncolon)) {
      Next = Next->Next;
      if (!Next || !Next->is(tok::identifier))
        return false;
    } else if (Next->is(tok::l_paren)) {
      break;
    } else {
      return false;
    }
  }
  if (!Next)
    return false;
  assert(Next->is(tok::l_paren));
  // Empty parentheses are accepted as a declaration.
  if (Next->Next == Next->MatchingParen)
    return true;
  // Otherwise the first decisive token inside the parentheses settles it:
  // type-like tokens mean declaration, a "{" or a literal means it is not.
  for (const FormatToken *Tok = Next->Next; Tok != Next->MatchingParen;
       Tok = Tok->Next) {
    if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
        Tok->Type == TT_PointerOrReference || Tok->Type == TT_StartOfName)
      return true;
    if (Tok->isOneOf(tok::l_brace, tok::string_literal) || Tok->Tok.isLiteral())
      return false;
  }
  return false;
}

// Computes the per-token formatting data of \p Line (and, first, of all its
// children): required spaces, MustBreakBefore / CanBreakBefore, TotalLength
// and SplitPenalty. Afterwards the unbreakable tail lengths are calculated and
// every token role precomputes its formatting information.
void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
  for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
                                                  E = Line.Children.end();
       I != E; ++I) {
    calculateFormattingInformation(**I);
  }

  // A multiline first token is charged a full column limit.
  Line.First->TotalLength =
      Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
  if (!Line.First->Next)
    return;
  FormatToken *Current = Line.First->Next;
  bool InFunctionDecl = Line.MightBeFunctionDecl;
  while (Current) {
    if (isFunctionDeclarationName(*Current))
      Current->Type = TT_FunctionDeclarationName;
    if (Current->Type == TT_LineComment) {
      // Directly after the opening bracket of a braced init list the spacing
      // follows Cpp11BracedListStyle; elsewhere SpacesBeforeTrailingComments.
      if (Current->Previous->BlockKind == BK_BracedInit &&
          Current->Previous->opensScope())
        Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
      else
        Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;

      // If we find a trailing comment, iterate backwards to determine whether
      // it seems to relate to a specific parameter. If so, break before that
      // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
      // to the previous line in:
      //   SomeFunction(a,
      //                b, // comment
      //                c);
      if (!Current->HasUnescapedNewline) {
        for (FormatToken *Parameter = Current->Previous; Parameter;
             Parameter = Parameter->Previous) {
          if (Parameter->isOneOf(tok::comment, tok::r_brace))
            break;
          if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
            if (Parameter->Previous->Type != TT_CtorInitializerComma &&
                Parameter->HasUnescapedNewline)
              Parameter->MustBreakBefore = true;
            break;
          }
        }
      }
    } else if (Current->SpacesRequiredBefore == 0 &&
               spaceRequiredBefore(Line, *Current)) {
      Current->SpacesRequiredBefore = 1;
    }

    // A break that was already required stays required.
    Current->MustBreakBefore =
        Current->MustBreakBefore || mustBreakBefore(Line, *Current);

    if (Style.AlwaysBreakAfterDefinitionReturnType &&
        InFunctionDecl && Current->Type == TT_FunctionDeclarationName &&
        !Line.Last->isOneOf(tok::semi, tok::comment)) // Only for definitions.
      // FIXME: Line.Last points to other characters than tok::semi
      // and tok::lbrace.
      Current->MustBreakBefore = true;

    Current->CanBreakBefore =
        Current->MustBreakBefore || canBreakBefore(Line, *Current);
    // A single child line attached to the previous token is counted with its
    // own length (plus one), or with a full column limit if it ends in a
    // trailing comment.
    unsigned ChildSize = 0;
    if (Current->Previous->Children.size() == 1) {
      FormatToken &LastOfChild = *Current->Previous->Children[0]->Last;
      ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
                                                  : LastOfChild.TotalLength + 1;
    }
    // Forced breaks, several child lines or a multiline token add a full
    // column limit to the running length.
    if (Current->MustBreakBefore || Current->Previous->Children.size() > 1 ||
        Current->IsMultiline)
      Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
    else
      Current->TotalLength = Current->Previous->TotalLength +
                             Current->ColumnWidth + ChildSize +
                             Current->SpacesRequiredBefore;

    // From the constructor initializer colon onwards, tokens are no longer
    // treated as part of the function declaration.
    if (Current->Type == TT_CtorInitializerColon)
      InFunctionDecl = false;

    // FIXME: Only calculate this if CanBreakBefore is true once static
    // initializers etc. are sorted out.
    // FIXME: Move magic numbers to a better place.
    Current->SplitPenalty = 20 * Current->BindingStrength +
                            splitPenalty(Line, *Current, InFunctionDecl);

    Current = Current->Next;
  }

  calculateUnbreakableTailLengths(Line);
  for (Current = Line.First; Current != nullptr; Current = Current->Next) {
    if (Current->Role)
      Current->Role->precomputeFormattingInfos(Current);
  }

  DEBUG({ printDebugInfo(Line); });
}

// Walks the line from its last token to its first. Each token receives the
// accumulated width (ColumnWidth + SpacesRequiredBefore) of the tokens that
// follow it, up to the next token that can be broken before or that is a
// comment or string literal.
void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
  unsigned UnbreakableTailLength = 0;
  FormatToken *Current = Line.Last;
  while (Current) {
    Current->UnbreakableTailLength = UnbreakableTailLength;
    if (Current->CanBreakBefore ||
        Current->isOneOf(tok::comment, tok::string_literal)) {
      UnbreakableTailLength = 0;
    } else {
      UnbreakableTailLength +=
          Current->ColumnWidth + Current->SpacesRequiredBefore;
    }
    Current = Current->Previous;
  }
}

// Returns the penalty for breaking the line between Tok.Previous ("Left") and
// \p Tok ("Right"). The checks are ordered and the first match wins.
// Tok.Previous is dereferenced unconditionally, so \p Tok must not be the
// first token of the line.
unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
                                      const FormatToken &Tok,
                                      bool InFunctionDecl) {
  const FormatToken &Left = *Tok.Previous;
  const FormatToken &Right = Tok;

  if (Left.is(tok::semi))
    return 0;
  // After a comma, or before an identifier that is followed by a dict literal
  // token.
  if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next &&
                              Right.Next->Type == TT_DictLiteral))
    return 1;
  if (Right.is(tok::l_square)) {
    if (Style.Language == FormatStyle::LK_Proto)
      return 1;
    if (Right.Type != TT_ObjCMethodExpr && Right.Type != TT_LambdaLSquare)
      return 500;
  }
  if (Right.Type == TT_StartOfName ||
      Right.Type == TT_FunctionDeclarationName || Right.is(tok::kw_operator)) {
    if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
      return 3;
    if (Left.Type == TT_StartOfName)
      return 20;
    if (InFunctionDecl && Right.NestingLevel == 0)
      return Style.PenaltyReturnTypeOnItsOwnLine;
    return 200;
  }
  if (Left.is(tok::equal) && Right.is(tok::l_brace))
    return 150;
  if (Left.Type == TT_CastRParen)
    return 100;
  if (Left.is(tok::coloncolon) ||
      (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
    return 500;
  if (Left.isOneOf(tok::kw_class, tok::kw_struct))
    return 5000;

  if (Left.Type == TT_RangeBasedForLoopColon ||
      Left.Type == TT_InheritanceColon)
    return 2;

  if (Right.isMemberAccess()) {
    if (Left.is(tok::r_paren) && Left.MatchingParen &&
        Left.MatchingParen->ParameterCount > 0)
      return 20; // Should be smaller than breaking at a nested comma.
    return 150;
  }

  if (Right.Type == TT_TrailingAnnotation &&
      (!Right.Next || Right.Next->isNot(tok::l_paren))) {
    // Generally, breaking before a trailing annotation is bad unless it is
    // function-like. It seems to be especially preferable to keep standard
    // annotations (i.e. "const", "final" and "override") on the same line,
    // so annotations shorter than 10 characters cost an extra 50.
    // The base penalty is slightly lower directly after ")" (100 vs. 120), so
    // that a break, if needed, goes in front of the first annotation and
    // annotations like "const override" are kept together.
    bool is_short_annotation = Right.TokenText.size() < 10;
    return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
  }

  // In for-loops, prefer breaking at ',' and ';'.
  // Breaking after "=" in a line that starts with "for" costs 4.
  if (Line.First->is(tok::kw_for) && Left.is(tok::equal))
    return 4;

  // In Objective-C method expressions, prefer breaking before "param:" over
  // breaking after it.
  if (Right.Type == TT_SelectorName)
    return 0;
  if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
    return Line.MightBeFunctionDecl ? 50 : 500;

  if (Left.is(tok::l_paren) && InFunctionDecl)
    return 100;
  if (Left.is(tok::equal) && InFunctionDecl)
    return 110;
  if (Right.is(tok::r_brace))
    return 1;
  if (Left.Type == TT_TemplateOpener)
    return 100;
  // After any other opening bracket the penalty depends on whether the scope
  // holds more than one parameter.
  if (Left.opensScope())
    return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
                                   : 19;

  if (Right.is(tok::lessless)) {
    if (Left.is(tok::string_literal)) {
      // Look at the literal's text without the surrounding quotes and outer
      // whitespace. If it is longer than one character and ends in ':' or
      // '=', breaking before the following "<<" is more expensive.
      StringRef Content = Left.TokenText;
      if (Content.startswith("\""))
        Content = Content.drop_front(1);
      if (Content.endswith("\""))
        Content = Content.drop_back(1);
      Content = Content.trim();
      if (Content.size() > 1 &&
          (Content.back() == ':' || Content.back() == '='))
        return 25;
    }
    return 1; // Breaking at a << is really cheap.
  }
  if (Left.Type == TT_ConditionalExpr)
    return prec::Conditional;
  // Otherwise use the precedence of Left as the penalty, or 3 if Left has no
  // precedence.
  prec::Level Level = Left.getPrecedence();

  if (Level != prec::Unknown)
    return Level;

  return 3;
}

// Decides whether a space is required between the adjacent tokens \p Left and
// \p Right. The rules form an ordered chain in which the first matching rule
// determines the result; if no rule matches, a space is required.
bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
                                          const FormatToken &Left,
                                          const FormatToken &Right) {
  // Language-specific rules (protocol buffers, JavaScript) come first.
  if (Style.Language == FormatStyle::LK_Proto) {
    if (Right.is(tok::period) &&
        (Left.TokenText == "optional" || Left.TokenText == "required" ||
         Left.TokenText == "repeated"))
      return true;
    if (Right.is(tok::l_paren) &&
        (Left.TokenText == "returns" || Left.TokenText == "option"))
      return true;
  } else if (Style.Language == FormatStyle::LK_JavaScript) {
    if (Left.TokenText == "var")
      return true;
  }
  if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
    return true;
  if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
      Left.Tok.getObjCKeywordID() == tok::objc_property)
    return true;
  // Around "#" and "##" a space is only required between "#" and "##" or
  // between two consecutive "#".
  if (Right.is(tok::hashhash))
    return Left.is(tok::hash);
  if (Left.isOneOf(tok::hashhash, tok::hash))
    return Right.is(tok::hash);
  if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
    return Style.SpaceInEmptyParentheses;
  // Just inside parentheses: cast parentheses and all other parentheses are
  // controlled by separate style options.
  if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
    return (Right.Type == TT_CastRParen ||
            (Left.MatchingParen && Left.MatchingParen->Type == TT_CastRParen))
               ? Style.SpacesInCStyleCastParentheses
               : Style.SpacesInParentheses;
  // With SpacesInAngles: exactly one of "Left opens a template" and "Right
  // closes a template" holds.
  if (Style.SpacesInAngles &&
      ((Left.Type == TT_TemplateOpener) != (Right.Type == TT_TemplateCloser)))
    return true;
  if (Right.isOneOf(tok::semi, tok::comma))
    return false;
  if (Right.is(tok::less) &&
      (Left.isOneOf(tok::kw_template, tok::r_paren) ||
       (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
    return true;
  if (Left.is(tok::arrow) || Right.is(tok::arrow))
    return false;
  if (Left.isOneOf(tok::exclaim, tok::tilde))
    return false;
  if (Left.is(tok::at) &&
      Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
                    tok::numeric_constant, tok::l_paren, tok::l_brace,
                    tok::kw_true, tok::kw_false))
    return false;
  if (Left.is(tok::coloncolon))
    return false;
  if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace))
    return (Left.is(tok::less) && Style.Standard == FormatStyle::LS_Cpp03) ||
           !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren,
                         tok::r_paren, tok::less);
  if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
    return false;
  if (Right.is(tok::ellipsis))
    return Left.Tok.isLiteral();
  if (Left.is(tok::l_square) && Right.is(tok::amp))
    return false;
  // Spacing around pointer/reference tokens depends on Style.PointerAlignment.
  if (Right.Type == TT_PointerOrReference)
    return Left.Tok.isLiteral() ||
           ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) &&
            Style.PointerAlignment != FormatStyle::PAS_Left);
  if (Right.Type == TT_FunctionTypeLParen && Left.isNot(tok::l_paren) &&
      (Left.Type != TT_PointerOrReference ||
       Style.PointerAlignment != FormatStyle::PAS_Right))
    return true;
  if (Left.Type == TT_PointerOrReference)
    return Right.Tok.isLiteral() || Right.Type == TT_BlockComment ||
           ((Right.Type != TT_PointerOrReference) &&
            Right.isNot(tok::l_paren) &&
            Style.PointerAlignment != FormatStyle::PAS_Right && Left.Previous &&
            !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
  // NOTE(review): every case in which Left is "(" has already returned in the
  // parentheses rule above, so this check appears to be unreachable.
  if (Right.is(tok::star) && Left.is(tok::l_paren))
    return false;
  // Just inside square brackets: array initializers follow
  // SpacesInContainerLiterals, array subscripts follow SpacesInSquareBrackets.
  if (Left.is(tok::l_square))
    return (Left.Type == TT_ArrayInitializerLSquare &&
            Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) ||
           (Left.Type == TT_ArraySubscriptLSquare &&
            Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
  if (Right.is(tok::r_square))
    return Right.MatchingParen &&
           ((Style.SpacesInContainerLiterals &&
             Right.MatchingParen->Type == TT_ArrayInitializerLSquare) ||
            (Style.SpacesInSquareBrackets &&
             Right.MatchingParen->Type == TT_ArraySubscriptLSquare));
  if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr &&
      Right.Type != TT_LambdaLSquare && Left.isNot(tok::numeric_constant) &&
      Left.Type != TT_DictLiteral)
    return false;
  if (Left.is(tok::colon))
    return Left.Type != TT_ObjCMethodExpr;
  if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
    return !Left.Children.empty(); // No spaces in "{}".
  // Just inside the braces of anything that is not a block, the spacing is
  // controlled by Cpp11BracedListStyle.
  if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
      (Right.is(tok::r_brace) && Right.MatchingParen &&
       Right.MatchingParen->BlockKind != BK_Block))
    return !Style.Cpp11BracedListStyle;
  // No space after a block comment whose text ends in "=*/".
  if (Left.Type == TT_BlockComment)
    return !Left.TokenText.endswith("=*/");
  if (Right.is(tok::l_paren)) {
    if (Left.is(tok::r_paren) && Left.Type == TT_AttributeParen)
      return true;
    // A space before "(" is required in Objective-C declarations, after
    // new/delete/";", after the listed control-flow tokens (unless
    // SpaceBeforeParens is SBPO_Never), and after identifiers or
    // function-like keywords outside of preprocessor directives when
    // SpaceBeforeParens is SBPO_Always.
    return Line.Type == LT_ObjCDecl ||
           Left.isOneOf(tok::kw_new, tok::kw_delete, tok::semi) ||
           (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
            (Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while,
                          tok::kw_switch, tok::kw_case) ||
             (Left.is(tok::kw_catch) &&
              (!Left.Previous || Left.Previous->isNot(tok::period))) ||
             Left.IsForEachMacro)) ||
           (Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
            (Left.is(tok::identifier) || Left.isFunctionLikeKeyword()) &&
            Line.Type != LT_PreprocessorDirective);
  }
  if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
    return false;
  if (Right.Type == TT_UnaryOperator)
    return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
           (Left.isNot(tok::colon) || Left.Type != TT_ObjCMethodExpr);
  // No space before a "{" that is not a block, has a following non-comment
  // token and comes after an identifier, ">", "]", ")" or a simple type
  // specifier.
  if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
                    tok::r_paren) ||
       Left.isSimpleTypeSpecifier()) &&
      Right.is(tok::l_brace) && Right.getNextNonComment() &&
      Right.BlockKind != BK_Block)
    return false;
  if (Left.is(tok::period) || Right.is(tok::period))
    return false;
  // No space between an identifier spelled "L" and a following "#".
  if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
    return false;
  // Default: separate the two tokens by a space.
  return true;
}

// Decides whether a space is required in front of \p Tok. Tok.Previous is
// dereferenced without a null check, so \p Tok must not be the first token of
// the line.
bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
                                         const FormatToken &Tok) {
  if (Tok.Tok.getIdentifierInfo() && Tok.Previous->Tok.getIdentifierInfo())
    return true; // Never ever merge two identifiers.
  // After an implicit string literal, a space is required exactly when the
  // token's whitespace range is non-empty.
  if (Tok.Previous->Type == TT_ImplicitStringLiteral)
    return Tok.WhitespaceRange.getBegin() != Tok.WhitespaceRange.getEnd();
  if (Line.Type == LT_ObjCMethodDecl) {
    if (Tok.Previous->Type == TT_ObjCMethodSpecifier)
      return true;
    if (Tok.Previous->is(tok::r_paren) && Tok.is(tok::identifier))
      // Don't space between ')' and <id>
      return false;
  }
  // No spaces around "=" in Objective-C property lines.
  if (Line.Type == LT_ObjCProperty &&
      (Tok.is(tok::equal) || Tok.Previous->is(tok::equal)))
    return false;

  if (Tok.Type == TT_TrailingReturnArrow ||
      Tok.Previous->Type == TT_TrailingReturnArrow)
    return true;
  if (Tok.Previous->is(tok::comma))
    return true;
  if (Tok.is(tok::comma))
    return false;
  if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
    return true;
  // Directly after "operator", only "::" is separated by a space.
  if (Tok.Previous->Tok.is(tok::kw_operator))
    return Tok.is(tok::coloncolon);
  if (Tok.Type == TT_OverloadedOperatorLParen)
    return false;
  // A colon only gets a space in front of it if none of the exclusions below
  // applies.
  if (Tok.is(tok::colon))
    return !Line.First->isOneOf(tok::kw_case, tok::kw_default) &&
           Tok.getNextNonComment() && Tok.Type != TT_ObjCMethodExpr &&
           !Tok.Previous->is(tok::question) &&
           !(Tok.Type == TT_InlineASMColon &&
             Tok.Previous->is(tok::coloncolon)) &&
           (Tok.Type != TT_DictLiteral || Style.SpacesInContainerLiterals);
  if (Tok.Previous->Type == TT_UnaryOperator)
    return Tok.Type == TT_BinaryOperator;
  if (Tok.Previous->Type == TT_CastRParen)
    return Style.SpaceAfterCStyleCast || Tok.Type == TT_BinaryOperator;
  // ">>": two template closers are separated unless the standard is C++11 and
  // SpacesInAngles is off; any other pair of ">" tokens is not separated.
  if (Tok.Previous->is(tok::greater) && Tok.is(tok::greater)) {
    return Tok.Type == TT_TemplateCloser &&
           Tok.Previous->Type == TT_TemplateCloser &&
           (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
  }
  if (Tok.isOneOf(tok::arrowstar, tok::periodstar) ||
      Tok.Previous->isOneOf(tok::arrowstar, tok::periodstar))
    return false;
  if (!Style.SpaceBeforeAssignmentOperators &&
      Tok.getPrecedence() == prec::Assignment)
    return false;
  if ((Tok.Type == TT_BinaryOperator && !Tok.Previous->is(tok::l_paren)) ||
      Tok.Previous->Type == TT_BinaryOperator ||
      Tok.Previous->Type == TT_ConditionalExpr)
    return true;
  if (Tok.Previous->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
    return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
  // "<" in a line that starts with "#", unless it directly follows "(".
  if (Tok.is(tok::less) && Tok.Previous->isNot(tok::l_paren) &&
      Line.First->is(tok::hash))
    return true;
  if (Tok.Type == TT_TrailingUnaryOperator)
    return false;
  if (Tok.Previous->Type == TT_RegexLiteral)
    return false;
  // Everything else is decided by the generic rules for a pair of tokens.
  return spaceRequiredBetween(Line, *Tok.Previous, Tok);
}

// Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
static bool isAllmanBrace(const FormatToken &Tok) {
  return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block &&
         Tok.Type != TT_ObjCBlockLBrace && Tok.Type != TT_DictLiteral;
}

// Decides whether a line break is mandatory in front of \p Right. "Left" is
// Right.Previous, which is dereferenced without a null check. Apart from the
// initial NewlinesBefore check, the rules form a single if/else-if chain: the
// first matching branch returns, and only if none matches are the checks after
// the chain reached.
bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
                                     const FormatToken &Right) {
  const FormatToken &Left = *Right.Previous;
  // Tokens preceded by more than one newline always start a new line.
  if (Right.NewlinesBefore > 1)
    return true;
  if (Right.is(tok::comment)) {
    // A comment keeps its line break if it had an unescaped one, except
    // directly after a braced-init-list token or a constructor initializer
    // colon.
    return Right.Previous->BlockKind != BK_BracedInit &&
           Right.Previous->Type != TT_CtorInitializerColon &&
           (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
  } else if (Right.Previous->isTrailingComment() ||
             (Right.isStringLiteral() && Right.Previous->isStringLiteral())) {
    return true;
  } else if (Right.Previous->IsUnterminatedLiteral) {
    return true;
  } else if (Right.is(tok::lessless) && Right.Next &&
             Right.Previous->is(tok::string_literal) &&
             Right.Next->is(tok::string_literal)) {
    // "<<" between two string literals.
    return true;
  } else if (Right.Previous->ClosesTemplateDeclaration &&
             Right.Previous->MatchingParen &&
             Right.Previous->MatchingParen->NestingLevel == 0 &&
             Style.AlwaysBreakTemplateDeclarations) {
    return true;
  } else if ((Right.Type == TT_CtorInitializerComma ||
              Right.Type == TT_CtorInitializerColon) &&
             Style.BreakConstructorInitializersBeforeComma &&
             !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) {
    return true;
  } else if (Right.is(tok::string_literal) &&
             Right.TokenText.startswith("R\"")) {
    // Raw string literals are special wrt. line breaks. The author has made a
    // deliberate choice and might have aligned the contents of the string
    // literal accordingly. Thus, we try to keep existing line breaks.
    return Right.NewlinesBefore > 0;
  } else if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 &&
             Style.Language == FormatStyle::LK_Proto) {
    // Don't put enums onto single lines in protocol buffers.
    return true;
  } else if (Style.Language == FormatStyle::LK_JavaScript &&
             Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
             !Left.Children.empty()) {
    // Support AllowShortFunctionsOnASingleLine for JavaScript.
    return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
           (Left.NestingLevel == 0 && Line.Level == 0 &&
            Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline);
  } else if (isAllmanBrace(Left) || isAllmanBrace(Right)) {
    return Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
           Style.BreakBeforeBraces == FormatStyle::BS_GNU;
  } else if (Style.Language == FormatStyle::LK_Proto &&
             Left.isNot(tok::l_brace) && Right.Type == TT_SelectorName) {
    return true;
  }

  // If the last token before a '}' is a comma or a trailing comment, the
  // intention is to insert a line break after it in order to make shuffling
  // around entries easier.
  // Find the token in front of the relevant "}": when Left is a "{" with a
  // known partner, it is the token before that partner; when Right is a "}",
  // it is the token before Right.
  const FormatToken *BeforeClosingBrace = nullptr;
  if (Left.is(tok::l_brace) && Left.MatchingParen)
    BeforeClosingBrace = Left.MatchingParen->Previous;
  else if (Right.is(tok::r_brace))
    BeforeClosingBrace = Right.Previous;
  if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
                             BeforeClosingBrace->isTrailingComment()))
    return true;

  if (Style.Language == FormatStyle::LK_JavaScript) {
    // FIXME: This might apply to other languages and token kinds.
    // Break before the second char constant in "<char constant> + <char
    // constant>".
    if (Right.is(tok::char_constant) && Left.is(tok::plus) && Left.Previous &&
        Left.Previous->is(tok::char_constant))
      return true;
  }

  return false;
}

bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
                                    const FormatToken &Right) {
  const FormatToken &Left = *Right.Previous;
  if (Left.is(tok::at))
    return false;
  if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
    return false;
  if (Right.Type == TT_StartOfName ||
      Right.Type == TT_FunctionDeclarationName || Right.is(tok::kw_operator))
    return true;
  if (Right.isTrailingComment())
    // We rely on MustBreakBefore being set correctly here as we should not
    // change the "binding" behavior of a comment.
    // The first comment in a braced lists is always interpreted as belonging to
    // the first list element. Otherwise, it should be placed outside of the
    // list.
1790 return Left.BlockKind == BK_BracedInit; 1791 if (Left.is(tok::question) && Right.is(tok::colon)) 1792 return false; 1793 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question)) 1794 return Style.BreakBeforeTernaryOperators; 1795 if (Left.Type == TT_ConditionalExpr || Left.is(tok::question)) 1796 return !Style.BreakBeforeTernaryOperators; 1797 if (Right.Type == TT_InheritanceColon) 1798 return true; 1799 if (Right.is(tok::colon) && (Right.Type != TT_CtorInitializerColon && 1800 Right.Type != TT_InlineASMColon)) 1801 return false; 1802 if (Left.is(tok::colon) && 1803 (Left.Type == TT_DictLiteral || Left.Type == TT_ObjCMethodExpr)) 1804 return true; 1805 if (Right.Type == TT_SelectorName) 1806 return true; 1807 if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty) 1808 return true; 1809 if (Left.ClosesTemplateDeclaration) 1810 return true; 1811 if (Right.Type == TT_RangeBasedForLoopColon || 1812 Right.Type == TT_OverloadedOperatorLParen || 1813 Right.Type == TT_OverloadedOperator) 1814 return false; 1815 if (Left.Type == TT_RangeBasedForLoopColon) 1816 return true; 1817 if (Right.Type == TT_RangeBasedForLoopColon) 1818 return false; 1819 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || 1820 Left.Type == TT_UnaryOperator || Left.is(tok::kw_operator)) 1821 return false; 1822 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) 1823 return false; 1824 if (Left.is(tok::l_paren) && Left.Type == TT_AttributeParen) 1825 return false; 1826 if (Left.is(tok::l_paren) && Left.Previous && 1827 (Left.Previous->Type == TT_BinaryOperator || 1828 Left.Previous->Type == TT_CastRParen || Left.Previous->is(tok::kw_if))) 1829 return false; 1830 if (Right.Type == TT_ImplicitStringLiteral) 1831 return false; 1832 1833 if (Right.is(tok::r_paren) || Right.Type == TT_TemplateCloser) 1834 return false; 1835 1836 // We only break before r_brace if there was a corresponding break before 1837 // the l_brace, which is tracked by 
BreakBeforeClosingBrace. 1838 if (Right.is(tok::r_brace)) 1839 return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block; 1840 1841 // Allow breaking after a trailing annotation, e.g. after a method 1842 // declaration. 1843 if (Left.Type == TT_TrailingAnnotation) 1844 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren, 1845 tok::less, tok::coloncolon); 1846 1847 if (Right.is(tok::kw___attribute)) 1848 return true; 1849 1850 if (Left.is(tok::identifier) && Right.is(tok::string_literal)) 1851 return true; 1852 1853 if (Right.is(tok::identifier) && Right.Next && 1854 Right.Next->Type == TT_DictLiteral) 1855 return true; 1856 1857 if (Left.Type == TT_CtorInitializerComma && 1858 Style.BreakConstructorInitializersBeforeComma) 1859 return false; 1860 if (Right.Type == TT_CtorInitializerComma && 1861 Style.BreakConstructorInitializersBeforeComma) 1862 return true; 1863 if (Left.is(tok::greater) && Right.is(tok::greater) && 1864 Left.Type != TT_TemplateCloser) 1865 return false; 1866 if (Right.Type == TT_BinaryOperator && 1867 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None && 1868 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All || 1869 Right.getPrecedence() != prec::Assignment)) 1870 return true; 1871 if (Left.Type == TT_ArrayInitializerLSquare) 1872 return true; 1873 if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const)) 1874 return true; 1875 if (Left.isBinaryOperator() && !Left.isOneOf(tok::arrowstar, tok::lessless) && 1876 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All && 1877 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None || 1878 Left.getPrecedence() == prec::Assignment)) 1879 return true; 1880 return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, 1881 tok::kw_class, tok::kw_struct) || 1882 Right.isMemberAccess() || 1883 Right.isOneOf(tok::lessless, tok::colon, tok::l_square, tok::at) || 1884 (Left.is(tok::r_paren) && 1885 Right.isOneOf(tok::identifier, 
tok::kw_const)) || 1886 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)); 1887 } 1888 1889 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { 1890 llvm::errs() << "AnnotatedTokens:\n"; 1891 const FormatToken *Tok = Line.First; 1892 while (Tok) { 1893 llvm::errs() << " M=" << Tok->MustBreakBefore 1894 << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type 1895 << " S=" << Tok->SpacesRequiredBefore 1896 << " B=" << Tok->BlockParameterCount 1897 << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName() 1898 << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind 1899 << " FakeLParens="; 1900 for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) 1901 llvm::errs() << Tok->FakeLParens[i] << "/"; 1902 llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n"; 1903 if (!Tok->Next) 1904 assert(Tok == Line.Last); 1905 Tok = Tok->Next; 1906 } 1907 llvm::errs() << "----\n"; 1908 } 1909 1910 } // namespace format 1911 } // namespace clang 1912