1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements a token annotator, i.e. creates 12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "TokenAnnotator.h" 17 #include "clang/Basic/SourceManager.h" 18 #include "clang/Lex/Lexer.h" 19 20 namespace clang { 21 namespace format { 22 23 static bool isUnaryOperator(const AnnotatedToken &Tok) { 24 switch (Tok.FormatTok.Tok.getKind()) { 25 case tok::plus: 26 case tok::plusplus: 27 case tok::minus: 28 case tok::minusminus: 29 case tok::exclaim: 30 case tok::tilde: 31 case tok::kw_sizeof: 32 case tok::kw_alignof: 33 return true; 34 default: 35 return false; 36 } 37 } 38 39 static bool isBinaryOperator(const AnnotatedToken &Tok) { 40 // Comma is a binary operator, but does not behave as such wrt. formatting. 41 return getPrecedence(Tok) > prec::Comma; 42 } 43 44 // Returns the previous token ignoring comments. 45 static AnnotatedToken *getPreviousToken(AnnotatedToken &Tok) { 46 AnnotatedToken *PrevToken = Tok.Parent; 47 while (PrevToken != NULL && PrevToken->is(tok::comment)) 48 PrevToken = PrevToken->Parent; 49 return PrevToken; 50 } 51 static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) { 52 return getPreviousToken(const_cast<AnnotatedToken &>(Tok)); 53 } 54 55 static bool isTrailingComment(AnnotatedToken *Tok) { 56 return Tok != NULL && Tok->is(tok::comment) && 57 (Tok->Children.empty() || 58 Tok->Children[0].FormatTok.NewlinesBefore > 0); 59 } 60 61 // Returns the next token ignoring comments. 62 static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) { 63 if (Tok.Children.empty()) 64 return NULL; 65 const AnnotatedToken *NextToken = &Tok.Children[0]; 66 while (NextToken->is(tok::comment)) { 67 if (NextToken->Children.empty()) 68 return NULL; 69 NextToken = &NextToken->Children[0]; 70 } 71 return NextToken; 72 } 73 74 static bool closesScope(const AnnotatedToken &Tok) { 75 return Tok.isOneOf(tok::r_paren, tok::r_brace, tok::r_square) || 76 Tok.Type == TT_TemplateCloser; 77 } 78 79 static bool opensScope(const AnnotatedToken &Tok) { 80 return Tok.isOneOf(tok::l_paren, tok::l_brace, tok::l_square) || 81 Tok.Type == TT_TemplateOpener; 82 } 83 84 /// \brief A parser that gathers additional information about tokens. 85 /// 86 /// The \c TokenAnnotator tries to match parenthesis and square brakets and 87 /// store a parenthesis levels. It also tries to resolve matching "<" and ">" 88 /// into template parameter lists. 89 class AnnotatingParser { 90 public: 91 AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line, 92 IdentifierInfo &Ident_in) 93 : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First), 94 KeywordVirtualFound(false), Ident_in(Ident_in) { 95 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/ false)); 96 } 97 98 private: 99 bool parseAngle() { 100 if (CurrentToken == NULL) 101 return false; 102 ScopedContextCreator ContextCreator(*this, tok::less, 10); 103 AnnotatedToken *Left = CurrentToken->Parent; 104 Contexts.back().IsExpression = false; 105 while (CurrentToken != NULL) { 106 if (CurrentToken->is(tok::greater)) { 107 Left->MatchingParen = CurrentToken; 108 CurrentToken->MatchingParen = Left; 109 CurrentToken->Type = TT_TemplateCloser; 110 next(); 111 return true; 112 } 113 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace, 114 tok::pipepipe, tok::ampamp, tok::question, 115 tok::colon)) 116 return false; 117 updateParameterCount(Left, CurrentToken); 118 if (!consumeToken()) 119 return false; 120 } 121 return false; 122 } 123 124 bool parseParens(bool LookForDecls = false) { 125 if (CurrentToken == NULL) 126 return false; 127 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1); 128 129 // FIXME: This is a bit of a hack. Do better. 130 Contexts.back().ColonIsForRangeExpr = 131 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; 132 133 bool StartsObjCMethodExpr = false; 134 AnnotatedToken *Left = CurrentToken->Parent; 135 if (CurrentToken->is(tok::caret)) { 136 // ^( starts a block. 137 Left->Type = TT_ObjCBlockLParen; 138 } else if (AnnotatedToken *MaybeSel = Left->Parent) { 139 // @selector( starts a selector. 140 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent && 141 MaybeSel->Parent->is(tok::at)) { 142 StartsObjCMethodExpr = true; 143 } 144 } 145 146 if (StartsObjCMethodExpr) { 147 Contexts.back().ColonIsObjCMethodExpr = true; 148 Left->Type = TT_ObjCMethodExpr; 149 } 150 151 while (CurrentToken != NULL) { 152 // LookForDecls is set when "if (" has been seen. Check for 153 // 'identifier' '*' 'identifier' followed by not '=' -- this 154 // '*' has to be a binary operator but determineStarAmpUsage() will 155 // categorize it as an unary operator, so set the right type here. 156 if (LookForDecls && !CurrentToken->Children.empty()) { 157 AnnotatedToken &Prev = *CurrentToken->Parent; 158 AnnotatedToken &Next = CurrentToken->Children[0]; 159 if (Prev.Parent->is(tok::identifier) && 160 Prev.isOneOf(tok::star, tok::amp, tok::ampamp) && 161 CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) { 162 Prev.Type = TT_BinaryOperator; 163 LookForDecls = false; 164 } 165 } 166 167 if (CurrentToken->is(tok::r_paren)) { 168 Left->MatchingParen = CurrentToken; 169 CurrentToken->MatchingParen = Left; 170 171 if (StartsObjCMethodExpr) { 172 CurrentToken->Type = TT_ObjCMethodExpr; 173 if (Contexts.back().FirstObjCSelectorName != NULL) { 174 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 175 Contexts.back().LongestObjCSelectorName; 176 } 177 } 178 179 next(); 180 return true; 181 } 182 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace)) 183 return false; 184 updateParameterCount(Left, CurrentToken); 185 if (!consumeToken()) 186 return false; 187 } 188 return false; 189 } 190 191 bool parseSquare() { 192 if (!CurrentToken) 193 return false; 194 195 // A '[' could be an index subscript (after an indentifier or after 196 // ')' or ']'), it could be the start of an Objective-C method 197 // expression, or it could the the start of an Objective-C array literal. 198 AnnotatedToken *Left = CurrentToken->Parent; 199 AnnotatedToken *Parent = getPreviousToken(*Left); 200 bool StartsObjCMethodExpr = 201 Contexts.back().CanBeExpression && 202 (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, 203 tok::kw_return, tok::kw_throw) || 204 isUnaryOperator(*Parent) || Parent->Type == TT_ObjCForIn || 205 Parent->Type == TT_CastRParen || 206 getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) > 207 prec::Unknown); 208 ScopedContextCreator ContextCreator(*this, tok::l_square, 10); 209 Contexts.back().IsExpression = true; 210 bool StartsObjCArrayLiteral = Parent && Parent->is(tok::at); 211 212 if (StartsObjCMethodExpr) { 213 Contexts.back().ColonIsObjCMethodExpr = true; 214 Left->Type = TT_ObjCMethodExpr; 215 } else if (StartsObjCArrayLiteral) { 216 Left->Type = TT_ObjCArrayLiteral; 217 } 218 219 while (CurrentToken != NULL) { 220 if (CurrentToken->is(tok::r_square)) { 221 if (!CurrentToken->Children.empty() && 222 CurrentToken->Children[0].is(tok::l_paren)) { 223 // An ObjC method call is rarely followed by an open parenthesis. 224 // FIXME: Do we incorrectly label ":" with this? 225 StartsObjCMethodExpr = false; 226 Left->Type = TT_Unknown; 227 } 228 if (StartsObjCMethodExpr) { 229 CurrentToken->Type = TT_ObjCMethodExpr; 230 // determineStarAmpUsage() thinks that '*' '[' is allocating an 231 // array of pointers, but if '[' starts a selector then '*' is a 232 // binary operator. 233 if (Parent != NULL && Parent->Type == TT_PointerOrReference) 234 Parent->Type = TT_BinaryOperator; 235 } else if (StartsObjCArrayLiteral) { 236 CurrentToken->Type = TT_ObjCArrayLiteral; 237 } 238 Left->MatchingParen = CurrentToken; 239 CurrentToken->MatchingParen = Left; 240 if (Contexts.back().FirstObjCSelectorName != NULL) 241 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 242 Contexts.back().LongestObjCSelectorName; 243 next(); 244 return true; 245 } 246 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace)) 247 return false; 248 updateParameterCount(Left, CurrentToken); 249 if (!consumeToken()) 250 return false; 251 } 252 return false; 253 } 254 255 bool parseBrace() { 256 // Lines are fine to end with '{'. 257 if (CurrentToken == NULL) 258 return true; 259 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); 260 AnnotatedToken *Left = CurrentToken->Parent; 261 while (CurrentToken != NULL) { 262 if (CurrentToken->is(tok::r_brace)) { 263 Left->MatchingParen = CurrentToken; 264 CurrentToken->MatchingParen = Left; 265 next(); 266 return true; 267 } 268 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) 269 return false; 270 updateParameterCount(Left, CurrentToken); 271 if (!consumeToken()) 272 return false; 273 } 274 return true; 275 } 276 277 void updateParameterCount(AnnotatedToken *Left, AnnotatedToken *Current) { 278 if (Current->is(tok::comma)) 279 ++Left->ParameterCount; 280 else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) 281 Left->ParameterCount = 1; 282 } 283 284 bool parseConditional() { 285 while (CurrentToken != NULL) { 286 if (CurrentToken->is(tok::colon)) { 287 CurrentToken->Type = TT_ConditionalExpr; 288 next(); 289 return true; 290 } 291 if (!consumeToken()) 292 return false; 293 } 294 return false; 295 } 296 297 bool parseTemplateDeclaration() { 298 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 299 CurrentToken->Type = TT_TemplateOpener; 300 next(); 301 if (!parseAngle()) 302 return false; 303 if (CurrentToken != NULL) 304 CurrentToken->Parent->ClosesTemplateDeclaration = true; 305 return true; 306 } 307 return false; 308 } 309 310 bool consumeToken() { 311 AnnotatedToken *Tok = CurrentToken; 312 next(); 313 switch (Tok->FormatTok.Tok.getKind()) { 314 case tok::plus: 315 case tok::minus: 316 // At the start of the line, +/- specific ObjectiveC method 317 // declarations. 318 if (Tok->Parent == NULL) 319 Tok->Type = TT_ObjCMethodSpecifier; 320 break; 321 case tok::colon: 322 if (Tok->Parent == NULL) 323 return false; 324 // Colons from ?: are handled in parseConditional(). 325 if (Tok->Parent->is(tok::r_paren) && Contexts.size() == 1) { 326 Tok->Type = TT_CtorInitializerColon; 327 } else if (Contexts.back().ColonIsObjCMethodExpr || 328 Line.First.Type == TT_ObjCMethodSpecifier) { 329 Tok->Type = TT_ObjCMethodExpr; 330 Tok->Parent->Type = TT_ObjCSelectorName; 331 if (Tok->Parent->FormatTok.TokenLength > 332 Contexts.back().LongestObjCSelectorName) 333 Contexts.back().LongestObjCSelectorName = 334 Tok->Parent->FormatTok.TokenLength; 335 if (Contexts.back().FirstObjCSelectorName == NULL) 336 Contexts.back().FirstObjCSelectorName = Tok->Parent; 337 } else if (Contexts.back().ColonIsForRangeExpr) { 338 Tok->Type = TT_RangeBasedForLoopColon; 339 } else if (Contexts.size() == 1) { 340 Tok->Type = TT_InheritanceColon; 341 } else if (Contexts.back().ContextKind == tok::l_paren) { 342 Tok->Type = TT_InlineASMColon; 343 } 344 break; 345 case tok::kw_if: 346 case tok::kw_while: 347 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) { 348 next(); 349 if (!parseParens(/*LookForDecls=*/ true)) 350 return false; 351 } 352 break; 353 case tok::kw_for: 354 Contexts.back().ColonIsForRangeExpr = true; 355 next(); 356 if (!parseParens()) 357 return false; 358 break; 359 case tok::l_paren: 360 if (!parseParens()) 361 return false; 362 if (Line.MustBeDeclaration) 363 Line.MightBeFunctionDecl = true; 364 break; 365 case tok::l_square: 366 if (!parseSquare()) 367 return false; 368 break; 369 case tok::l_brace: 370 if (!parseBrace()) 371 return false; 372 break; 373 case tok::less: 374 if (parseAngle()) 375 Tok->Type = TT_TemplateOpener; 376 else { 377 Tok->Type = TT_BinaryOperator; 378 CurrentToken = Tok; 379 next(); 380 } 381 break; 382 case tok::r_paren: 383 case tok::r_square: 384 return false; 385 case tok::r_brace: 386 // Lines can start with '}'. 387 if (Tok->Parent != NULL) 388 return false; 389 break; 390 case tok::greater: 391 Tok->Type = TT_BinaryOperator; 392 break; 393 case tok::kw_operator: 394 while (CurrentToken && CurrentToken->isNot(tok::l_paren)) { 395 if (CurrentToken->isOneOf(tok::star, tok::amp)) 396 CurrentToken->Type = TT_PointerOrReference; 397 consumeToken(); 398 } 399 if (CurrentToken) 400 CurrentToken->Type = TT_OverloadedOperatorLParen; 401 break; 402 case tok::question: 403 parseConditional(); 404 break; 405 case tok::kw_template: 406 parseTemplateDeclaration(); 407 break; 408 case tok::identifier: 409 if (Line.First.is(tok::kw_for) && 410 Tok->FormatTok.Tok.getIdentifierInfo() == &Ident_in) 411 Tok->Type = TT_ObjCForIn; 412 break; 413 default: 414 break; 415 } 416 return true; 417 } 418 419 void parseIncludeDirective() { 420 next(); 421 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 422 next(); 423 while (CurrentToken != NULL) { 424 if (CurrentToken->isNot(tok::comment) || 425 !CurrentToken->Children.empty()) 426 CurrentToken->Type = TT_ImplicitStringLiteral; 427 next(); 428 } 429 } else { 430 while (CurrentToken != NULL) { 431 if (CurrentToken->is(tok::string_literal)) 432 // Mark these string literals as "implicit" literals, too, so that 433 // they are not split or line-wrapped. 434 CurrentToken->Type = TT_ImplicitStringLiteral; 435 next(); 436 } 437 } 438 } 439 440 void parseWarningOrError() { 441 next(); 442 // We still want to format the whitespace left of the first token of the 443 // warning or error. 444 next(); 445 while (CurrentToken != NULL) { 446 CurrentToken->Type = TT_ImplicitStringLiteral; 447 next(); 448 } 449 } 450 451 void parsePreprocessorDirective() { 452 next(); 453 if (CurrentToken == NULL) 454 return; 455 // Hashes in the middle of a line can lead to any strange token 456 // sequence. 457 if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL) 458 return; 459 switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 460 case tok::pp_include: 461 case tok::pp_import: 462 parseIncludeDirective(); 463 break; 464 case tok::pp_error: 465 case tok::pp_warning: 466 parseWarningOrError(); 467 break; 468 default: 469 break; 470 } 471 while (CurrentToken != NULL) 472 next(); 473 } 474 475 public: 476 LineType parseLine() { 477 int PeriodsAndArrows = 0; 478 AnnotatedToken *LastPeriodOrArrow = NULL; 479 bool CanBeBuilderTypeStmt = true; 480 if (CurrentToken->is(tok::hash)) { 481 parsePreprocessorDirective(); 482 return LT_PreprocessorDirective; 483 } 484 while (CurrentToken != NULL) { 485 if (CurrentToken->is(tok::kw_virtual)) 486 KeywordVirtualFound = true; 487 if (CurrentToken->isOneOf(tok::period, tok::arrow)) { 488 ++PeriodsAndArrows; 489 LastPeriodOrArrow = CurrentToken; 490 } 491 AnnotatedToken *TheToken = CurrentToken; 492 if (!consumeToken()) 493 return LT_Invalid; 494 if (getPrecedence(*TheToken) > prec::Assignment && 495 TheToken->Type == TT_BinaryOperator) 496 CanBeBuilderTypeStmt = false; 497 } 498 if (KeywordVirtualFound) 499 return LT_VirtualFunctionDecl; 500 501 // Assume a builder-type call if there are 2 or more "." and "->". 502 if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) { 503 LastPeriodOrArrow->LastInChainOfCalls = true; 504 return LT_BuilderTypeCall; 505 } 506 507 if (Line.First.Type == TT_ObjCMethodSpecifier) { 508 if (Contexts.back().FirstObjCSelectorName != NULL) 509 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 510 Contexts.back().LongestObjCSelectorName; 511 return LT_ObjCMethodDecl; 512 } 513 514 return LT_Other; 515 } 516 517 private: 518 void next() { 519 if (CurrentToken != NULL) { 520 determineTokenType(*CurrentToken); 521 CurrentToken->BindingStrength = Contexts.back().BindingStrength; 522 } 523 524 if (CurrentToken != NULL && !CurrentToken->Children.empty()) 525 CurrentToken = &CurrentToken->Children[0]; 526 else 527 CurrentToken = NULL; 528 529 // Reset token type in case we have already looked at it and then recovered 530 // from an error (e.g. failure to find the matching >). 531 if (CurrentToken != NULL) 532 CurrentToken->Type = TT_Unknown; 533 } 534 535 /// \brief A struct to hold information valid in a specific context, e.g. 536 /// a pair of parenthesis. 537 struct Context { 538 Context(tok::TokenKind ContextKind, unsigned BindingStrength, 539 bool IsExpression) 540 : ContextKind(ContextKind), BindingStrength(BindingStrength), 541 LongestObjCSelectorName(0), ColonIsForRangeExpr(false), 542 ColonIsObjCMethodExpr(false), FirstObjCSelectorName(NULL), 543 IsExpression(IsExpression), CanBeExpression(true) {} 544 545 tok::TokenKind ContextKind; 546 unsigned BindingStrength; 547 unsigned LongestObjCSelectorName; 548 bool ColonIsForRangeExpr; 549 bool ColonIsObjCMethodExpr; 550 AnnotatedToken *FirstObjCSelectorName; 551 bool IsExpression; 552 bool CanBeExpression; 553 }; 554 555 /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime 556 /// of each instance. 557 struct ScopedContextCreator { 558 AnnotatingParser &P; 559 560 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind, 561 unsigned Increase) 562 : P(P) { 563 P.Contexts.push_back( 564 Context(ContextKind, P.Contexts.back().BindingStrength + Increase, 565 P.Contexts.back().IsExpression)); 566 } 567 568 ~ScopedContextCreator() { P.Contexts.pop_back(); } 569 }; 570 571 void determineTokenType(AnnotatedToken &Current) { 572 if (getPrecedence(Current) == prec::Assignment) { 573 Contexts.back().IsExpression = true; 574 for (AnnotatedToken *Previous = Current.Parent; 575 Previous && Previous->isNot(tok::comma); 576 Previous = Previous->Parent) { 577 if (Previous->is(tok::r_square)) 578 Previous = Previous->MatchingParen; 579 if (Previous->Type == TT_BinaryOperator && 580 Previous->isOneOf(tok::star, tok::amp)) { 581 Previous->Type = TT_PointerOrReference; 582 } 583 } 584 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw) || 585 (Current.is(tok::l_paren) && !Line.MustBeDeclaration && 586 (!Current.Parent || Current.Parent->isNot(tok::kw_for)))) { 587 Contexts.back().IsExpression = true; 588 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { 589 for (AnnotatedToken *Previous = Current.Parent; 590 Previous && Previous->isOneOf(tok::star, tok::amp); 591 Previous = Previous->Parent) 592 Previous->Type = TT_PointerOrReference; 593 } else if (Current.Parent && 594 Current.Parent->Type == TT_CtorInitializerColon) { 595 Contexts.back().IsExpression = true; 596 } else if (Current.is(tok::kw_new)) { 597 Contexts.back().CanBeExpression = false; 598 } 599 600 if (Current.Type == TT_Unknown) { 601 if (Current.Parent && Current.is(tok::identifier) && 602 ((Current.Parent->is(tok::identifier) && 603 Current.Parent->FormatTok.Tok.getIdentifierInfo() 604 ->getPPKeywordID() == tok::pp_not_keyword) || 605 Current.Parent->Type == TT_PointerOrReference || 606 Current.Parent->Type == TT_TemplateCloser)) { 607 Current.Type = TT_StartOfName; 608 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { 609 Current.Type = 610 determineStarAmpUsage(Current, Contexts.back().IsExpression); 611 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { 612 Current.Type = determinePlusMinusCaretUsage(Current); 613 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { 614 Current.Type = determineIncrementUsage(Current); 615 } else if (Current.is(tok::exclaim)) { 616 Current.Type = TT_UnaryOperator; 617 } else if (isBinaryOperator(Current)) { 618 Current.Type = TT_BinaryOperator; 619 } else if (Current.is(tok::comment)) { 620 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr, 621 Lex.getLangOpts())); 622 if (StringRef(Data).startswith("//")) 623 Current.Type = TT_LineComment; 624 else 625 Current.Type = TT_BlockComment; 626 } else if (Current.is(tok::r_paren)) { 627 bool ParensNotExpr = !Current.Parent || 628 Current.Parent->Type == TT_PointerOrReference || 629 Current.Parent->Type == TT_TemplateCloser; 630 bool ParensCouldEndDecl = 631 !Current.Children.empty() && 632 Current.Children[0].isOneOf(tok::equal, tok::semi, tok::l_brace); 633 bool IsSizeOfOrAlignOf = 634 Current.MatchingParen && Current.MatchingParen->Parent && 635 Current.MatchingParen->Parent->isOneOf(tok::kw_sizeof, 636 tok::kw_alignof); 637 if (ParensNotExpr && !ParensCouldEndDecl && !IsSizeOfOrAlignOf && 638 Contexts.back().IsExpression) 639 // FIXME: We need to get smarter and understand more cases of casts. 640 Current.Type = TT_CastRParen; 641 } else if (Current.is(tok::at) && Current.Children.size()) { 642 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) { 643 case tok::objc_interface: 644 case tok::objc_implementation: 645 case tok::objc_protocol: 646 Current.Type = TT_ObjCDecl; 647 break; 648 case tok::objc_property: 649 Current.Type = TT_ObjCProperty; 650 break; 651 default: 652 break; 653 } 654 } 655 } 656 } 657 658 /// \brief Return the type of the given token assuming it is * or &. 659 TokenType 660 determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) { 661 const AnnotatedToken *PrevToken = getPreviousToken(Tok); 662 if (PrevToken == NULL) 663 return TT_UnaryOperator; 664 665 const AnnotatedToken *NextToken = getNextToken(Tok); 666 if (NextToken == NULL) 667 return TT_Unknown; 668 669 if (PrevToken->is(tok::l_paren) && !IsExpression) 670 return TT_PointerOrReference; 671 672 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, 673 tok::comma, tok::semi, tok::kw_return, tok::colon, 674 tok::equal) || 675 PrevToken->Type == TT_BinaryOperator || 676 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen) 677 return TT_UnaryOperator; 678 679 if (NextToken->is(tok::l_square)) 680 return TT_PointerOrReference; 681 682 if (PrevToken->FormatTok.Tok.isLiteral() || 683 PrevToken->isOneOf(tok::r_paren, tok::r_square) || 684 NextToken->FormatTok.Tok.isLiteral() || isUnaryOperator(*NextToken)) 685 return TT_BinaryOperator; 686 687 // "*(" is probably part of a function type if within template parameters. 688 // Otherwise, it is probably a binary operator. 689 if (NextToken->is(tok::l_paren)) 690 return Contexts.back().ContextKind == tok::less ? TT_PointerOrReference 691 : TT_BinaryOperator; 692 693 // It is very unlikely that we are going to find a pointer or reference type 694 // definition on the RHS of an assignment. 695 if (IsExpression) 696 return TT_BinaryOperator; 697 698 return TT_PointerOrReference; 699 } 700 701 TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) { 702 const AnnotatedToken *PrevToken = getPreviousToken(Tok); 703 if (PrevToken == NULL) 704 return TT_UnaryOperator; 705 706 // Use heuristics to recognize unary operators. 707 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square, 708 tok::question, tok::colon, tok::kw_return, 709 tok::kw_case, tok::at, tok::l_brace)) 710 return TT_UnaryOperator; 711 712 // There can't be two consecutive binary operators. 713 if (PrevToken->Type == TT_BinaryOperator) 714 return TT_UnaryOperator; 715 716 // Fall back to marking the token as binary operator. 717 return TT_BinaryOperator; 718 } 719 720 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 721 TokenType determineIncrementUsage(const AnnotatedToken &Tok) { 722 const AnnotatedToken *PrevToken = getPreviousToken(Tok); 723 if (PrevToken == NULL) 724 return TT_UnaryOperator; 725 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier)) 726 return TT_TrailingUnaryOperator; 727 728 return TT_UnaryOperator; 729 } 730 731 SmallVector<Context, 8> Contexts; 732 733 SourceManager &SourceMgr; 734 Lexer &Lex; 735 AnnotatedLine &Line; 736 AnnotatedToken *CurrentToken; 737 bool KeywordVirtualFound; 738 IdentifierInfo &Ident_in; 739 }; 740 741 /// \brief Parses binary expressions by inserting fake parenthesis based on 742 /// operator precedence. 743 class ExpressionParser { 744 public: 745 ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {} 746 747 /// \brief Parse expressions with the given operatore precedence. 748 void parse(int Precedence = 0) { 749 if (Precedence > prec::PointerToMember || Current == NULL) 750 return; 751 752 // Skip over "return" until we can properly parse it. 753 if (Current->is(tok::kw_return)) 754 next(); 755 756 // Eagerly consume trailing comments. 757 while (isTrailingComment(Current)) { 758 next(); 759 } 760 761 AnnotatedToken *Start = Current; 762 bool OperatorFound = false; 763 764 while (Current) { 765 // Consume operators with higher precedence. 766 parse(prec::Level(Precedence + 1)); 767 768 int CurrentPrecedence = 0; 769 if (Current) { 770 if (Current->Type == TT_ConditionalExpr) 771 CurrentPrecedence = 1 + (int) prec::Conditional; 772 else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon || 773 Current->Type == TT_CtorInitializerColon) 774 CurrentPrecedence = 1; 775 else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma)) 776 CurrentPrecedence = 1 + (int) getPrecedence(*Current); 777 } 778 779 // At the end of the line or when an operator with higher precedence is 780 // found, insert fake parenthesis and return. 781 if (Current == NULL || closesScope(*Current) || 782 (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) { 783 if (OperatorFound) { 784 ++Start->FakeLParens; 785 if (Current) 786 ++Current->Parent->FakeRParens; 787 } 788 return; 789 } 790 791 // Consume scopes: (), [], <> and {} 792 if (opensScope(*Current)) { 793 AnnotatedToken *Left = Current; 794 while (Current && !closesScope(*Current)) { 795 next(); 796 parse(); 797 } 798 // Remove fake parens that just duplicate the real parens. 799 if (Current && Left->Children[0].FakeLParens > 0 && 800 Current->Parent->FakeRParens > 0) { 801 --Left->Children[0].FakeLParens; 802 --Current->Parent->FakeRParens; 803 } 804 next(); 805 } else { 806 // Operator found. 807 if (CurrentPrecedence == Precedence) 808 OperatorFound = true; 809 810 next(); 811 } 812 } 813 } 814 815 private: 816 void next() { 817 if (Current != NULL) 818 Current = Current->Children.empty() ? NULL : &Current->Children[0]; 819 } 820 821 AnnotatedToken *Current; 822 }; 823 824 void TokenAnnotator::annotate(AnnotatedLine &Line) { 825 AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in); 826 Line.Type = Parser.parseLine(); 827 if (Line.Type == LT_Invalid) 828 return; 829 830 ExpressionParser ExprParser(Line); 831 ExprParser.parse(); 832 833 if (Line.First.Type == TT_ObjCMethodSpecifier) 834 Line.Type = LT_ObjCMethodDecl; 835 else if (Line.First.Type == TT_ObjCDecl) 836 Line.Type = LT_ObjCDecl; 837 else if (Line.First.Type == TT_ObjCProperty) 838 Line.Type = LT_ObjCProperty; 839 840 Line.First.SpacesRequiredBefore = 1; 841 Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore; 842 Line.First.CanBreakBefore = Line.First.MustBreakBefore; 843 844 Line.First.TotalLength = Line.First.FormatTok.TokenLength; 845 } 846 847 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { 848 if (Line.First.Children.empty()) 849 return; 850 AnnotatedToken *Current = &Line.First.Children[0]; 851 while (Current != NULL) { 852 if (Current->Type == TT_LineComment) 853 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; 854 else 855 Current->SpacesRequiredBefore = 856 spaceRequiredBefore(Line, *Current) ? 1 : 0; 857 858 if (Current->FormatTok.MustBreakBefore) { 859 Current->MustBreakBefore = true; 860 } else if (Current->Type == TT_LineComment) { 861 Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0; 862 } else if (isTrailingComment(Current->Parent) || 863 (Current->is(tok::string_literal) && 864 Current->Parent->is(tok::string_literal))) { 865 Current->MustBreakBefore = true; 866 } else if (Current->is(tok::lessless) && !Current->Children.empty() && 867 Current->Parent->is(tok::string_literal) && 868 Current->Children[0].is(tok::string_literal)) { 869 Current->MustBreakBefore = true; 870 } else { 871 Current->MustBreakBefore = false; 872 } 873 Current->CanBreakBefore = 874 Current->MustBreakBefore || canBreakBefore(Line, *Current); 875 if (Current->MustBreakBefore) 876 Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit; 877 else 878 Current->TotalLength = 879 Current->Parent->TotalLength + Current->FormatTok.TokenLength + 880 Current->SpacesRequiredBefore; 881 // FIXME: Only calculate this if CanBreakBefore is true once static 882 // initializers etc. are sorted out. 883 // FIXME: Move magic numbers to a better place. 884 Current->SplitPenalty = 885 20 * Current->BindingStrength + splitPenalty(Line, *Current); 886 887 Current = Current->Children.empty() ? NULL : &Current->Children[0]; 888 } 889 } 890 891 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, 892 const AnnotatedToken &Tok) { 893 const AnnotatedToken &Left = *Tok.Parent; 894 const AnnotatedToken &Right = Tok; 895 896 if (Right.Type == TT_StartOfName) { 897 if (Line.First.is(tok::kw_for)) 898 return 3; 899 else if (Line.MightBeFunctionDecl && Right.BindingStrength == 1) 900 // FIXME: Clean up hack of using BindingStrength to find top-level names. 901 return Style.PenaltyReturnTypeOnItsOwnLine; 902 else 903 return 100; 904 } 905 if (Left.is(tok::equal) && Right.is(tok::l_brace)) 906 return 150; 907 if (Left.is(tok::coloncolon)) 908 return 500; 909 910 if (Left.Type == TT_RangeBasedForLoopColon || 911 Left.Type == TT_InheritanceColon) 912 return 2; 913 914 if (Right.isOneOf(tok::arrow, tok::period)) { 915 if (Line.Type == LT_BuilderTypeCall) 916 return prec::PointerToMember; 917 if (Left.isOneOf(tok::r_paren, tok::r_square) && Left.MatchingParen && 918 Left.MatchingParen->ParameterCount > 0) 919 return 20; // Should be smaller than breaking at a nested comma. 920 return 150; 921 } 922 923 // In for-loops, prefer breaking at ',' and ';'. 924 if (Line.First.is(tok::kw_for) && Left.is(tok::equal)) 925 return 4; 926 927 if (Left.is(tok::semi)) 928 return 0; 929 if (Left.is(tok::comma)) 930 return 1; 931 932 // In Objective-C method expressions, prefer breaking before "param:" over 933 // breaking after it. 934 if (Right.Type == TT_ObjCSelectorName) 935 return 0; 936 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) 937 return 20; 938 939 if (opensScope(Left)) 940 return Left.ParameterCount > 1 ? prec::Comma : 20; 941 942 if (Right.is(tok::lessless)) { 943 if (Left.is(tok::string_literal)) { 944 StringRef Content = StringRef(Left.FormatTok.Tok.getLiteralData(), 945 Left.FormatTok.TokenLength); 946 Content = Content.drop_back(1).drop_front(1).trim(); 947 if (Content.size() > 1 && 948 (Content.back() == ':' || Content.back() == '=')) 949 return 100; 950 } 951 return prec::Shift; 952 } 953 if (Left.Type == TT_ConditionalExpr) 954 return prec::Conditional; 955 prec::Level Level = getPrecedence(Left); 956 957 if (Level != prec::Unknown) 958 return Level; 959 960 return 3; 961 } 962 963 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, 964 const AnnotatedToken &Left, 965 const AnnotatedToken &Right) { 966 if (Right.is(tok::hashhash)) 967 return Left.is(tok::hash); 968 if (Left.isOneOf(tok::hashhash, tok::hash)) 969 return Right.is(tok::hash); 970 if (Right.isOneOf(tok::r_paren, tok::semi, tok::comma)) 971 return false; 972 if (Right.is(tok::less) && 973 (Left.is(tok::kw_template) || 974 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) 975 return true; 976 if (Left.is(tok::arrow) || Right.is(tok::arrow)) 977 return false; 978 if (Left.isOneOf(tok::exclaim, tok::tilde)) 979 return false; 980 if (Left.is(tok::at) && 981 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant, 982 tok::numeric_constant, tok::l_paren, tok::l_brace, 983 tok::kw_true, tok::kw_false)) 984 return false; 985 if (Left.is(tok::coloncolon)) 986 return false; 987 if (Right.is(tok::coloncolon)) 988 return !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren); 989 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) 990 return false; 991 if (Right.Type == TT_PointerOrReference) 992 return Left.FormatTok.Tok.isLiteral() || 993 ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) && 994 !Style.PointerBindsToType); 995 if (Left.Type == TT_PointerOrReference) 996 return Right.FormatTok.Tok.isLiteral() || 997 ((Right.Type != TT_PointerOrReference) && 998 Right.isNot(tok::l_paren) && Style.PointerBindsToType); 999 if (Right.is(tok::star) && Left.is(tok::l_paren)) 1000 return false; 1001 if (Left.is(tok::l_square)) 1002 return Left.Type == TT_ObjCArrayLiteral && Right.isNot(tok::r_square); 1003 if (Right.is(tok::r_square)) 1004 return Right.Type == TT_ObjCArrayLiteral; 1005 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr) 1006 return false; 1007 if (Left.is(tok::period) || Right.is(tok::period)) 1008 return false; 1009 if (Left.is(tok::colon)) 1010 return Left.Type != TT_ObjCMethodExpr; 1011 if (Right.is(tok::colon)) 1012 return Right.Type != TT_ObjCMethodExpr; 1013 if (Left.is(tok::l_paren)) 1014 return false; 1015 if (Right.is(tok::l_paren)) { 1016 return Line.Type == LT_ObjCDecl || 1017 Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch, 1018 tok::kw_return, tok::kw_catch, tok::kw_new, 1019 tok::kw_delete); 1020 } 1021 if (Left.is(tok::at) && 1022 Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword) 1023 return false; 1024 if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) 1025 return false; 1026 return true; 1027 } 1028 1029 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, 1030 const AnnotatedToken &Tok) { 1031 if (Tok.FormatTok.Tok.getIdentifierInfo() && 1032 Tok.Parent->FormatTok.Tok.getIdentifierInfo()) 1033 return true; // Never ever merge two identifiers. 1034 if (Line.Type == LT_ObjCMethodDecl) { 1035 if (Tok.Parent->Type == TT_ObjCMethodSpecifier) 1036 return true; 1037 if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier)) 1038 // Don't space between ')' and <id> 1039 return false; 1040 } 1041 if (Line.Type == LT_ObjCProperty && 1042 (Tok.is(tok::equal) || Tok.Parent->is(tok::equal))) 1043 return false; 1044 1045 if (Tok.Parent->is(tok::comma)) 1046 return true; 1047 if (Tok.is(tok::comma)) 1048 return false; 1049 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen) 1050 return true; 1051 if (Tok.Parent->FormatTok.Tok.is(tok::kw_operator)) 1052 return false; 1053 if (Tok.Type == TT_OverloadedOperatorLParen) 1054 return false; 1055 if (Tok.is(tok::colon)) 1056 return !Line.First.isOneOf(tok::kw_case, tok::kw_default) && 1057 !Tok.Children.empty() && Tok.Type != TT_ObjCMethodExpr; 1058 if (Tok.is(tok::l_paren) && !Tok.Children.empty() && 1059 Tok.Children[0].Type == TT_PointerOrReference && 1060 !Tok.Children[0].Children.empty() && 1061 Tok.Children[0].Children[0].isNot(tok::r_paren) && 1062 Tok.Parent->isNot(tok::l_paren)) 1063 return true; 1064 if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen) 1065 return false; 1066 if (Tok.Type == TT_UnaryOperator) 1067 return !Tok.Parent->isOneOf(tok::l_paren, tok::l_square, tok::at) && 1068 (Tok.Parent->isNot(tok::colon) || 1069 Tok.Parent->Type != TT_ObjCMethodExpr); 1070 if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) { 1071 return Tok.Type == TT_TemplateCloser && 1072 Tok.Parent->Type == TT_TemplateCloser && 1073 Style.Standard != FormatStyle::LS_Cpp11; 1074 } 1075 if (Tok.isOneOf(tok::arrowstar, tok::periodstar) || 1076 Tok.Parent->isOneOf(tok::arrowstar, tok::periodstar)) 1077 return false; 1078 if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator) 1079 return true; 1080 if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) 1081 return false; 1082 if (Tok.is(tok::less) && Line.First.is(tok::hash)) 1083 return true; 1084 if (Tok.Type == TT_TrailingUnaryOperator) 1085 return false; 1086 return spaceRequiredBetween(Line, *Tok.Parent, Tok); 1087 } 1088 1089 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, 1090 const AnnotatedToken &Right) { 1091 const AnnotatedToken &Left = *Right.Parent; 1092 if (Right.Type == TT_StartOfName) 1093 return true; 1094 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr) 1095 return false; 1096 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) 1097 return true; 1098 if (Right.Type == TT_ObjCSelectorName) 1099 return true; 1100 if (Left.ClosesTemplateDeclaration) 1101 return true; 1102 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question)) 1103 return true; 1104 if (Right.Type == TT_RangeBasedForLoopColon || 1105 Right.Type == TT_InheritanceColon) 1106 return false; 1107 if (Left.Type == TT_RangeBasedForLoopColon || 1108 Left.Type == TT_InheritanceColon) 1109 return true; 1110 if (Right.Type == TT_RangeBasedForLoopColon) 1111 return false; 1112 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || 1113 Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr || 1114 Left.isOneOf(tok::question, tok::kw_operator)) 1115 return false; 1116 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) 1117 return false; 1118 if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && Left.Parent && 1119 Left.Parent->is(tok::kw___attribute)) 1120 return false; 1121 1122 if (Right.Type == TT_LineComment) 1123 // We rely on MustBreakBefore being set correctly here as we should not 1124 // change the "binding" behavior of a comment. 1125 return false; 1126 1127 // Allow breaking after a trailing 'const', e.g. after a method declaration, 1128 // unless it is follow by ';', '{' or '='. 1129 if (Left.is(tok::kw_const) && Left.Parent != NULL && 1130 Left.Parent->is(tok::r_paren)) 1131 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal); 1132 1133 if (Right.is(tok::kw___attribute)) 1134 return true; 1135 1136 // We only break before r_brace if there was a corresponding break before 1137 // the l_brace, which is tracked by BreakBeforeClosingBrace. 1138 if (Right.isOneOf(tok::r_brace, tok::r_paren, tok::greater)) 1139 return false; 1140 if (Left.is(tok::identifier) && Right.is(tok::string_literal)) 1141 return true; 1142 return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) || 1143 Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace) || 1144 Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon) || 1145 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen && 1146 Right.isOneOf(tok::identifier, tok::kw___attribute)) || 1147 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || 1148 (Left.is(tok::l_square) && !Right.is(tok::r_square)); 1149 } 1150 1151 } // namespace format 1152 } // namespace clang 1153