1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements a token annotator, i.e. creates 12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "TokenAnnotator.h" 17 #include "clang/Basic/SourceManager.h" 18 #include "llvm/Support/Debug.h" 19 20 #define DEBUG_TYPE "format-token-annotator" 21 22 namespace clang { 23 namespace format { 24 25 namespace { 26 27 /// \brief A parser that gathers additional information about tokens. 28 /// 29 /// The \c TokenAnnotator tries to match parenthesis and square brakets and 30 /// store a parenthesis levels. It also tries to resolve matching "<" and ">" 31 /// into template parameter lists. 32 class AnnotatingParser { 33 public: 34 AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line, 35 const AdditionalKeywords &Keywords) 36 : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false), 37 Keywords(Keywords) { 38 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false)); 39 resetTokenMetadata(CurrentToken); 40 } 41 42 private: 43 bool parseAngle() { 44 if (!CurrentToken) 45 return false; 46 FormatToken *Left = CurrentToken->Previous; 47 Left->ParentBracket = Contexts.back().ContextKind; 48 ScopedContextCreator ContextCreator(*this, tok::less, 10); 49 Contexts.back().IsExpression = false; 50 // If there's a template keyword before the opening angle bracket, this is a 51 // template parameter, not an argument. 52 Contexts.back().InTemplateArgument = 53 Left->Previous && Left->Previous->Tok.isNot(tok::kw_template); 54 55 if (Style.Language == FormatStyle::LK_Java && 56 CurrentToken->is(tok::question)) 57 next(); 58 59 while (CurrentToken) { 60 if (CurrentToken->is(tok::greater)) { 61 Left->MatchingParen = CurrentToken; 62 CurrentToken->MatchingParen = Left; 63 CurrentToken->Type = TT_TemplateCloser; 64 next(); 65 return true; 66 } 67 if (CurrentToken->is(tok::question) && 68 Style.Language == FormatStyle::LK_Java) { 69 next(); 70 continue; 71 } 72 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace, 73 tok::colon, tok::question)) 74 return false; 75 // If a && or || is found and interpreted as a binary operator, this set 76 // of angles is likely part of something like "a < b && c > d". If the 77 // angles are inside an expression, the ||/&& might also be a binary 78 // operator that was misinterpreted because we are parsing template 79 // parameters. 80 // FIXME: This is getting out of hand, write a decent parser. 81 if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) && 82 CurrentToken->Previous->is(TT_BinaryOperator) && 83 Contexts[Contexts.size() - 2].IsExpression && 84 Line.First->isNot(tok::kw_template)) 85 return false; 86 updateParameterCount(Left, CurrentToken); 87 if (!consumeToken()) 88 return false; 89 } 90 return false; 91 } 92 93 bool parseParens(bool LookForDecls = false) { 94 if (!CurrentToken) 95 return false; 96 FormatToken *Left = CurrentToken->Previous; 97 Left->ParentBracket = Contexts.back().ContextKind; 98 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1); 99 100 // FIXME: This is a bit of a hack. Do better. 101 Contexts.back().ColonIsForRangeExpr = 102 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; 103 104 bool StartsObjCMethodExpr = false; 105 if (CurrentToken->is(tok::caret)) { 106 // (^ can start a block type. 107 Left->Type = TT_ObjCBlockLParen; 108 } else if (FormatToken *MaybeSel = Left->Previous) { 109 // @selector( starts a selector. 110 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous && 111 MaybeSel->Previous->is(tok::at)) { 112 StartsObjCMethodExpr = true; 113 } 114 } 115 116 if (Left->Previous && 117 (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_if, 118 tok::kw_while, tok::l_paren, tok::comma) || 119 Left->Previous->is(TT_BinaryOperator))) { 120 // static_assert, if and while usually contain expressions. 121 Contexts.back().IsExpression = true; 122 } else if (Line.InPPDirective && 123 (!Left->Previous || 124 !Left->Previous->isOneOf(tok::identifier, 125 TT_OverloadedOperator))) { 126 Contexts.back().IsExpression = true; 127 } else if (Left->Previous && Left->Previous->is(tok::r_square) && 128 Left->Previous->MatchingParen && 129 Left->Previous->MatchingParen->is(TT_LambdaLSquare)) { 130 // This is a parameter list of a lambda expression. 131 Contexts.back().IsExpression = false; 132 } else if (Contexts[Contexts.size() - 2].CaretFound) { 133 // This is the parameter list of an ObjC block. 134 Contexts.back().IsExpression = false; 135 } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) { 136 Left->Type = TT_AttributeParen; 137 } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) { 138 // The first argument to a foreach macro is a declaration. 139 Contexts.back().IsForEachMacro = true; 140 Contexts.back().IsExpression = false; 141 } else if (Left->Previous && Left->Previous->MatchingParen && 142 Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) { 143 Contexts.back().IsExpression = false; 144 } 145 146 if (StartsObjCMethodExpr) { 147 Contexts.back().ColonIsObjCMethodExpr = true; 148 Left->Type = TT_ObjCMethodExpr; 149 } 150 151 bool MightBeFunctionType = CurrentToken->is(tok::star); 152 bool HasMultipleLines = false; 153 bool HasMultipleParametersOnALine = false; 154 bool MightBeObjCForRangeLoop = 155 Left->Previous && Left->Previous->is(tok::kw_for); 156 while (CurrentToken) { 157 // LookForDecls is set when "if (" has been seen. Check for 158 // 'identifier' '*' 'identifier' followed by not '=' -- this 159 // '*' has to be a binary operator but determineStarAmpUsage() will 160 // categorize it as an unary operator, so set the right type here. 161 if (LookForDecls && CurrentToken->Next) { 162 FormatToken *Prev = CurrentToken->getPreviousNonComment(); 163 if (Prev) { 164 FormatToken *PrevPrev = Prev->getPreviousNonComment(); 165 FormatToken *Next = CurrentToken->Next; 166 if (PrevPrev && PrevPrev->is(tok::identifier) && 167 Prev->isOneOf(tok::star, tok::amp, tok::ampamp) && 168 CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) { 169 Prev->Type = TT_BinaryOperator; 170 LookForDecls = false; 171 } 172 } 173 } 174 175 if (CurrentToken->Previous->is(TT_PointerOrReference) && 176 CurrentToken->Previous->Previous->isOneOf(tok::l_paren, 177 tok::coloncolon)) 178 MightBeFunctionType = true; 179 if (CurrentToken->Previous->is(TT_BinaryOperator)) 180 Contexts.back().IsExpression = true; 181 if (CurrentToken->is(tok::r_paren)) { 182 if (MightBeFunctionType && CurrentToken->Next && 183 (CurrentToken->Next->is(tok::l_paren) || 184 (CurrentToken->Next->is(tok::l_square) && 185 !Contexts.back().IsExpression))) 186 Left->Type = TT_FunctionTypeLParen; 187 Left->MatchingParen = CurrentToken; 188 CurrentToken->MatchingParen = Left; 189 190 if (StartsObjCMethodExpr) { 191 CurrentToken->Type = TT_ObjCMethodExpr; 192 if (Contexts.back().FirstObjCSelectorName) { 193 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 194 Contexts.back().LongestObjCSelectorName; 195 } 196 } 197 198 if (Left->is(TT_AttributeParen)) 199 CurrentToken->Type = TT_AttributeParen; 200 if (Left->Previous && Left->Previous->is(TT_JavaAnnotation)) 201 CurrentToken->Type = TT_JavaAnnotation; 202 if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation)) 203 CurrentToken->Type = TT_LeadingJavaAnnotation; 204 205 if (!HasMultipleLines) 206 Left->PackingKind = PPK_Inconclusive; 207 else if (HasMultipleParametersOnALine) 208 Left->PackingKind = PPK_BinPacked; 209 else 210 Left->PackingKind = PPK_OnePerLine; 211 212 next(); 213 return true; 214 } 215 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace)) 216 return false; 217 218 if (CurrentToken->is(tok::l_brace)) 219 Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen 220 if (CurrentToken->is(tok::comma) && CurrentToken->Next && 221 !CurrentToken->Next->HasUnescapedNewline && 222 !CurrentToken->Next->isTrailingComment()) 223 HasMultipleParametersOnALine = true; 224 if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) || 225 CurrentToken->isSimpleTypeSpecifier()) 226 Contexts.back().IsExpression = false; 227 if (CurrentToken->isOneOf(tok::semi, tok::colon)) 228 MightBeObjCForRangeLoop = false; 229 if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) 230 CurrentToken->Type = TT_ObjCForIn; 231 232 FormatToken *Tok = CurrentToken; 233 if (!consumeToken()) 234 return false; 235 updateParameterCount(Left, Tok); 236 if (CurrentToken && CurrentToken->HasUnescapedNewline) 237 HasMultipleLines = true; 238 } 239 return false; 240 } 241 242 bool parseSquare() { 243 if (!CurrentToken) 244 return false; 245 246 // A '[' could be an index subscript (after an identifier or after 247 // ')' or ']'), it could be the start of an Objective-C method 248 // expression, or it could the the start of an Objective-C array literal. 249 FormatToken *Left = CurrentToken->Previous; 250 Left->ParentBracket = Contexts.back().ContextKind; 251 FormatToken *Parent = Left->getPreviousNonComment(); 252 bool StartsObjCMethodExpr = 253 Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) && 254 CurrentToken->isNot(tok::l_brace) && 255 (!Parent || 256 Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, 257 tok::kw_return, tok::kw_throw) || 258 Parent->isUnaryOperator() || 259 Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) || 260 getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown); 261 ScopedContextCreator ContextCreator(*this, tok::l_square, 10); 262 Contexts.back().IsExpression = true; 263 bool ColonFound = false; 264 265 if (StartsObjCMethodExpr) { 266 Contexts.back().ColonIsObjCMethodExpr = true; 267 Left->Type = TT_ObjCMethodExpr; 268 } else if (Parent && Parent->is(tok::at)) { 269 Left->Type = TT_ArrayInitializerLSquare; 270 } else if (Left->is(TT_Unknown)) { 271 Left->Type = TT_ArraySubscriptLSquare; 272 } 273 274 while (CurrentToken) { 275 if (CurrentToken->is(tok::r_square)) { 276 if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) && 277 Left->is(TT_ObjCMethodExpr)) { 278 // An ObjC method call is rarely followed by an open parenthesis. 279 // FIXME: Do we incorrectly label ":" with this? 280 StartsObjCMethodExpr = false; 281 Left->Type = TT_Unknown; 282 } 283 if (StartsObjCMethodExpr && CurrentToken->Previous != Left) { 284 CurrentToken->Type = TT_ObjCMethodExpr; 285 // determineStarAmpUsage() thinks that '*' '[' is allocating an 286 // array of pointers, but if '[' starts a selector then '*' is a 287 // binary operator. 288 if (Parent && Parent->is(TT_PointerOrReference)) 289 Parent->Type = TT_BinaryOperator; 290 } 291 Left->MatchingParen = CurrentToken; 292 CurrentToken->MatchingParen = Left; 293 if (Contexts.back().FirstObjCSelectorName) { 294 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 295 Contexts.back().LongestObjCSelectorName; 296 if (Left->BlockParameterCount > 1) 297 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0; 298 } 299 next(); 300 return true; 301 } 302 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace)) 303 return false; 304 if (CurrentToken->is(tok::colon)) { 305 if (Left->is(TT_ArraySubscriptLSquare)) { 306 Left->Type = TT_ObjCMethodExpr; 307 StartsObjCMethodExpr = true; 308 Contexts.back().ColonIsObjCMethodExpr = true; 309 if (Parent && Parent->is(tok::r_paren)) 310 Parent->Type = TT_CastRParen; 311 } 312 ColonFound = true; 313 } 314 if (CurrentToken->is(tok::comma) && 315 Style.Language != FormatStyle::LK_Proto && 316 (Left->is(TT_ArraySubscriptLSquare) || 317 (Left->is(TT_ObjCMethodExpr) && !ColonFound))) 318 Left->Type = TT_ArrayInitializerLSquare; 319 FormatToken *Tok = CurrentToken; 320 if (!consumeToken()) 321 return false; 322 updateParameterCount(Left, Tok); 323 } 324 return false; 325 } 326 327 bool parseBrace() { 328 if (CurrentToken) { 329 FormatToken *Left = CurrentToken->Previous; 330 Left->ParentBracket = Contexts.back().ContextKind; 331 332 if (Contexts.back().CaretFound) 333 Left->Type = TT_ObjCBlockLBrace; 334 Contexts.back().CaretFound = false; 335 336 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); 337 Contexts.back().ColonIsDictLiteral = true; 338 if (Left->BlockKind == BK_BracedInit) 339 Contexts.back().IsExpression = true; 340 341 while (CurrentToken) { 342 if (CurrentToken->is(tok::r_brace)) { 343 Left->MatchingParen = CurrentToken; 344 CurrentToken->MatchingParen = Left; 345 next(); 346 return true; 347 } 348 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) 349 return false; 350 updateParameterCount(Left, CurrentToken); 351 if (CurrentToken->isOneOf(tok::colon, tok::l_brace)) { 352 FormatToken *Previous = CurrentToken->getPreviousNonComment(); 353 if ((CurrentToken->is(tok::colon) || 354 Style.Language == FormatStyle::LK_Proto) && 355 Previous->is(tok::identifier)) 356 Previous->Type = TT_SelectorName; 357 if (CurrentToken->is(tok::colon) || 358 Style.Language == FormatStyle::LK_JavaScript) 359 Left->Type = TT_DictLiteral; 360 } 361 if (!consumeToken()) 362 return false; 363 } 364 } 365 return true; 366 } 367 368 void updateParameterCount(FormatToken *Left, FormatToken *Current) { 369 if (Current->is(TT_LambdaLSquare) || 370 (Current->is(tok::caret) && Current->is(TT_UnaryOperator)) || 371 (Style.Language == FormatStyle::LK_JavaScript && 372 Current->is(Keywords.kw_function))) { 373 ++Left->BlockParameterCount; 374 } 375 if (Current->is(tok::comma)) { 376 ++Left->ParameterCount; 377 if (!Left->Role) 378 Left->Role.reset(new CommaSeparatedList(Style)); 379 Left->Role->CommaFound(Current); 380 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) { 381 Left->ParameterCount = 1; 382 } 383 } 384 385 bool parseConditional() { 386 while (CurrentToken) { 387 if (CurrentToken->is(tok::colon)) { 388 CurrentToken->Type = TT_ConditionalExpr; 389 next(); 390 return true; 391 } 392 if (!consumeToken()) 393 return false; 394 } 395 return false; 396 } 397 398 bool parseTemplateDeclaration() { 399 if (CurrentToken && CurrentToken->is(tok::less)) { 400 CurrentToken->Type = TT_TemplateOpener; 401 next(); 402 if (!parseAngle()) 403 return false; 404 if (CurrentToken) 405 CurrentToken->Previous->ClosesTemplateDeclaration = true; 406 return true; 407 } 408 return false; 409 } 410 411 bool consumeToken() { 412 FormatToken *Tok = CurrentToken; 413 next(); 414 switch (Tok->Tok.getKind()) { 415 case tok::plus: 416 case tok::minus: 417 if (!Tok->Previous && Line.MustBeDeclaration) 418 Tok->Type = TT_ObjCMethodSpecifier; 419 break; 420 case tok::colon: 421 if (!Tok->Previous) 422 return false; 423 // Colons from ?: are handled in parseConditional(). 424 if (Style.Language == FormatStyle::LK_JavaScript) { 425 if (Contexts.back().ColonIsForRangeExpr || 426 (Contexts.size() == 1 && 427 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) || 428 Contexts.back().ContextKind == tok::l_paren || 429 Contexts.back().ContextKind == tok::l_square) { 430 Tok->Type = TT_JsTypeColon; 431 break; 432 } 433 } 434 if (Contexts.back().ColonIsDictLiteral) { 435 Tok->Type = TT_DictLiteral; 436 } else if (Contexts.back().ColonIsObjCMethodExpr || 437 Line.First->is(TT_ObjCMethodSpecifier)) { 438 Tok->Type = TT_ObjCMethodExpr; 439 Tok->Previous->Type = TT_SelectorName; 440 if (Tok->Previous->ColumnWidth > 441 Contexts.back().LongestObjCSelectorName) { 442 Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth; 443 } 444 if (!Contexts.back().FirstObjCSelectorName) 445 Contexts.back().FirstObjCSelectorName = Tok->Previous; 446 } else if (Contexts.back().ColonIsForRangeExpr) { 447 Tok->Type = TT_RangeBasedForLoopColon; 448 } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) { 449 Tok->Type = TT_BitFieldColon; 450 } else if (Contexts.size() == 1 && 451 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) { 452 if (Tok->Previous->is(tok::r_paren)) 453 Tok->Type = TT_CtorInitializerColon; 454 else 455 Tok->Type = TT_InheritanceColon; 456 } else if (Tok->Previous->is(tok::identifier) && Tok->Next && 457 Tok->Next->isOneOf(tok::r_paren, tok::comma)) { 458 // This handles a special macro in ObjC code where selectors including 459 // the colon are passed as macro arguments. 460 Tok->Type = TT_ObjCMethodExpr; 461 } else if (Contexts.back().ContextKind == tok::l_paren) { 462 Tok->Type = TT_InlineASMColon; 463 } 464 break; 465 case tok::kw_if: 466 case tok::kw_while: 467 if (CurrentToken && CurrentToken->is(tok::l_paren)) { 468 next(); 469 if (!parseParens(/*LookForDecls=*/true)) 470 return false; 471 } 472 break; 473 case tok::kw_for: 474 Contexts.back().ColonIsForRangeExpr = true; 475 next(); 476 if (!parseParens()) 477 return false; 478 break; 479 case tok::l_paren: 480 if (!parseParens()) 481 return false; 482 if (Line.MustBeDeclaration && Contexts.size() == 1 && 483 !Contexts.back().IsExpression && Line.First->isNot(TT_ObjCProperty) && 484 (!Tok->Previous || 485 !Tok->Previous->isOneOf(tok::kw_decltype, TT_LeadingJavaAnnotation))) 486 Line.MightBeFunctionDecl = true; 487 break; 488 case tok::l_square: 489 if (!parseSquare()) 490 return false; 491 break; 492 case tok::l_brace: 493 if (!parseBrace()) 494 return false; 495 break; 496 case tok::less: 497 if ((!Tok->Previous || 498 (!Tok->Previous->Tok.isLiteral() && 499 !(Tok->Previous->is(tok::r_paren) && Contexts.size() > 1))) && 500 parseAngle()) { 501 Tok->Type = TT_TemplateOpener; 502 } else { 503 Tok->Type = TT_BinaryOperator; 504 CurrentToken = Tok; 505 next(); 506 } 507 break; 508 case tok::r_paren: 509 case tok::r_square: 510 return false; 511 case tok::r_brace: 512 // Lines can start with '}'. 513 if (Tok->Previous) 514 return false; 515 break; 516 case tok::greater: 517 Tok->Type = TT_BinaryOperator; 518 break; 519 case tok::kw_operator: 520 while (CurrentToken && 521 !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) { 522 if (CurrentToken->isOneOf(tok::star, tok::amp)) 523 CurrentToken->Type = TT_PointerOrReference; 524 consumeToken(); 525 if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator)) 526 CurrentToken->Previous->Type = TT_OverloadedOperator; 527 } 528 if (CurrentToken) { 529 CurrentToken->Type = TT_OverloadedOperatorLParen; 530 if (CurrentToken->Previous->is(TT_BinaryOperator)) 531 CurrentToken->Previous->Type = TT_OverloadedOperator; 532 } 533 break; 534 case tok::question: 535 if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next && 536 Tok->Next->isOneOf(tok::colon, tok::semi, tok::r_paren, 537 tok::r_brace)) { 538 // Question marks before semicolons, colons, commas, etc. indicate 539 // optional types (fields, parameters), e.g. 540 // `function(x?: string, y?) {...}` or `class X {y?;}` 541 Tok->Type = TT_JsTypeOptionalQuestion; 542 break; 543 } 544 parseConditional(); 545 break; 546 case tok::kw_template: 547 parseTemplateDeclaration(); 548 break; 549 case tok::comma: 550 if (Contexts.back().InCtorInitializer) 551 Tok->Type = TT_CtorInitializerComma; 552 else if (Contexts.back().FirstStartOfName && Contexts.size() == 1) { 553 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; 554 Line.IsMultiVariableDeclStmt = true; 555 } 556 if (Contexts.back().IsForEachMacro) 557 Contexts.back().IsExpression = true; 558 break; 559 default: 560 break; 561 } 562 return true; 563 } 564 565 void parseIncludeDirective() { 566 if (CurrentToken && CurrentToken->is(tok::less)) { 567 next(); 568 while (CurrentToken) { 569 if (CurrentToken->isNot(tok::comment) || CurrentToken->Next) 570 CurrentToken->Type = TT_ImplicitStringLiteral; 571 next(); 572 } 573 } 574 } 575 576 void parseWarningOrError() { 577 next(); 578 // We still want to format the whitespace left of the first token of the 579 // warning or error. 580 next(); 581 while (CurrentToken) { 582 CurrentToken->Type = TT_ImplicitStringLiteral; 583 next(); 584 } 585 } 586 587 void parsePragma() { 588 next(); // Consume "pragma". 589 if (CurrentToken && 590 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) { 591 bool IsMark = CurrentToken->is(Keywords.kw_mark); 592 next(); // Consume "mark". 593 next(); // Consume first token (so we fix leading whitespace). 594 while (CurrentToken) { 595 if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator)) 596 CurrentToken->Type = TT_ImplicitStringLiteral; 597 next(); 598 } 599 } 600 } 601 602 LineType parsePreprocessorDirective() { 603 LineType Type = LT_PreprocessorDirective; 604 next(); 605 if (!CurrentToken) 606 return Type; 607 if (CurrentToken->Tok.is(tok::numeric_constant)) { 608 CurrentToken->SpacesRequiredBefore = 1; 609 return Type; 610 } 611 // Hashes in the middle of a line can lead to any strange token 612 // sequence. 613 if (!CurrentToken->Tok.getIdentifierInfo()) 614 return Type; 615 switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) { 616 case tok::pp_include: 617 case tok::pp_import: 618 next(); 619 parseIncludeDirective(); 620 Type = LT_ImportStatement; 621 break; 622 case tok::pp_error: 623 case tok::pp_warning: 624 parseWarningOrError(); 625 break; 626 case tok::pp_pragma: 627 parsePragma(); 628 break; 629 case tok::pp_if: 630 case tok::pp_elif: 631 Contexts.back().IsExpression = true; 632 parseLine(); 633 break; 634 default: 635 break; 636 } 637 while (CurrentToken) 638 next(); 639 return Type; 640 } 641 642 public: 643 LineType parseLine() { 644 if (CurrentToken->is(tok::hash)) 645 return parsePreprocessorDirective(); 646 647 // Directly allow to 'import <string-literal>' to support protocol buffer 648 // definitions (code.google.com/p/protobuf) or missing "#" (either way we 649 // should not break the line). 650 IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo(); 651 if ((Style.Language == FormatStyle::LK_Java && 652 CurrentToken->is(Keywords.kw_package)) || 653 (Info && Info->getPPKeywordID() == tok::pp_import && 654 CurrentToken->Next && 655 CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier, 656 tok::kw_static))) { 657 next(); 658 parseIncludeDirective(); 659 return LT_ImportStatement; 660 } 661 662 // If this line starts and ends in '<' and '>', respectively, it is likely 663 // part of "#define <a/b.h>". 664 if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) { 665 parseIncludeDirective(); 666 return LT_ImportStatement; 667 } 668 669 // In .proto files, top-level options are very similar to import statements 670 // and should not be line-wrapped. 671 if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 && 672 CurrentToken->is(Keywords.kw_option)) { 673 next(); 674 if (CurrentToken && CurrentToken->is(tok::identifier)) 675 return LT_ImportStatement; 676 } 677 678 bool KeywordVirtualFound = false; 679 bool ImportStatement = false; 680 while (CurrentToken) { 681 if (CurrentToken->is(tok::kw_virtual)) 682 KeywordVirtualFound = true; 683 if (IsImportStatement(*CurrentToken)) 684 ImportStatement = true; 685 if (!consumeToken()) 686 return LT_Invalid; 687 } 688 if (KeywordVirtualFound) 689 return LT_VirtualFunctionDecl; 690 if (ImportStatement) 691 return LT_ImportStatement; 692 693 if (Line.First->is(TT_ObjCMethodSpecifier)) { 694 if (Contexts.back().FirstObjCSelectorName) 695 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 696 Contexts.back().LongestObjCSelectorName; 697 return LT_ObjCMethodDecl; 698 } 699 700 return LT_Other; 701 } 702 703 private: 704 bool IsImportStatement(const FormatToken &Tok) { 705 // FIXME: Closure-library specific stuff should not be hard-coded but be 706 // configurable. 707 return Style.Language == FormatStyle::LK_JavaScript && 708 Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) && 709 Tok.Next->Next && (Tok.Next->Next->TokenText == "module" || 710 Tok.Next->Next->TokenText == "require" || 711 Tok.Next->Next->TokenText == "provide") && 712 Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren); 713 } 714 715 void resetTokenMetadata(FormatToken *Token) { 716 if (!Token) 717 return; 718 719 // Reset token type in case we have already looked at it and then 720 // recovered from an error (e.g. failure to find the matching >). 721 if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro, 722 TT_FunctionLBrace, TT_ImplicitStringLiteral, 723 TT_RegexLiteral, TT_TrailingReturnArrow)) 724 CurrentToken->Type = TT_Unknown; 725 CurrentToken->Role.reset(); 726 CurrentToken->MatchingParen = nullptr; 727 CurrentToken->FakeLParens.clear(); 728 CurrentToken->FakeRParens = 0; 729 } 730 731 void next() { 732 if (CurrentToken) { 733 CurrentToken->NestingLevel = Contexts.size() - 1; 734 CurrentToken->BindingStrength = Contexts.back().BindingStrength; 735 modifyContext(*CurrentToken); 736 determineTokenType(*CurrentToken); 737 CurrentToken = CurrentToken->Next; 738 } 739 740 resetTokenMetadata(CurrentToken); 741 } 742 743 /// \brief A struct to hold information valid in a specific context, e.g. 744 /// a pair of parenthesis. 745 struct Context { 746 Context(tok::TokenKind ContextKind, unsigned BindingStrength, 747 bool IsExpression) 748 : ContextKind(ContextKind), BindingStrength(BindingStrength), 749 IsExpression(IsExpression) {} 750 751 tok::TokenKind ContextKind; 752 unsigned BindingStrength; 753 bool IsExpression; 754 unsigned LongestObjCSelectorName = 0; 755 bool ColonIsForRangeExpr = false; 756 bool ColonIsDictLiteral = false; 757 bool ColonIsObjCMethodExpr = false; 758 FormatToken *FirstObjCSelectorName = nullptr; 759 FormatToken *FirstStartOfName = nullptr; 760 bool CanBeExpression = true; 761 bool InTemplateArgument = false; 762 bool InCtorInitializer = false; 763 bool CaretFound = false; 764 bool IsForEachMacro = false; 765 }; 766 767 /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime 768 /// of each instance. 769 struct ScopedContextCreator { 770 AnnotatingParser &P; 771 772 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind, 773 unsigned Increase) 774 : P(P) { 775 P.Contexts.push_back(Context(ContextKind, 776 P.Contexts.back().BindingStrength + Increase, 777 P.Contexts.back().IsExpression)); 778 } 779 780 ~ScopedContextCreator() { P.Contexts.pop_back(); } 781 }; 782 783 void modifyContext(const FormatToken &Current) { 784 if (Current.getPrecedence() == prec::Assignment && 785 !Line.First->isOneOf(tok::kw_template, tok::kw_using) && 786 (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) { 787 Contexts.back().IsExpression = true; 788 if (!Line.First->is(TT_UnaryOperator)) { 789 for (FormatToken *Previous = Current.Previous; 790 Previous && !Previous->isOneOf(tok::comma, tok::semi); 791 Previous = Previous->Previous) { 792 if (Previous->isOneOf(tok::r_square, tok::r_paren)) { 793 Previous = Previous->MatchingParen; 794 if (!Previous) 795 break; 796 } 797 if (Previous->opensScope()) 798 break; 799 if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) && 800 Previous->isOneOf(tok::star, tok::amp, tok::ampamp) && 801 Previous->Previous && Previous->Previous->isNot(tok::equal)) 802 Previous->Type = TT_PointerOrReference; 803 } 804 } 805 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) { 806 Contexts.back().IsExpression = true; 807 } else if (Current.is(TT_TrailingReturnArrow)) { 808 Contexts.back().IsExpression = false; 809 } else if (Current.is(tok::l_paren) && !Line.MustBeDeclaration && 810 !Line.InPPDirective && 811 (!Current.Previous || 812 Current.Previous->isNot(tok::kw_decltype))) { 813 bool ParametersOfFunctionType = 814 Current.Previous && Current.Previous->is(tok::r_paren) && 815 Current.Previous->MatchingParen && 816 Current.Previous->MatchingParen->is(TT_FunctionTypeLParen); 817 bool IsForOrCatch = Current.Previous && 818 Current.Previous->isOneOf(tok::kw_for, tok::kw_catch); 819 Contexts.back().IsExpression = !ParametersOfFunctionType && !IsForOrCatch; 820 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { 821 for (FormatToken *Previous = Current.Previous; 822 Previous && Previous->isOneOf(tok::star, tok::amp); 823 Previous = Previous->Previous) 824 Previous->Type = TT_PointerOrReference; 825 if (Line.MustBeDeclaration) 826 Contexts.back().IsExpression = Contexts.front().InCtorInitializer; 827 } else if (Current.Previous && 828 Current.Previous->is(TT_CtorInitializerColon)) { 829 Contexts.back().IsExpression = true; 830 Contexts.back().InCtorInitializer = true; 831 } else if (Current.is(tok::kw_new)) { 832 Contexts.back().CanBeExpression = false; 833 } else if (Current.is(tok::semi) || Current.is(tok::exclaim)) { 834 // This should be the condition or increment in a for-loop. 835 Contexts.back().IsExpression = true; 836 } 837 } 838 839 void determineTokenType(FormatToken &Current) { 840 if (!Current.is(TT_Unknown)) 841 // The token type is already known. 842 return; 843 844 // Line.MightBeFunctionDecl can only be true after the parentheses of a 845 // function declaration have been found. In this case, 'Current' is a 846 // trailing token of this declaration and thus cannot be a name. 847 if (Current.is(Keywords.kw_instanceof)) { 848 Current.Type = TT_BinaryOperator; 849 } else if (isStartOfName(Current) && 850 (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) { 851 Contexts.back().FirstStartOfName = &Current; 852 Current.Type = TT_StartOfName; 853 } else if (Current.is(tok::kw_auto)) { 854 AutoFound = true; 855 } else if (Current.is(tok::arrow) && 856 Style.Language == FormatStyle::LK_Java) { 857 Current.Type = TT_LambdaArrow; 858 } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration && 859 Current.NestingLevel == 0) { 860 Current.Type = TT_TrailingReturnArrow; 861 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { 862 Current.Type = 863 determineStarAmpUsage(Current, Contexts.back().CanBeExpression && 864 Contexts.back().IsExpression, 865 Contexts.back().InTemplateArgument); 866 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { 867 Current.Type = determinePlusMinusCaretUsage(Current); 868 if (Current.is(TT_UnaryOperator) && Current.is(tok::caret)) 869 Contexts.back().CaretFound = true; 870 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { 871 Current.Type = determineIncrementUsage(Current); 872 } else if (Current.isOneOf(tok::exclaim, tok::tilde)) { 873 Current.Type = TT_UnaryOperator; 874 } else if (Current.is(tok::question)) { 875 Current.Type = TT_ConditionalExpr; 876 } else if (Current.isBinaryOperator() && 877 (!Current.Previous || Current.Previous->isNot(tok::l_square))) { 878 Current.Type = TT_BinaryOperator; 879 } else if (Current.is(tok::comment)) { 880 Current.Type = 881 Current.TokenText.startswith("/*") ? TT_BlockComment : TT_LineComment; 882 } else if (Current.is(tok::r_paren)) { 883 if (rParenEndsCast(Current)) 884 Current.Type = TT_CastRParen; 885 } else if (Current.is(tok::at) && Current.Next) { 886 switch (Current.Next->Tok.getObjCKeywordID()) { 887 case tok::objc_interface: 888 case tok::objc_implementation: 889 case tok::objc_protocol: 890 Current.Type = TT_ObjCDecl; 891 break; 892 case tok::objc_property: 893 Current.Type = TT_ObjCProperty; 894 break; 895 default: 896 break; 897 } 898 } else if (Current.is(tok::period)) { 899 FormatToken *PreviousNoComment = Current.getPreviousNonComment(); 900 if (PreviousNoComment && 901 PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) 902 Current.Type = TT_DesignatedInitializerPeriod; 903 else if (Style.Language == FormatStyle::LK_Java && Current.Previous && 904 Current.Previous->isOneOf(TT_JavaAnnotation, 905 TT_LeadingJavaAnnotation)) { 906 Current.Type = Current.Previous->Type; 907 } 908 } else if (Current.isOneOf(tok::identifier, tok::kw_const) && 909 Current.Previous && 910 !Current.Previous->isOneOf(tok::equal, tok::at) && 911 Line.MightBeFunctionDecl && Contexts.size() == 1) { 912 // Line.MightBeFunctionDecl can only be true after the parentheses of a 913 // function declaration have been found. 914 Current.Type = TT_TrailingAnnotation; 915 } else if ((Style.Language == FormatStyle::LK_Java || 916 Style.Language == FormatStyle::LK_JavaScript) && 917 Current.Previous) { 918 if (Current.Previous->is(tok::at) && 919 Current.isNot(Keywords.kw_interface)) { 920 const FormatToken &AtToken = *Current.Previous; 921 const FormatToken *Previous = AtToken.getPreviousNonComment(); 922 if (!Previous || Previous->is(TT_LeadingJavaAnnotation)) 923 Current.Type = TT_LeadingJavaAnnotation; 924 else 925 Current.Type = TT_JavaAnnotation; 926 } else if (Current.Previous->is(tok::period) && 927 Current.Previous->isOneOf(TT_JavaAnnotation, 928 TT_LeadingJavaAnnotation)) { 929 Current.Type = Current.Previous->Type; 930 } 931 } 932 } 933 934 /// \brief Take a guess at whether \p Tok starts a name of a function or 935 /// variable declaration. 936 /// 937 /// This is a heuristic based on whether \p Tok is an identifier following 938 /// something that is likely a type. 939 bool isStartOfName(const FormatToken &Tok) { 940 if (Tok.isNot(tok::identifier) || !Tok.Previous) 941 return false; 942 943 if (Tok.Previous->is(TT_LeadingJavaAnnotation)) 944 return false; 945 946 // Skip "const" as it does not have an influence on whether this is a name. 947 FormatToken *PreviousNotConst = Tok.Previous; 948 while (PreviousNotConst && PreviousNotConst->is(tok::kw_const)) 949 PreviousNotConst = PreviousNotConst->Previous; 950 951 if (!PreviousNotConst) 952 return false; 953 954 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) && 955 PreviousNotConst->Previous && 956 PreviousNotConst->Previous->is(tok::hash); 957 958 if (PreviousNotConst->is(TT_TemplateCloser)) 959 return PreviousNotConst && PreviousNotConst->MatchingParen && 960 PreviousNotConst->MatchingParen->Previous && 961 PreviousNotConst->MatchingParen->Previous->isNot(tok::period) && 962 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template); 963 964 if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen && 965 PreviousNotConst->MatchingParen->Previous && 966 PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype)) 967 return true; 968 969 return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) || 970 PreviousNotConst->is(TT_PointerOrReference) || 971 PreviousNotConst->isSimpleTypeSpecifier(); 972 } 973 974 /// \brief Determine whether ')' is ending a cast. 975 bool rParenEndsCast(const FormatToken &Tok) { 976 FormatToken *LeftOfParens = nullptr; 977 if (Tok.MatchingParen) 978 LeftOfParens = Tok.MatchingParen->getPreviousNonComment(); 979 if (LeftOfParens && LeftOfParens->is(tok::r_paren) && 980 LeftOfParens->MatchingParen) 981 LeftOfParens = LeftOfParens->MatchingParen->Previous; 982 if (LeftOfParens && LeftOfParens->is(tok::r_square) && 983 LeftOfParens->MatchingParen && 984 LeftOfParens->MatchingParen->is(TT_LambdaLSquare)) 985 return false; 986 if (Tok.Next) { 987 if (Tok.Next->is(tok::question)) 988 return false; 989 if (Style.Language == FormatStyle::LK_JavaScript && 990 Tok.Next->is(Keywords.kw_in)) 991 return false; 992 if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren)) 993 return true; 994 } 995 bool IsCast = false; 996 bool ParensAreEmpty = Tok.Previous == Tok.MatchingParen; 997 bool ParensAreType = 998 !Tok.Previous || 999 Tok.Previous->isOneOf(TT_PointerOrReference, TT_TemplateCloser) || 1000 Tok.Previous->isSimpleTypeSpecifier(); 1001 bool ParensCouldEndDecl = 1002 Tok.Next && Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace); 1003 bool IsSizeOfOrAlignOf = 1004 LeftOfParens && LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof); 1005 if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf && 1006 (Contexts.size() > 1 && Contexts[Contexts.size() - 2].IsExpression)) 1007 IsCast = true; 1008 else if (Tok.Next && Tok.Next->isNot(tok::string_literal) && 1009 (Tok.Next->Tok.isLiteral() || 1010 Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) 1011 IsCast = true; 1012 // If there is an identifier after the (), it is likely a cast, unless 1013 // there is also an identifier before the (). 1014 else if (LeftOfParens && Tok.Next && 1015 (LeftOfParens->Tok.getIdentifierInfo() == nullptr || 1016 LeftOfParens->is(tok::kw_return)) && 1017 !LeftOfParens->isOneOf(TT_OverloadedOperator, tok::at, 1018 TT_TemplateCloser)) { 1019 if (Tok.Next->isOneOf(tok::identifier, tok::numeric_constant)) { 1020 IsCast = true; 1021 } else { 1022 // Use heuristics to recognize c style casting. 1023 FormatToken *Prev = Tok.Previous; 1024 if (Prev && Prev->isOneOf(tok::amp, tok::star)) 1025 Prev = Prev->Previous; 1026 1027 if (Prev && Tok.Next && Tok.Next->Next) { 1028 bool NextIsUnary = Tok.Next->isUnaryOperator() || 1029 Tok.Next->isOneOf(tok::amp, tok::star); 1030 IsCast = 1031 NextIsUnary && !Tok.Next->is(tok::plus) && 1032 Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant); 1033 } 1034 1035 for (; Prev != Tok.MatchingParen; Prev = Prev->Previous) { 1036 if (!Prev || !Prev->isOneOf(tok::kw_const, tok::identifier)) { 1037 IsCast = false; 1038 break; 1039 } 1040 } 1041 } 1042 } 1043 return IsCast && !ParensAreEmpty; 1044 } 1045 1046 /// \brief Return the type of the given token assuming it is * or &. 1047 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression, 1048 bool InTemplateArgument) { 1049 if (Style.Language == FormatStyle::LK_JavaScript) 1050 return TT_BinaryOperator; 1051 1052 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 1053 if (!PrevToken) 1054 return TT_UnaryOperator; 1055 1056 const FormatToken *NextToken = Tok.getNextNonComment(); 1057 if (!NextToken || 1058 (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) 1059 return TT_Unknown; 1060 1061 if (PrevToken->is(tok::coloncolon)) 1062 return TT_PointerOrReference; 1063 1064 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, 1065 tok::comma, tok::semi, tok::kw_return, tok::colon, 1066 tok::equal, tok::kw_delete, tok::kw_sizeof) || 1067 PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr, 1068 TT_UnaryOperator, TT_CastRParen)) 1069 return TT_UnaryOperator; 1070 1071 if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare)) 1072 return TT_PointerOrReference; 1073 if (NextToken->isOneOf(tok::kw_operator, tok::comma, tok::semi)) 1074 return TT_PointerOrReference; 1075 1076 if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen && 1077 PrevToken->MatchingParen->Previous && 1078 PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof, 1079 tok::kw_decltype)) 1080 return TT_PointerOrReference; 1081 1082 if (PrevToken->Tok.isLiteral() || 1083 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true, 1084 tok::kw_false, tok::r_brace) || 1085 NextToken->Tok.isLiteral() || 1086 NextToken->isOneOf(tok::kw_true, tok::kw_false) || 1087 NextToken->isUnaryOperator() || 1088 // If we know we're in a template argument, there are no named 1089 // declarations. Thus, having an identifier on the right-hand side 1090 // indicates a binary operator. 1091 (InTemplateArgument && NextToken->Tok.isAnyIdentifier())) 1092 return TT_BinaryOperator; 1093 1094 // "&&(" is quite unlikely to be two successive unary "&". 1095 if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren)) 1096 return TT_BinaryOperator; 1097 1098 // This catches some cases where evaluation order is used as control flow: 1099 // aaa && aaa->f(); 1100 const FormatToken *NextNextToken = NextToken->getNextNonComment(); 1101 if (NextNextToken && NextNextToken->is(tok::arrow)) 1102 return TT_BinaryOperator; 1103 1104 // It is very unlikely that we are going to find a pointer or reference type 1105 // definition on the RHS of an assignment. 1106 if (IsExpression && !Contexts.back().CaretFound) 1107 return TT_BinaryOperator; 1108 1109 return TT_PointerOrReference; 1110 } 1111 1112 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) { 1113 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 1114 if (!PrevToken || PrevToken->is(TT_CastRParen)) 1115 return TT_UnaryOperator; 1116 1117 // Use heuristics to recognize unary operators. 1118 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square, 1119 tok::question, tok::colon, tok::kw_return, 1120 tok::kw_case, tok::at, tok::l_brace)) 1121 return TT_UnaryOperator; 1122 1123 // There can't be two consecutive binary operators. 1124 if (PrevToken->is(TT_BinaryOperator)) 1125 return TT_UnaryOperator; 1126 1127 // Fall back to marking the token as binary operator. 1128 return TT_BinaryOperator; 1129 } 1130 1131 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 1132 TokenType determineIncrementUsage(const FormatToken &Tok) { 1133 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 1134 if (!PrevToken || PrevToken->is(TT_CastRParen)) 1135 return TT_UnaryOperator; 1136 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier)) 1137 return TT_TrailingUnaryOperator; 1138 1139 return TT_UnaryOperator; 1140 } 1141 1142 SmallVector<Context, 8> Contexts; 1143 1144 const FormatStyle &Style; 1145 AnnotatedLine &Line; 1146 FormatToken *CurrentToken; 1147 bool AutoFound; 1148 const AdditionalKeywords &Keywords; 1149 }; 1150 1151 static const int PrecedenceUnaryOperator = prec::PointerToMember + 1; 1152 static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2; 1153 1154 /// \brief Parses binary expressions by inserting fake parenthesis based on 1155 /// operator precedence. 1156 class ExpressionParser { 1157 public: 1158 ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, 1159 AnnotatedLine &Line) 1160 : Style(Style), Keywords(Keywords), Current(Line.First) {} 1161 1162 /// \brief Parse expressions with the given operatore precedence. 1163 void parse(int Precedence = 0) { 1164 // Skip 'return' and ObjC selector colons as they are not part of a binary 1165 // expression. 1166 while (Current && (Current->is(tok::kw_return) || 1167 (Current->is(tok::colon) && 1168 Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)))) 1169 next(); 1170 1171 if (!Current || Precedence > PrecedenceArrowAndPeriod) 1172 return; 1173 1174 // Conditional expressions need to be parsed separately for proper nesting. 1175 if (Precedence == prec::Conditional) { 1176 parseConditionalExpr(); 1177 return; 1178 } 1179 1180 // Parse unary operators, which all have a higher precedence than binary 1181 // operators. 1182 if (Precedence == PrecedenceUnaryOperator) { 1183 parseUnaryOperator(); 1184 return; 1185 } 1186 1187 FormatToken *Start = Current; 1188 FormatToken *LatestOperator = nullptr; 1189 unsigned OperatorIndex = 0; 1190 1191 while (Current) { 1192 // Consume operators with higher precedence. 1193 parse(Precedence + 1); 1194 1195 int CurrentPrecedence = getCurrentPrecedence(); 1196 1197 if (Current && Current->is(TT_SelectorName) && 1198 Precedence == CurrentPrecedence) { 1199 if (LatestOperator) 1200 addFakeParenthesis(Start, prec::Level(Precedence)); 1201 Start = Current; 1202 } 1203 1204 // At the end of the line or when an operator with higher precedence is 1205 // found, insert fake parenthesis and return. 1206 if (!Current || (Current->closesScope() && Current->MatchingParen) || 1207 (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) || 1208 (CurrentPrecedence == prec::Conditional && 1209 Precedence == prec::Assignment && Current->is(tok::colon))) { 1210 break; 1211 } 1212 1213 // Consume scopes: (), [], <> and {} 1214 if (Current->opensScope()) { 1215 while (Current && !Current->closesScope()) { 1216 next(); 1217 parse(); 1218 } 1219 next(); 1220 } else { 1221 // Operator found. 1222 if (CurrentPrecedence == Precedence) { 1223 LatestOperator = Current; 1224 Current->OperatorIndex = OperatorIndex; 1225 ++OperatorIndex; 1226 } 1227 next(/*SkipPastLeadingComments=*/Precedence > 0); 1228 } 1229 } 1230 1231 if (LatestOperator && (Current || Precedence > 0)) { 1232 LatestOperator->LastOperator = true; 1233 if (Precedence == PrecedenceArrowAndPeriod) { 1234 // Call expressions don't have a binary operator precedence. 1235 addFakeParenthesis(Start, prec::Unknown); 1236 } else { 1237 addFakeParenthesis(Start, prec::Level(Precedence)); 1238 } 1239 } 1240 } 1241 1242 private: 1243 /// \brief Gets the precedence (+1) of the given token for binary operators 1244 /// and other tokens that we treat like binary operators. 1245 int getCurrentPrecedence() { 1246 if (Current) { 1247 const FormatToken *NextNonComment = Current->getNextNonComment(); 1248 if (Current->is(TT_ConditionalExpr)) 1249 return prec::Conditional; 1250 else if (NextNonComment && NextNonComment->is(tok::colon) && 1251 NextNonComment->is(TT_DictLiteral)) 1252 return prec::Comma; 1253 else if (Current->is(TT_LambdaArrow)) 1254 return prec::Comma; 1255 else if (Current->isOneOf(tok::semi, TT_InlineASMColon, 1256 TT_SelectorName) || 1257 (Current->is(tok::comment) && NextNonComment && 1258 NextNonComment->is(TT_SelectorName))) 1259 return 0; 1260 else if (Current->is(TT_RangeBasedForLoopColon)) 1261 return prec::Comma; 1262 else if (Current->is(TT_BinaryOperator) || Current->is(tok::comma)) 1263 return Current->getPrecedence(); 1264 else if (Current->isOneOf(tok::period, tok::arrow)) 1265 return PrecedenceArrowAndPeriod; 1266 else if (Style.Language == FormatStyle::LK_Java && 1267 Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements, 1268 Keywords.kw_throws)) 1269 return 0; 1270 } 1271 return -1; 1272 } 1273 1274 void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) { 1275 Start->FakeLParens.push_back(Precedence); 1276 if (Precedence > prec::Unknown) 1277 Start->StartsBinaryExpression = true; 1278 if (Current) { 1279 FormatToken *Previous = Current->Previous; 1280 while (Previous->is(tok::comment) && Previous->Previous) 1281 Previous = Previous->Previous; 1282 ++Previous->FakeRParens; 1283 if (Precedence > prec::Unknown) 1284 Previous->EndsBinaryExpression = true; 1285 } 1286 } 1287 1288 /// \brief Parse unary operator expressions and surround them with fake 1289 /// parentheses if appropriate. 1290 void parseUnaryOperator() { 1291 if (!Current || Current->isNot(TT_UnaryOperator)) { 1292 parse(PrecedenceArrowAndPeriod); 1293 return; 1294 } 1295 1296 FormatToken *Start = Current; 1297 next(); 1298 parseUnaryOperator(); 1299 1300 // The actual precedence doesn't matter. 1301 addFakeParenthesis(Start, prec::Unknown); 1302 } 1303 1304 void parseConditionalExpr() { 1305 while (Current && Current->isTrailingComment()) { 1306 next(); 1307 } 1308 FormatToken *Start = Current; 1309 parse(prec::LogicalOr); 1310 if (!Current || !Current->is(tok::question)) 1311 return; 1312 next(); 1313 parse(prec::Assignment); 1314 if (!Current || Current->isNot(TT_ConditionalExpr)) 1315 return; 1316 next(); 1317 parse(prec::Assignment); 1318 addFakeParenthesis(Start, prec::Conditional); 1319 } 1320 1321 void next(bool SkipPastLeadingComments = true) { 1322 if (Current) 1323 Current = Current->Next; 1324 while (Current && 1325 (Current->NewlinesBefore == 0 || SkipPastLeadingComments) && 1326 Current->isTrailingComment()) 1327 Current = Current->Next; 1328 } 1329 1330 const FormatStyle &Style; 1331 const AdditionalKeywords &Keywords; 1332 FormatToken *Current; 1333 }; 1334 1335 } // end anonymous namespace 1336 1337 void TokenAnnotator::setCommentLineLevels( 1338 SmallVectorImpl<AnnotatedLine *> &Lines) { 1339 const AnnotatedLine *NextNonCommentLine = nullptr; 1340 for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(), 1341 E = Lines.rend(); 1342 I != E; ++I) { 1343 if (NextNonCommentLine && (*I)->First->is(tok::comment) && 1344 (*I)->First->Next == nullptr) 1345 (*I)->Level = NextNonCommentLine->Level; 1346 else 1347 NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr; 1348 1349 setCommentLineLevels((*I)->Children); 1350 } 1351 } 1352 1353 void TokenAnnotator::annotate(AnnotatedLine &Line) { 1354 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), 1355 E = Line.Children.end(); 1356 I != E; ++I) { 1357 annotate(**I); 1358 } 1359 AnnotatingParser Parser(Style, Line, Keywords); 1360 Line.Type = Parser.parseLine(); 1361 if (Line.Type == LT_Invalid) 1362 return; 1363 1364 ExpressionParser ExprParser(Style, Keywords, Line); 1365 ExprParser.parse(); 1366 1367 if (Line.First->is(TT_ObjCMethodSpecifier)) 1368 Line.Type = LT_ObjCMethodDecl; 1369 else if (Line.First->is(TT_ObjCDecl)) 1370 Line.Type = LT_ObjCDecl; 1371 else if (Line.First->is(TT_ObjCProperty)) 1372 Line.Type = LT_ObjCProperty; 1373 1374 Line.First->SpacesRequiredBefore = 1; 1375 Line.First->CanBreakBefore = Line.First->MustBreakBefore; 1376 } 1377 1378 // This function heuristically determines whether 'Current' starts the name of a 1379 // function declaration. 1380 static bool isFunctionDeclarationName(const FormatToken &Current) { 1381 if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0) 1382 return false; 1383 const FormatToken *Next = Current.Next; 1384 for (; Next; Next = Next->Next) { 1385 if (Next->is(TT_TemplateOpener)) { 1386 Next = Next->MatchingParen; 1387 } else if (Next->is(tok::coloncolon)) { 1388 Next = Next->Next; 1389 if (!Next || !Next->is(tok::identifier)) 1390 return false; 1391 } else if (Next->is(tok::l_paren)) { 1392 break; 1393 } else { 1394 return false; 1395 } 1396 } 1397 if (!Next) 1398 return false; 1399 assert(Next->is(tok::l_paren)); 1400 if (Next->Next == Next->MatchingParen) 1401 return true; 1402 for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen; 1403 Tok = Tok->Next) { 1404 if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() || 1405 Tok->isOneOf(TT_PointerOrReference, TT_StartOfName)) 1406 return true; 1407 if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) || 1408 Tok->Tok.isLiteral()) 1409 return false; 1410 } 1411 return false; 1412 } 1413 1414 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { 1415 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), 1416 E = Line.Children.end(); 1417 I != E; ++I) { 1418 calculateFormattingInformation(**I); 1419 } 1420 1421 Line.First->TotalLength = 1422 Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth; 1423 if (!Line.First->Next) 1424 return; 1425 FormatToken *Current = Line.First->Next; 1426 bool InFunctionDecl = Line.MightBeFunctionDecl; 1427 while (Current) { 1428 if (isFunctionDeclarationName(*Current)) 1429 Current->Type = TT_FunctionDeclarationName; 1430 if (Current->is(TT_LineComment)) { 1431 if (Current->Previous->BlockKind == BK_BracedInit && 1432 Current->Previous->opensScope()) 1433 Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1; 1434 else 1435 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; 1436 1437 // If we find a trailing comment, iterate backwards to determine whether 1438 // it seems to relate to a specific parameter. If so, break before that 1439 // parameter to avoid changing the comment's meaning. E.g. don't move 'b' 1440 // to the previous line in: 1441 // SomeFunction(a, 1442 // b, // comment 1443 // c); 1444 if (!Current->HasUnescapedNewline) { 1445 for (FormatToken *Parameter = Current->Previous; Parameter; 1446 Parameter = Parameter->Previous) { 1447 if (Parameter->isOneOf(tok::comment, tok::r_brace)) 1448 break; 1449 if (Parameter->Previous && Parameter->Previous->is(tok::comma)) { 1450 if (!Parameter->Previous->is(TT_CtorInitializerComma) && 1451 Parameter->HasUnescapedNewline) 1452 Parameter->MustBreakBefore = true; 1453 break; 1454 } 1455 } 1456 } 1457 } else if (Current->SpacesRequiredBefore == 0 && 1458 spaceRequiredBefore(Line, *Current)) { 1459 Current->SpacesRequiredBefore = 1; 1460 } 1461 1462 Current->MustBreakBefore = 1463 Current->MustBreakBefore || mustBreakBefore(Line, *Current); 1464 1465 if (Style.AlwaysBreakAfterDefinitionReturnType && InFunctionDecl && 1466 Current->is(TT_FunctionDeclarationName) && 1467 !Line.Last->isOneOf(tok::semi, tok::comment)) // Only for definitions. 1468 // FIXME: Line.Last points to other characters than tok::semi 1469 // and tok::lbrace. 1470 Current->MustBreakBefore = true; 1471 1472 Current->CanBreakBefore = 1473 Current->MustBreakBefore || canBreakBefore(Line, *Current); 1474 unsigned ChildSize = 0; 1475 if (Current->Previous->Children.size() == 1) { 1476 FormatToken &LastOfChild = *Current->Previous->Children[0]->Last; 1477 ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit 1478 : LastOfChild.TotalLength + 1; 1479 } 1480 const FormatToken *Prev = Current->Previous; 1481 if (Current->MustBreakBefore || Prev->Children.size() > 1 || 1482 (Prev->Children.size() == 1 && 1483 Prev->Children[0]->First->MustBreakBefore) || 1484 Current->IsMultiline) 1485 Current->TotalLength = Prev->TotalLength + Style.ColumnLimit; 1486 else 1487 Current->TotalLength = Prev->TotalLength + Current->ColumnWidth + 1488 ChildSize + Current->SpacesRequiredBefore; 1489 1490 if (Current->is(TT_CtorInitializerColon)) 1491 InFunctionDecl = false; 1492 1493 // FIXME: Only calculate this if CanBreakBefore is true once static 1494 // initializers etc. are sorted out. 1495 // FIXME: Move magic numbers to a better place. 1496 Current->SplitPenalty = 20 * Current->BindingStrength + 1497 splitPenalty(Line, *Current, InFunctionDecl); 1498 1499 Current = Current->Next; 1500 } 1501 1502 calculateUnbreakableTailLengths(Line); 1503 for (Current = Line.First; Current != nullptr; Current = Current->Next) { 1504 if (Current->Role) 1505 Current->Role->precomputeFormattingInfos(Current); 1506 } 1507 1508 DEBUG({ printDebugInfo(Line); }); 1509 } 1510 1511 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) { 1512 unsigned UnbreakableTailLength = 0; 1513 FormatToken *Current = Line.Last; 1514 while (Current) { 1515 Current->UnbreakableTailLength = UnbreakableTailLength; 1516 if (Current->CanBreakBefore || 1517 Current->isOneOf(tok::comment, tok::string_literal)) { 1518 UnbreakableTailLength = 0; 1519 } else { 1520 UnbreakableTailLength += 1521 Current->ColumnWidth + Current->SpacesRequiredBefore; 1522 } 1523 Current = Current->Previous; 1524 } 1525 } 1526 1527 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, 1528 const FormatToken &Tok, 1529 bool InFunctionDecl) { 1530 const FormatToken &Left = *Tok.Previous; 1531 const FormatToken &Right = Tok; 1532 1533 if (Left.is(tok::semi)) 1534 return 0; 1535 1536 if (Style.Language == FormatStyle::LK_Java) { 1537 if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws)) 1538 return 1; 1539 if (Right.is(Keywords.kw_implements)) 1540 return 2; 1541 if (Left.is(tok::comma) && Left.NestingLevel == 0) 1542 return 3; 1543 } else if (Style.Language == FormatStyle::LK_JavaScript) { 1544 if (Right.is(Keywords.kw_function)) 1545 return 100; 1546 } 1547 1548 if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next && 1549 Right.Next->is(TT_DictLiteral))) 1550 return 1; 1551 if (Right.is(tok::l_square)) { 1552 if (Style.Language == FormatStyle::LK_Proto) 1553 return 1; 1554 if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare)) 1555 return 500; 1556 } 1557 1558 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) || 1559 Right.is(tok::kw_operator)) { 1560 if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt) 1561 return 3; 1562 if (Left.is(TT_StartOfName)) 1563 return 20; 1564 if (InFunctionDecl && Right.NestingLevel == 0) 1565 return Style.PenaltyReturnTypeOnItsOwnLine; 1566 return 200; 1567 } 1568 if (Right.is(TT_PointerOrReference)) 1569 return 190; 1570 if (Right.is(TT_TrailingReturnArrow)) 1571 return 110; 1572 if (Left.is(tok::equal) && Right.is(tok::l_brace)) 1573 return 150; 1574 if (Left.is(TT_CastRParen)) 1575 return 100; 1576 if (Left.is(tok::coloncolon) || 1577 (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto)) 1578 return 500; 1579 if (Left.isOneOf(tok::kw_class, tok::kw_struct)) 1580 return 5000; 1581 1582 if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon)) 1583 return 2; 1584 1585 if (Right.isMemberAccess()) { 1586 if (Left.is(tok::r_paren) && Left.MatchingParen && 1587 Left.MatchingParen->ParameterCount > 0) 1588 return 20; // Should be smaller than breaking at a nested comma. 1589 return 150; 1590 } 1591 1592 if (Right.is(TT_TrailingAnnotation) && 1593 (!Right.Next || Right.Next->isNot(tok::l_paren))) { 1594 // Moving trailing annotations to the next line is fine for ObjC method 1595 // declarations. 1596 if (Line.First->is(TT_ObjCMethodSpecifier)) 1597 1598 return 10; 1599 // Generally, breaking before a trailing annotation is bad unless it is 1600 // function-like. It seems to be especially preferable to keep standard 1601 // annotations (i.e. "const", "final" and "override") on the same line. 1602 // Use a slightly higher penalty after ")" so that annotations like 1603 // "const override" are kept together. 1604 bool is_short_annotation = Right.TokenText.size() < 10; 1605 return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0); 1606 } 1607 1608 // In for-loops, prefer breaking at ',' and ';'. 1609 if (Line.First->is(tok::kw_for) && Left.is(tok::equal)) 1610 return 4; 1611 1612 // In Objective-C method expressions, prefer breaking before "param:" over 1613 // breaking after it. 1614 if (Right.is(TT_SelectorName)) 1615 return 0; 1616 if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr)) 1617 return Line.MightBeFunctionDecl ? 50 : 500; 1618 1619 if (Left.is(tok::l_paren) && InFunctionDecl && Style.AlignAfterOpenBracket) 1620 return 100; 1621 if (Left.is(tok::l_paren) && Left.Previous && Left.Previous->is(tok::kw_if)) 1622 return 1000; 1623 if (Left.is(tok::equal) && InFunctionDecl) 1624 return 110; 1625 if (Right.is(tok::r_brace)) 1626 return 1; 1627 if (Left.is(TT_TemplateOpener)) 1628 return 100; 1629 if (Left.opensScope()) { 1630 if (!Style.AlignAfterOpenBracket) 1631 return 0; 1632 return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter 1633 : 19; 1634 } 1635 if (Left.is(TT_JavaAnnotation)) 1636 return 50; 1637 1638 if (Right.is(tok::lessless)) { 1639 if (Left.is(tok::string_literal)) { 1640 StringRef Content = Left.TokenText; 1641 if (Content.startswith("\"")) 1642 Content = Content.drop_front(1); 1643 if (Content.endswith("\"")) 1644 Content = Content.drop_back(1); 1645 Content = Content.trim(); 1646 if (Content.size() > 1 && 1647 (Content.back() == ':' || Content.back() == '=')) 1648 return 25; 1649 } 1650 return 1; // Breaking at a << is really cheap. 1651 } 1652 if (Left.is(TT_ConditionalExpr)) 1653 return prec::Conditional; 1654 prec::Level Level = Left.getPrecedence(); 1655 if (Level != prec::Unknown) 1656 return Level; 1657 1658 return 3; 1659 } 1660 1661 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, 1662 const FormatToken &Left, 1663 const FormatToken &Right) { 1664 if (Left.is(tok::kw_return) && Right.isNot(tok::semi)) 1665 return true; 1666 if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty && 1667 Left.Tok.getObjCKeywordID() == tok::objc_property) 1668 return true; 1669 if (Right.is(tok::hashhash)) 1670 return Left.is(tok::hash); 1671 if (Left.isOneOf(tok::hashhash, tok::hash)) 1672 return Right.is(tok::hash); 1673 if (Left.is(tok::l_paren) && Right.is(tok::r_paren)) 1674 return Style.SpaceInEmptyParentheses; 1675 if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) 1676 return (Right.is(TT_CastRParen) || 1677 (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen))) 1678 ? Style.SpacesInCStyleCastParentheses 1679 : Style.SpacesInParentheses; 1680 if (Right.isOneOf(tok::semi, tok::comma)) 1681 return false; 1682 if (Right.is(tok::less) && 1683 (Left.is(tok::kw_template) || 1684 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) 1685 return true; 1686 if (Left.isOneOf(tok::exclaim, tok::tilde)) 1687 return false; 1688 if (Left.is(tok::at) && 1689 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant, 1690 tok::numeric_constant, tok::l_paren, tok::l_brace, 1691 tok::kw_true, tok::kw_false)) 1692 return false; 1693 if (Left.is(tok::coloncolon)) 1694 return false; 1695 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) 1696 return false; 1697 if (Right.is(tok::ellipsis)) 1698 return Left.Tok.isLiteral(); 1699 if (Left.is(tok::l_square) && Right.is(tok::amp)) 1700 return false; 1701 if (Right.is(TT_PointerOrReference)) 1702 return !(Left.is(tok::r_paren) && Left.MatchingParen && 1703 (Left.MatchingParen->is(TT_OverloadedOperatorLParen) || 1704 (Left.MatchingParen->Previous && 1705 Left.MatchingParen->Previous->is( 1706 TT_FunctionDeclarationName)))) && 1707 (Left.Tok.isLiteral() || 1708 (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) && 1709 (Style.PointerAlignment != FormatStyle::PAS_Left || 1710 Line.IsMultiVariableDeclStmt))); 1711 if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) && 1712 (!Left.is(TT_PointerOrReference) || 1713 (Style.PointerAlignment != FormatStyle::PAS_Right && 1714 !Line.IsMultiVariableDeclStmt))) 1715 return true; 1716 if (Left.is(TT_PointerOrReference)) 1717 return Right.Tok.isLiteral() || Right.is(TT_BlockComment) || 1718 (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare, 1719 tok::l_paren) && 1720 (Style.PointerAlignment != FormatStyle::PAS_Right && 1721 !Line.IsMultiVariableDeclStmt) && 1722 Left.Previous && 1723 !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon)); 1724 if (Right.is(tok::star) && Left.is(tok::l_paren)) 1725 return false; 1726 if (Left.is(tok::l_square)) 1727 return (Left.is(TT_ArrayInitializerLSquare) && 1728 Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) || 1729 (Left.is(TT_ArraySubscriptLSquare) && Style.SpacesInSquareBrackets && 1730 Right.isNot(tok::r_square)); 1731 if (Right.is(tok::r_square)) 1732 return Right.MatchingParen && 1733 ((Style.SpacesInContainerLiterals && 1734 Right.MatchingParen->is(TT_ArrayInitializerLSquare)) || 1735 (Style.SpacesInSquareBrackets && 1736 Right.MatchingParen->is(TT_ArraySubscriptLSquare))); 1737 if (Right.is(tok::l_square) && 1738 !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare) && 1739 !Left.isOneOf(tok::numeric_constant, TT_DictLiteral)) 1740 return false; 1741 if (Left.is(tok::colon)) 1742 return !Left.is(TT_ObjCMethodExpr); 1743 if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) 1744 return !Left.Children.empty(); // No spaces in "{}". 1745 if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) || 1746 (Right.is(tok::r_brace) && Right.MatchingParen && 1747 Right.MatchingParen->BlockKind != BK_Block)) 1748 return !Style.Cpp11BracedListStyle; 1749 if (Left.is(TT_BlockComment)) 1750 return !Left.TokenText.endswith("=*/"); 1751 if (Right.is(tok::l_paren)) { 1752 if (Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) 1753 return true; 1754 return Line.Type == LT_ObjCDecl || Left.is(tok::semi) || 1755 (Style.SpaceBeforeParens != FormatStyle::SBPO_Never && 1756 (Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, 1757 tok::kw_switch, tok::kw_case, TT_ForEachMacro) || 1758 (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch, 1759 tok::kw_new, tok::kw_delete) && 1760 (!Left.Previous || Left.Previous->isNot(tok::period))))) || 1761 (Style.SpaceBeforeParens == FormatStyle::SBPO_Always && 1762 (Left.is(tok::identifier) || Left.isFunctionLikeKeyword()) && 1763 Line.Type != LT_PreprocessorDirective); 1764 } 1765 if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword) 1766 return false; 1767 if (Right.is(TT_UnaryOperator)) 1768 return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) && 1769 (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr)); 1770 if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square, 1771 tok::r_paren) || 1772 Left.isSimpleTypeSpecifier()) && 1773 Right.is(tok::l_brace) && Right.getNextNonComment() && 1774 Right.BlockKind != BK_Block) 1775 return false; 1776 if (Left.is(tok::period) || Right.is(tok::period)) 1777 return false; 1778 if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L") 1779 return false; 1780 if (Left.is(TT_TemplateCloser) && Left.MatchingParen && 1781 Left.MatchingParen->Previous && 1782 Left.MatchingParen->Previous->is(tok::period)) 1783 // A.<B>DoSomething(); 1784 return false; 1785 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square)) 1786 return false; 1787 return true; 1788 } 1789 1790 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, 1791 const FormatToken &Right) { 1792 const FormatToken &Left = *Right.Previous; 1793 if (Style.Language == FormatStyle::LK_Proto) { 1794 if (Right.is(tok::period) && 1795 Left.isOneOf(Keywords.kw_optional, Keywords.kw_required, 1796 Keywords.kw_repeated)) 1797 return true; 1798 if (Right.is(tok::l_paren) && 1799 Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) 1800 return true; 1801 } else if (Style.Language == FormatStyle::LK_JavaScript) { 1802 if (Left.is(Keywords.kw_var)) 1803 return true; 1804 if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion)) 1805 return false; 1806 if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) && 1807 Line.First->isOneOf(Keywords.kw_import, tok::kw_export)) 1808 return false; 1809 if (Left.is(TT_TemplateCloser) && 1810 !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square, 1811 Keywords.kw_implements, Keywords.kw_extends)) 1812 // Type assertions ('<type>expr') are not followed by whitespace. Other 1813 // locations that should have whitespace following are identified by the 1814 // above set of follower tokens. 1815 return false; 1816 } else if (Style.Language == FormatStyle::LK_Java) { 1817 if (Left.is(tok::r_square) && Right.is(tok::l_brace)) 1818 return true; 1819 if (Left.is(TT_LambdaArrow) || Right.is(TT_LambdaArrow)) 1820 return true; 1821 if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) 1822 return Style.SpaceBeforeParens != FormatStyle::SBPO_Never; 1823 if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private, 1824 tok::kw_protected) || 1825 Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract, 1826 Keywords.kw_native)) && 1827 Right.is(TT_TemplateOpener)) 1828 return true; 1829 } 1830 if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo()) 1831 return true; // Never ever merge two identifiers. 1832 if (Left.is(TT_ImplicitStringLiteral)) 1833 return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd(); 1834 if (Line.Type == LT_ObjCMethodDecl) { 1835 if (Left.is(TT_ObjCMethodSpecifier)) 1836 return true; 1837 if (Left.is(tok::r_paren) && Right.is(tok::identifier)) 1838 // Don't space between ')' and <id> 1839 return false; 1840 } 1841 if (Line.Type == LT_ObjCProperty && 1842 (Right.is(tok::equal) || Left.is(tok::equal))) 1843 return false; 1844 1845 if (Right.is(TT_TrailingReturnArrow) || Left.is(TT_TrailingReturnArrow)) 1846 return true; 1847 if (Left.is(tok::comma)) 1848 return true; 1849 if (Right.is(tok::comma)) 1850 return false; 1851 if (Right.isOneOf(TT_CtorInitializerColon, TT_ObjCBlockLParen)) 1852 return true; 1853 if (Left.is(tok::kw_operator)) 1854 return Right.is(tok::coloncolon); 1855 if (Right.is(TT_OverloadedOperatorLParen)) 1856 return false; 1857 if (Right.is(tok::colon)) 1858 return !Line.First->isOneOf(tok::kw_case, tok::kw_default) && 1859 Right.getNextNonComment() && Right.isNot(TT_ObjCMethodExpr) && 1860 !Left.is(tok::question) && 1861 !(Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon)) && 1862 (Right.isNot(TT_DictLiteral) || Style.SpacesInContainerLiterals); 1863 if (Left.is(TT_UnaryOperator)) 1864 return Right.is(TT_BinaryOperator); 1865 if (Left.is(TT_CastRParen)) 1866 return Style.SpaceAfterCStyleCast || Right.is(TT_BinaryOperator); 1867 if (Left.is(tok::greater) && Right.is(tok::greater)) { 1868 return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) && 1869 (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles); 1870 } 1871 if (Right.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) || 1872 Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar)) 1873 return false; 1874 if (!Style.SpaceBeforeAssignmentOperators && 1875 Right.getPrecedence() == prec::Assignment) 1876 return false; 1877 if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace)) 1878 return (Left.is(TT_TemplateOpener) && 1879 Style.Standard == FormatStyle::LS_Cpp03) || 1880 !(Left.isOneOf(tok::identifier, tok::l_paren, tok::r_paren) || 1881 Left.isOneOf(TT_TemplateCloser, TT_TemplateOpener)); 1882 if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser))) 1883 return Style.SpacesInAngles; 1884 if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) || 1885 Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) 1886 return true; 1887 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) && 1888 Right.isNot(TT_FunctionTypeLParen)) 1889 return Style.SpaceBeforeParens == FormatStyle::SBPO_Always; 1890 if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) && 1891 Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen)) 1892 return false; 1893 if (Right.is(tok::less) && Left.isNot(tok::l_paren) && 1894 Line.First->is(tok::hash)) 1895 return true; 1896 if (Right.is(TT_TrailingUnaryOperator)) 1897 return false; 1898 if (Left.is(TT_RegexLiteral)) 1899 return false; 1900 return spaceRequiredBetween(Line, Left, Right); 1901 } 1902 1903 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style. 1904 static bool isAllmanBrace(const FormatToken &Tok) { 1905 return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block && 1906 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral); 1907 } 1908 1909 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, 1910 const FormatToken &Right) { 1911 const FormatToken &Left = *Right.Previous; 1912 if (Right.NewlinesBefore > 1) 1913 return true; 1914 1915 // If the last token before a '}' is a comma or a trailing comment, the 1916 // intention is to insert a line break after it in order to make shuffling 1917 // around entries easier. 1918 const FormatToken *BeforeClosingBrace = nullptr; 1919 if (Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) && 1920 Left.BlockKind != BK_Block && Left.MatchingParen) 1921 BeforeClosingBrace = Left.MatchingParen->Previous; 1922 else if (Right.MatchingParen && 1923 Right.MatchingParen->isOneOf(tok::l_brace, 1924 TT_ArrayInitializerLSquare)) 1925 BeforeClosingBrace = &Left; 1926 if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) || 1927 BeforeClosingBrace->isTrailingComment())) 1928 return true; 1929 1930 if (Right.is(tok::comment)) 1931 return Left.BlockKind != BK_BracedInit && 1932 Left.isNot(TT_CtorInitializerColon) && 1933 (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline); 1934 if (Right.Previous->isTrailingComment() || 1935 (Right.isStringLiteral() && Right.Previous->isStringLiteral())) 1936 return true; 1937 if (Right.Previous->IsUnterminatedLiteral) 1938 return true; 1939 if (Right.is(tok::lessless) && Right.Next && 1940 Right.Previous->is(tok::string_literal) && 1941 Right.Next->is(tok::string_literal)) 1942 return true; 1943 if (Right.Previous->ClosesTemplateDeclaration && 1944 Right.Previous->MatchingParen && 1945 Right.Previous->MatchingParen->NestingLevel == 0 && 1946 Style.AlwaysBreakTemplateDeclarations) 1947 return true; 1948 if ((Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) && 1949 Style.BreakConstructorInitializersBeforeComma && 1950 !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) 1951 return true; 1952 if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\"")) 1953 // Raw string literals are special wrt. line breaks. The author has made a 1954 // deliberate choice and might have aligned the contents of the string 1955 // literal accordingly. Thus, we try keep existing line breaks. 1956 return Right.NewlinesBefore > 0; 1957 if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 && 1958 Style.Language == FormatStyle::LK_Proto) 1959 // Don't put enums onto single lines in protocol buffers. 1960 return true; 1961 if (Style.Language == FormatStyle::LK_JavaScript && Right.is(tok::r_brace) && 1962 Left.is(tok::l_brace) && !Left.Children.empty()) 1963 // Support AllowShortFunctionsOnASingleLine for JavaScript. 1964 return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None || 1965 (Left.NestingLevel == 0 && Line.Level == 0 && 1966 Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline); 1967 if (isAllmanBrace(Left) || isAllmanBrace(Right)) 1968 return Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1969 Style.BreakBeforeBraces == FormatStyle::BS_GNU; 1970 if (Style.Language == FormatStyle::LK_Proto && Left.isNot(tok::l_brace) && 1971 Right.is(TT_SelectorName)) 1972 return true; 1973 if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine) 1974 return true; 1975 1976 if ((Style.Language == FormatStyle::LK_Java || 1977 Style.Language == FormatStyle::LK_JavaScript) && 1978 Left.is(TT_LeadingJavaAnnotation) && 1979 Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) && 1980 Line.Last->is(tok::l_brace)) 1981 return true; 1982 1983 if (Style.Language == FormatStyle::LK_JavaScript) { 1984 // FIXME: This might apply to other languages and token kinds. 1985 if (Right.is(tok::char_constant) && Left.is(tok::plus) && Left.Previous && 1986 Left.Previous->is(tok::char_constant)) 1987 return true; 1988 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && 1989 Left.NestingLevel == 0 && Left.Previous && 1990 Left.Previous->is(tok::equal) && 1991 Line.First->isOneOf(tok::identifier, Keywords.kw_import, 1992 tok::kw_export) && 1993 // kw_var is a pseudo-token that's a tok::identifier, so matches above. 1994 !Line.First->is(Keywords.kw_var)) 1995 // Enum style object literal. 1996 return true; 1997 } else if (Style.Language == FormatStyle::LK_Java) { 1998 if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next && 1999 Right.Next->is(tok::string_literal)) 2000 return true; 2001 } 2002 2003 return false; 2004 } 2005 2006 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, 2007 const FormatToken &Right) { 2008 const FormatToken &Left = *Right.Previous; 2009 2010 if (Style.Language == FormatStyle::LK_Java) { 2011 if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends, 2012 Keywords.kw_implements)) 2013 return false; 2014 if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends, 2015 Keywords.kw_implements)) 2016 return true; 2017 } 2018 2019 if (Left.is(tok::at)) 2020 return false; 2021 if (Left.Tok.getObjCKeywordID() == tok::objc_interface) 2022 return false; 2023 if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation)) 2024 return !Right.is(tok::l_paren); 2025 if (Right.is(TT_PointerOrReference)) 2026 return Line.IsMultiVariableDeclStmt || 2027 (Style.PointerAlignment == FormatStyle::PAS_Right && 2028 (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName))); 2029 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) || 2030 Right.is(tok::kw_operator)) 2031 return true; 2032 if (Left.is(TT_PointerOrReference)) 2033 return false; 2034 if (Right.isTrailingComment()) 2035 // We rely on MustBreakBefore being set correctly here as we should not 2036 // change the "binding" behavior of a comment. 2037 // The first comment in a braced lists is always interpreted as belonging to 2038 // the first list element. Otherwise, it should be placed outside of the 2039 // list. 2040 return Left.BlockKind == BK_BracedInit; 2041 if (Left.is(tok::question) && Right.is(tok::colon)) 2042 return false; 2043 if (Right.is(TT_ConditionalExpr) || Right.is(tok::question)) 2044 return Style.BreakBeforeTernaryOperators; 2045 if (Left.is(TT_ConditionalExpr) || Left.is(tok::question)) 2046 return !Style.BreakBeforeTernaryOperators; 2047 if (Right.is(TT_InheritanceColon)) 2048 return true; 2049 if (Right.is(tok::colon) && 2050 !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) 2051 return false; 2052 if (Left.is(tok::colon) && (Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr))) 2053 return true; 2054 if (Right.is(TT_SelectorName)) 2055 return true; 2056 if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty) 2057 return true; 2058 if (Left.ClosesTemplateDeclaration) 2059 return true; 2060 if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen, 2061 TT_OverloadedOperator)) 2062 return false; 2063 if (Left.is(TT_RangeBasedForLoopColon)) 2064 return true; 2065 if (Right.is(TT_RangeBasedForLoopColon)) 2066 return false; 2067 if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) || 2068 Left.is(tok::kw_operator)) 2069 return false; 2070 if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) && 2071 Line.Type == LT_VirtualFunctionDecl) 2072 return false; 2073 if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen)) 2074 return false; 2075 if (Left.is(tok::l_paren) && Left.Previous && 2076 (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) 2077 return false; 2078 if (Right.is(TT_ImplicitStringLiteral)) 2079 return false; 2080 2081 if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser)) 2082 return false; 2083 2084 // We only break before r_brace if there was a corresponding break before 2085 // the l_brace, which is tracked by BreakBeforeClosingBrace. 2086 if (Right.is(tok::r_brace)) 2087 return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block; 2088 2089 // Allow breaking after a trailing annotation, e.g. after a method 2090 // declaration. 2091 if (Left.is(TT_TrailingAnnotation)) 2092 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren, 2093 tok::less, tok::coloncolon); 2094 2095 if (Right.is(tok::kw___attribute)) 2096 return true; 2097 2098 if (Left.is(tok::identifier) && Right.is(tok::string_literal)) 2099 return true; 2100 2101 if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral)) 2102 return true; 2103 2104 if (Left.is(TT_CtorInitializerComma) && 2105 Style.BreakConstructorInitializersBeforeComma) 2106 return false; 2107 if (Right.is(TT_CtorInitializerComma) && 2108 Style.BreakConstructorInitializersBeforeComma) 2109 return true; 2110 if ((Left.is(tok::greater) && Right.is(tok::greater)) || 2111 (Left.is(tok::less) && Right.is(tok::less))) 2112 return false; 2113 if (Right.is(TT_BinaryOperator) && 2114 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None && 2115 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All || 2116 Right.getPrecedence() != prec::Assignment)) 2117 return true; 2118 if (Left.is(TT_ArrayInitializerLSquare)) 2119 return true; 2120 if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const)) 2121 return true; 2122 if (Left.isBinaryOperator() && !Left.isOneOf(tok::arrowstar, tok::lessless) && 2123 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All && 2124 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None || 2125 Left.getPrecedence() == prec::Assignment)) 2126 return true; 2127 return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, 2128 tok::kw_class, tok::kw_struct) || 2129 Right.isMemberAccess() || 2130 Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless, 2131 tok::colon, tok::l_square, tok::at) || 2132 (Left.is(tok::r_paren) && 2133 Right.isOneOf(tok::identifier, tok::kw_const)) || 2134 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)); 2135 } 2136 2137 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { 2138 llvm::errs() << "AnnotatedTokens:\n"; 2139 const FormatToken *Tok = Line.First; 2140 while (Tok) { 2141 llvm::errs() << " M=" << Tok->MustBreakBefore 2142 << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type 2143 << " S=" << Tok->SpacesRequiredBefore 2144 << " B=" << Tok->BlockParameterCount 2145 << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName() 2146 << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind 2147 << " FakeLParens="; 2148 for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) 2149 llvm::errs() << Tok->FakeLParens[i] << "/"; 2150 llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n"; 2151 if (!Tok->Next) 2152 assert(Tok == Line.Last); 2153 Tok = Tok->Next; 2154 } 2155 llvm::errs() << "----\n"; 2156 } 2157 2158 } // namespace format 2159 } // namespace clang 2160