1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements a token annotator, i.e. creates 12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "TokenAnnotator.h" 17 #include "clang/Basic/SourceManager.h" 18 #include "llvm/ADT/SmallPtrSet.h" 19 #include "llvm/Support/Debug.h" 20 21 #define DEBUG_TYPE "format-token-annotator" 22 23 namespace clang { 24 namespace format { 25 26 namespace { 27 28 /// \brief A parser that gathers additional information about tokens. 29 /// 30 /// The \c TokenAnnotator tries to match parenthesis and square brakets and 31 /// store a parenthesis levels. It also tries to resolve matching "<" and ">" 32 /// into template parameter lists. 33 class AnnotatingParser { 34 public: 35 AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line, 36 const AdditionalKeywords &Keywords) 37 : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false), 38 Keywords(Keywords) { 39 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false)); 40 resetTokenMetadata(CurrentToken); 41 } 42 43 private: 44 bool parseAngle() { 45 if (!CurrentToken || !CurrentToken->Previous) 46 return false; 47 if (NonTemplateLess.count(CurrentToken->Previous)) 48 return false; 49 50 const FormatToken& Previous = *CurrentToken->Previous; 51 if (Previous.Previous) { 52 if (Previous.Previous->Tok.isLiteral()) 53 return false; 54 if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 && 55 (!Previous.Previous->MatchingParen || 56 !Previous.Previous->MatchingParen->is(TT_OverloadedOperatorLParen))) 57 return false; 58 } 59 60 FormatToken *Left = CurrentToken->Previous; 61 Left->ParentBracket = Contexts.back().ContextKind; 62 ScopedContextCreator ContextCreator(*this, tok::less, 12); 63 64 // If this angle is in the context of an expression, we need to be more 65 // hesitant to detect it as opening template parameters. 66 bool InExprContext = Contexts.back().IsExpression; 67 68 Contexts.back().IsExpression = false; 69 // If there's a template keyword before the opening angle bracket, this is a 70 // template parameter, not an argument. 71 Contexts.back().InTemplateArgument = 72 Left->Previous && Left->Previous->Tok.isNot(tok::kw_template); 73 74 if (Style.Language == FormatStyle::LK_Java && 75 CurrentToken->is(tok::question)) 76 next(); 77 78 while (CurrentToken) { 79 if (CurrentToken->is(tok::greater)) { 80 Left->MatchingParen = CurrentToken; 81 CurrentToken->MatchingParen = Left; 82 CurrentToken->Type = TT_TemplateCloser; 83 next(); 84 return true; 85 } 86 if (CurrentToken->is(tok::question) && 87 Style.Language == FormatStyle::LK_Java) { 88 next(); 89 continue; 90 } 91 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) || 92 (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext)) 93 return false; 94 // If a && or || is found and interpreted as a binary operator, this set 95 // of angles is likely part of something like "a < b && c > d". If the 96 // angles are inside an expression, the ||/&& might also be a binary 97 // operator that was misinterpreted because we are parsing template 98 // parameters. 99 // FIXME: This is getting out of hand, write a decent parser. 100 if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) && 101 CurrentToken->Previous->is(TT_BinaryOperator) && 102 Contexts[Contexts.size() - 2].IsExpression && 103 !Line.startsWith(tok::kw_template)) 104 return false; 105 updateParameterCount(Left, CurrentToken); 106 if (!consumeToken()) 107 return false; 108 } 109 return false; 110 } 111 112 bool parseParens(bool LookForDecls = false) { 113 if (!CurrentToken) 114 return false; 115 FormatToken *Left = CurrentToken->Previous; 116 Left->ParentBracket = Contexts.back().ContextKind; 117 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1); 118 119 // FIXME: This is a bit of a hack. Do better. 120 Contexts.back().ColonIsForRangeExpr = 121 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; 122 123 bool StartsObjCMethodExpr = false; 124 if (CurrentToken->is(tok::caret)) { 125 // (^ can start a block type. 126 Left->Type = TT_ObjCBlockLParen; 127 } else if (FormatToken *MaybeSel = Left->Previous) { 128 // @selector( starts a selector. 129 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous && 130 MaybeSel->Previous->is(tok::at)) { 131 StartsObjCMethodExpr = true; 132 } 133 } 134 135 if (Left->is(TT_OverloadedOperatorLParen)) { 136 Contexts.back().IsExpression = false; 137 } else if (Style.Language == FormatStyle::LK_JavaScript && 138 Line.startsWith(Keywords.kw_type, tok::identifier)) { 139 // type X = (...); 140 Contexts.back().IsExpression = false; 141 } else if (Left->Previous && 142 (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype, 143 tok::kw_if, tok::kw_while, tok::l_paren, 144 tok::comma) || 145 Left->Previous->is(TT_BinaryOperator))) { 146 // static_assert, if and while usually contain expressions. 147 Contexts.back().IsExpression = true; 148 } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous && 149 (Left->Previous->is(Keywords.kw_function) || 150 (Left->Previous->endsSequence(tok::identifier, 151 Keywords.kw_function)))) { 152 // function(...) or function f(...) 153 Contexts.back().IsExpression = false; 154 } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous && 155 Left->Previous->is(TT_JsTypeColon)) { 156 // let x: (SomeType); 157 Contexts.back().IsExpression = false; 158 } else if (Left->Previous && Left->Previous->is(tok::r_square) && 159 Left->Previous->MatchingParen && 160 Left->Previous->MatchingParen->is(TT_LambdaLSquare)) { 161 // This is a parameter list of a lambda expression. 162 Contexts.back().IsExpression = false; 163 } else if (Line.InPPDirective && 164 (!Left->Previous || !Left->Previous->is(tok::identifier))) { 165 Contexts.back().IsExpression = true; 166 } else if (Contexts[Contexts.size() - 2].CaretFound) { 167 // This is the parameter list of an ObjC block. 168 Contexts.back().IsExpression = false; 169 } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) { 170 Left->Type = TT_AttributeParen; 171 } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) { 172 // The first argument to a foreach macro is a declaration. 173 Contexts.back().IsForEachMacro = true; 174 Contexts.back().IsExpression = false; 175 } else if (Left->Previous && Left->Previous->MatchingParen && 176 Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) { 177 Contexts.back().IsExpression = false; 178 } else if (!Line.MustBeDeclaration && !Line.InPPDirective) { 179 bool IsForOrCatch = 180 Left->Previous && Left->Previous->isOneOf(tok::kw_for, tok::kw_catch); 181 Contexts.back().IsExpression = !IsForOrCatch; 182 } 183 184 if (StartsObjCMethodExpr) { 185 Contexts.back().ColonIsObjCMethodExpr = true; 186 Left->Type = TT_ObjCMethodExpr; 187 } 188 189 bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression; 190 bool ProbablyFunctionType = CurrentToken->isOneOf(tok::star, tok::amp); 191 bool HasMultipleLines = false; 192 bool HasMultipleParametersOnALine = false; 193 bool MightBeObjCForRangeLoop = 194 Left->Previous && Left->Previous->is(tok::kw_for); 195 while (CurrentToken) { 196 // LookForDecls is set when "if (" has been seen. Check for 197 // 'identifier' '*' 'identifier' followed by not '=' -- this 198 // '*' has to be a binary operator but determineStarAmpUsage() will 199 // categorize it as an unary operator, so set the right type here. 200 if (LookForDecls && CurrentToken->Next) { 201 FormatToken *Prev = CurrentToken->getPreviousNonComment(); 202 if (Prev) { 203 FormatToken *PrevPrev = Prev->getPreviousNonComment(); 204 FormatToken *Next = CurrentToken->Next; 205 if (PrevPrev && PrevPrev->is(tok::identifier) && 206 Prev->isOneOf(tok::star, tok::amp, tok::ampamp) && 207 CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) { 208 Prev->Type = TT_BinaryOperator; 209 LookForDecls = false; 210 } 211 } 212 } 213 214 if (CurrentToken->Previous->is(TT_PointerOrReference) && 215 CurrentToken->Previous->Previous->isOneOf(tok::l_paren, 216 tok::coloncolon)) 217 ProbablyFunctionType = true; 218 if (CurrentToken->is(tok::comma)) 219 MightBeFunctionType = false; 220 if (CurrentToken->Previous->is(TT_BinaryOperator)) 221 Contexts.back().IsExpression = true; 222 if (CurrentToken->is(tok::r_paren)) { 223 if (MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next && 224 (CurrentToken->Next->is(tok::l_paren) || 225 (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration))) 226 Left->Type = TT_FunctionTypeLParen; 227 Left->MatchingParen = CurrentToken; 228 CurrentToken->MatchingParen = Left; 229 230 if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) && 231 Left->Previous && Left->Previous->is(tok::l_paren)) { 232 // Detect the case where macros are used to generate lambdas or 233 // function bodies, e.g.: 234 // auto my_lambda = MARCO((Type *type, int i) { .. body .. }); 235 for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) { 236 if (Tok->is(TT_BinaryOperator) && 237 Tok->isOneOf(tok::star, tok::amp, tok::ampamp)) 238 Tok->Type = TT_PointerOrReference; 239 } 240 } 241 242 if (StartsObjCMethodExpr) { 243 CurrentToken->Type = TT_ObjCMethodExpr; 244 if (Contexts.back().FirstObjCSelectorName) { 245 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 246 Contexts.back().LongestObjCSelectorName; 247 } 248 } 249 250 if (Left->is(TT_AttributeParen)) 251 CurrentToken->Type = TT_AttributeParen; 252 if (Left->Previous && Left->Previous->is(TT_JavaAnnotation)) 253 CurrentToken->Type = TT_JavaAnnotation; 254 if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation)) 255 CurrentToken->Type = TT_LeadingJavaAnnotation; 256 257 if (!HasMultipleLines) 258 Left->PackingKind = PPK_Inconclusive; 259 else if (HasMultipleParametersOnALine) 260 Left->PackingKind = PPK_BinPacked; 261 else 262 Left->PackingKind = PPK_OnePerLine; 263 264 next(); 265 return true; 266 } 267 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace)) 268 return false; 269 270 if (CurrentToken->is(tok::l_brace)) 271 Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen 272 if (CurrentToken->is(tok::comma) && CurrentToken->Next && 273 !CurrentToken->Next->HasUnescapedNewline && 274 !CurrentToken->Next->isTrailingComment()) 275 HasMultipleParametersOnALine = true; 276 if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) || 277 CurrentToken->Previous->isSimpleTypeSpecifier()) && 278 !CurrentToken->is(tok::l_brace)) 279 Contexts.back().IsExpression = false; 280 if (CurrentToken->isOneOf(tok::semi, tok::colon)) 281 MightBeObjCForRangeLoop = false; 282 if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) 283 CurrentToken->Type = TT_ObjCForIn; 284 // When we discover a 'new', we set CanBeExpression to 'false' in order to 285 // parse the type correctly. Reset that after a comma. 286 if (CurrentToken->is(tok::comma)) 287 Contexts.back().CanBeExpression = true; 288 289 FormatToken *Tok = CurrentToken; 290 if (!consumeToken()) 291 return false; 292 updateParameterCount(Left, Tok); 293 if (CurrentToken && CurrentToken->HasUnescapedNewline) 294 HasMultipleLines = true; 295 } 296 return false; 297 } 298 299 bool parseSquare() { 300 if (!CurrentToken) 301 return false; 302 303 // A '[' could be an index subscript (after an identifier or after 304 // ')' or ']'), it could be the start of an Objective-C method 305 // expression, or it could the start of an Objective-C array literal. 306 FormatToken *Left = CurrentToken->Previous; 307 Left->ParentBracket = Contexts.back().ContextKind; 308 FormatToken *Parent = Left->getPreviousNonComment(); 309 310 // Cases where '>' is followed by '['. 311 // In C++, this can happen either in array of templates (foo<int>[10]) 312 // or when array is a nested template type (unique_ptr<type1<type2>[]>). 313 bool CppArrayTemplates = 314 Style.Language == FormatStyle::LK_Cpp && Parent && 315 Parent->is(TT_TemplateCloser) && 316 (Contexts.back().CanBeExpression || Contexts.back().IsExpression || 317 Contexts.back().InTemplateArgument); 318 319 bool StartsObjCMethodExpr = 320 !CppArrayTemplates && Style.Language == FormatStyle::LK_Cpp && 321 Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) && 322 CurrentToken->isNot(tok::l_brace) && 323 (!Parent || 324 Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, 325 tok::kw_return, tok::kw_throw) || 326 Parent->isUnaryOperator() || 327 Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) || 328 getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown); 329 bool ColonFound = false; 330 331 unsigned BindingIncrease = 1; 332 if (Left->is(TT_Unknown)) { 333 if (StartsObjCMethodExpr) { 334 Left->Type = TT_ObjCMethodExpr; 335 } else if (Style.Language == FormatStyle::LK_JavaScript && Parent && 336 Contexts.back().ContextKind == tok::l_brace && 337 Parent->isOneOf(tok::l_brace, tok::comma)) { 338 Left->Type = TT_JsComputedPropertyName; 339 } else if (Style.Language == FormatStyle::LK_Proto || 340 (!CppArrayTemplates && Parent && 341 Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at, 342 tok::comma, tok::l_paren, tok::l_square, 343 tok::question, tok::colon, tok::kw_return, 344 // Should only be relevant to JavaScript: 345 tok::kw_default))) { 346 Left->Type = TT_ArrayInitializerLSquare; 347 } else { 348 BindingIncrease = 10; 349 Left->Type = TT_ArraySubscriptLSquare; 350 } 351 } 352 353 ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease); 354 Contexts.back().IsExpression = true; 355 Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr; 356 357 while (CurrentToken) { 358 if (CurrentToken->is(tok::r_square)) { 359 if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) && 360 Left->is(TT_ObjCMethodExpr)) { 361 // An ObjC method call is rarely followed by an open parenthesis. 362 // FIXME: Do we incorrectly label ":" with this? 363 StartsObjCMethodExpr = false; 364 Left->Type = TT_Unknown; 365 } 366 if (StartsObjCMethodExpr && CurrentToken->Previous != Left) { 367 CurrentToken->Type = TT_ObjCMethodExpr; 368 // determineStarAmpUsage() thinks that '*' '[' is allocating an 369 // array of pointers, but if '[' starts a selector then '*' is a 370 // binary operator. 371 if (Parent && Parent->is(TT_PointerOrReference)) 372 Parent->Type = TT_BinaryOperator; 373 } 374 Left->MatchingParen = CurrentToken; 375 CurrentToken->MatchingParen = Left; 376 if (Contexts.back().FirstObjCSelectorName) { 377 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 378 Contexts.back().LongestObjCSelectorName; 379 if (Left->BlockParameterCount > 1) 380 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0; 381 } 382 next(); 383 return true; 384 } 385 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace)) 386 return false; 387 if (CurrentToken->is(tok::colon)) { 388 if (Left->is(TT_ArraySubscriptLSquare)) { 389 Left->Type = TT_ObjCMethodExpr; 390 StartsObjCMethodExpr = true; 391 Contexts.back().ColonIsObjCMethodExpr = true; 392 if (Parent && Parent->is(tok::r_paren)) 393 Parent->Type = TT_CastRParen; 394 } 395 ColonFound = true; 396 } 397 if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) && 398 !ColonFound) 399 Left->Type = TT_ArrayInitializerLSquare; 400 FormatToken *Tok = CurrentToken; 401 if (!consumeToken()) 402 return false; 403 updateParameterCount(Left, Tok); 404 } 405 return false; 406 } 407 408 bool parseBrace() { 409 if (CurrentToken) { 410 FormatToken *Left = CurrentToken->Previous; 411 Left->ParentBracket = Contexts.back().ContextKind; 412 413 if (Contexts.back().CaretFound) 414 Left->Type = TT_ObjCBlockLBrace; 415 Contexts.back().CaretFound = false; 416 417 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); 418 Contexts.back().ColonIsDictLiteral = true; 419 if (Left->BlockKind == BK_BracedInit) 420 Contexts.back().IsExpression = true; 421 422 while (CurrentToken) { 423 if (CurrentToken->is(tok::r_brace)) { 424 Left->MatchingParen = CurrentToken; 425 CurrentToken->MatchingParen = Left; 426 next(); 427 return true; 428 } 429 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) 430 return false; 431 updateParameterCount(Left, CurrentToken); 432 if (CurrentToken->isOneOf(tok::colon, tok::l_brace)) { 433 FormatToken *Previous = CurrentToken->getPreviousNonComment(); 434 if (((CurrentToken->is(tok::colon) && 435 (!Contexts.back().ColonIsDictLiteral || 436 Style.Language != FormatStyle::LK_Cpp)) || 437 Style.Language == FormatStyle::LK_Proto) && 438 (Previous->Tok.getIdentifierInfo() || 439 Previous->is(tok::string_literal))) 440 Previous->Type = TT_SelectorName; 441 if (CurrentToken->is(tok::colon) || 442 Style.Language == FormatStyle::LK_JavaScript) 443 Left->Type = TT_DictLiteral; 444 } 445 if (!consumeToken()) 446 return false; 447 } 448 } 449 return true; 450 } 451 452 void updateParameterCount(FormatToken *Left, FormatToken *Current) { 453 if (Current->is(tok::l_brace) && Current->BlockKind == BK_Block) 454 ++Left->BlockParameterCount; 455 if (Current->is(tok::comma)) { 456 ++Left->ParameterCount; 457 if (!Left->Role) 458 Left->Role.reset(new CommaSeparatedList(Style)); 459 Left->Role->CommaFound(Current); 460 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) { 461 Left->ParameterCount = 1; 462 } 463 } 464 465 bool parseConditional() { 466 while (CurrentToken) { 467 if (CurrentToken->is(tok::colon)) { 468 CurrentToken->Type = TT_ConditionalExpr; 469 next(); 470 return true; 471 } 472 if (!consumeToken()) 473 return false; 474 } 475 return false; 476 } 477 478 bool parseTemplateDeclaration() { 479 if (CurrentToken && CurrentToken->is(tok::less)) { 480 CurrentToken->Type = TT_TemplateOpener; 481 next(); 482 if (!parseAngle()) 483 return false; 484 if (CurrentToken) 485 CurrentToken->Previous->ClosesTemplateDeclaration = true; 486 return true; 487 } 488 return false; 489 } 490 491 bool consumeToken() { 492 FormatToken *Tok = CurrentToken; 493 next(); 494 switch (Tok->Tok.getKind()) { 495 case tok::plus: 496 case tok::minus: 497 if (!Tok->Previous && Line.MustBeDeclaration) 498 Tok->Type = TT_ObjCMethodSpecifier; 499 break; 500 case tok::colon: 501 if (!Tok->Previous) 502 return false; 503 // Colons from ?: are handled in parseConditional(). 504 if (Style.Language == FormatStyle::LK_JavaScript) { 505 if (Contexts.back().ColonIsForRangeExpr || // colon in for loop 506 (Contexts.size() == 1 && // switch/case labels 507 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) || 508 Contexts.back().ContextKind == tok::l_paren || // function params 509 Contexts.back().ContextKind == tok::l_square || // array type 510 (Contexts.size() == 1 && 511 Line.MustBeDeclaration)) { // method/property declaration 512 Tok->Type = TT_JsTypeColon; 513 break; 514 } 515 } 516 if (Contexts.back().ColonIsDictLiteral || 517 Style.Language == FormatStyle::LK_Proto) { 518 Tok->Type = TT_DictLiteral; 519 } else if (Contexts.back().ColonIsObjCMethodExpr || 520 Line.startsWith(TT_ObjCMethodSpecifier)) { 521 Tok->Type = TT_ObjCMethodExpr; 522 const FormatToken *BeforePrevious = Tok->Previous->Previous; 523 if (!BeforePrevious || 524 !(BeforePrevious->is(TT_CastRParen) || 525 (BeforePrevious->is(TT_ObjCMethodExpr) && 526 BeforePrevious->is(tok::colon))) || 527 BeforePrevious->is(tok::r_square) || 528 Contexts.back().LongestObjCSelectorName == 0) { 529 Tok->Previous->Type = TT_SelectorName; 530 if (Tok->Previous->ColumnWidth > 531 Contexts.back().LongestObjCSelectorName) 532 Contexts.back().LongestObjCSelectorName = 533 Tok->Previous->ColumnWidth; 534 if (!Contexts.back().FirstObjCSelectorName) 535 Contexts.back().FirstObjCSelectorName = Tok->Previous; 536 } 537 } else if (Contexts.back().ColonIsForRangeExpr) { 538 Tok->Type = TT_RangeBasedForLoopColon; 539 } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) { 540 Tok->Type = TT_BitFieldColon; 541 } else if (Contexts.size() == 1 && 542 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) { 543 if (Tok->getPreviousNonComment()->isOneOf(tok::r_paren, 544 tok::kw_noexcept)) 545 Tok->Type = TT_CtorInitializerColon; 546 else 547 Tok->Type = TT_InheritanceColon; 548 } else if (Tok->Previous->is(tok::identifier) && Tok->Next && 549 Tok->Next->isOneOf(tok::r_paren, tok::comma)) { 550 // This handles a special macro in ObjC code where selectors including 551 // the colon are passed as macro arguments. 552 Tok->Type = TT_ObjCMethodExpr; 553 } else if (Contexts.back().ContextKind == tok::l_paren) { 554 Tok->Type = TT_InlineASMColon; 555 } 556 break; 557 case tok::pipe: 558 case tok::amp: 559 // | and & in declarations/type expressions represent union and 560 // intersection types, respectively. 561 if (Style.Language == FormatStyle::LK_JavaScript && 562 !Contexts.back().IsExpression) 563 Tok->Type = TT_JsTypeOperator; 564 break; 565 case tok::kw_if: 566 case tok::kw_while: 567 if (CurrentToken && CurrentToken->is(tok::l_paren)) { 568 next(); 569 if (!parseParens(/*LookForDecls=*/true)) 570 return false; 571 } 572 break; 573 case tok::kw_for: 574 if (Style.Language == FormatStyle::LK_JavaScript && Tok->Previous && 575 Tok->Previous->is(tok::period)) 576 break; 577 Contexts.back().ColonIsForRangeExpr = true; 578 next(); 579 if (!parseParens()) 580 return false; 581 break; 582 case tok::l_paren: 583 // When faced with 'operator()()', the kw_operator handler incorrectly 584 // marks the first l_paren as a OverloadedOperatorLParen. Here, we make 585 // the first two parens OverloadedOperators and the second l_paren an 586 // OverloadedOperatorLParen. 587 if (Tok->Previous && 588 Tok->Previous->is(tok::r_paren) && 589 Tok->Previous->MatchingParen && 590 Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) { 591 Tok->Previous->Type = TT_OverloadedOperator; 592 Tok->Previous->MatchingParen->Type = TT_OverloadedOperator; 593 Tok->Type = TT_OverloadedOperatorLParen; 594 } 595 596 if (!parseParens()) 597 return false; 598 if (Line.MustBeDeclaration && Contexts.size() == 1 && 599 !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) && 600 (!Tok->Previous || 601 !Tok->Previous->isOneOf(tok::kw_decltype, tok::kw___attribute, 602 TT_LeadingJavaAnnotation))) 603 Line.MightBeFunctionDecl = true; 604 break; 605 case tok::l_square: 606 if (!parseSquare()) 607 return false; 608 break; 609 case tok::l_brace: 610 if (!parseBrace()) 611 return false; 612 break; 613 case tok::less: 614 if (parseAngle()) { 615 Tok->Type = TT_TemplateOpener; 616 } else { 617 Tok->Type = TT_BinaryOperator; 618 NonTemplateLess.insert(Tok); 619 CurrentToken = Tok; 620 next(); 621 } 622 break; 623 case tok::r_paren: 624 case tok::r_square: 625 return false; 626 case tok::r_brace: 627 // Lines can start with '}'. 628 if (Tok->Previous) 629 return false; 630 break; 631 case tok::greater: 632 Tok->Type = TT_BinaryOperator; 633 break; 634 case tok::kw_operator: 635 while (CurrentToken && 636 !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) { 637 if (CurrentToken->isOneOf(tok::star, tok::amp)) 638 CurrentToken->Type = TT_PointerOrReference; 639 consumeToken(); 640 if (CurrentToken && 641 CurrentToken->Previous->isOneOf(TT_BinaryOperator, tok::comma)) 642 CurrentToken->Previous->Type = TT_OverloadedOperator; 643 } 644 if (CurrentToken) { 645 CurrentToken->Type = TT_OverloadedOperatorLParen; 646 if (CurrentToken->Previous->is(TT_BinaryOperator)) 647 CurrentToken->Previous->Type = TT_OverloadedOperator; 648 } 649 break; 650 case tok::question: 651 if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next && 652 Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren, 653 tok::r_brace)) { 654 // Question marks before semicolons, colons, etc. indicate optional 655 // types (fields, parameters), e.g. 656 // function(x?: string, y?) {...} 657 // class X { y?; } 658 Tok->Type = TT_JsTypeOptionalQuestion; 659 break; 660 } 661 // Declarations cannot be conditional expressions, this can only be part 662 // of a type declaration. 663 if (Line.MustBeDeclaration && !Contexts.back().IsExpression && 664 Style.Language == FormatStyle::LK_JavaScript) 665 break; 666 parseConditional(); 667 break; 668 case tok::kw_template: 669 parseTemplateDeclaration(); 670 break; 671 case tok::comma: 672 if (Contexts.back().InCtorInitializer) 673 Tok->Type = TT_CtorInitializerComma; 674 else if (Contexts.back().FirstStartOfName && 675 (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) { 676 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; 677 Line.IsMultiVariableDeclStmt = true; 678 } 679 if (Contexts.back().IsForEachMacro) 680 Contexts.back().IsExpression = true; 681 break; 682 default: 683 break; 684 } 685 return true; 686 } 687 688 void parseIncludeDirective() { 689 if (CurrentToken && CurrentToken->is(tok::less)) { 690 next(); 691 while (CurrentToken) { 692 if (CurrentToken->isNot(tok::comment) || CurrentToken->Next) 693 CurrentToken->Type = TT_ImplicitStringLiteral; 694 next(); 695 } 696 } 697 } 698 699 void parseWarningOrError() { 700 next(); 701 // We still want to format the whitespace left of the first token of the 702 // warning or error. 703 next(); 704 while (CurrentToken) { 705 CurrentToken->Type = TT_ImplicitStringLiteral; 706 next(); 707 } 708 } 709 710 void parsePragma() { 711 next(); // Consume "pragma". 712 if (CurrentToken && 713 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) { 714 bool IsMark = CurrentToken->is(Keywords.kw_mark); 715 next(); // Consume "mark". 716 next(); // Consume first token (so we fix leading whitespace). 717 while (CurrentToken) { 718 if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator)) 719 CurrentToken->Type = TT_ImplicitStringLiteral; 720 next(); 721 } 722 } 723 } 724 725 LineType parsePreprocessorDirective() { 726 bool IsFirstToken = CurrentToken->IsFirst; 727 LineType Type = LT_PreprocessorDirective; 728 next(); 729 if (!CurrentToken) 730 return Type; 731 732 if (Style.Language == FormatStyle::LK_JavaScript && IsFirstToken) { 733 // JavaScript files can contain shebang lines of the form: 734 // #!/usr/bin/env node 735 // Treat these like C++ #include directives. 736 while (CurrentToken) { 737 // Tokens cannot be comments here. 738 CurrentToken->Type = TT_ImplicitStringLiteral; 739 next(); 740 } 741 return LT_ImportStatement; 742 } 743 744 if (CurrentToken->Tok.is(tok::numeric_constant)) { 745 CurrentToken->SpacesRequiredBefore = 1; 746 return Type; 747 } 748 // Hashes in the middle of a line can lead to any strange token 749 // sequence. 750 if (!CurrentToken->Tok.getIdentifierInfo()) 751 return Type; 752 switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) { 753 case tok::pp_include: 754 case tok::pp_include_next: 755 case tok::pp_import: 756 next(); 757 parseIncludeDirective(); 758 Type = LT_ImportStatement; 759 break; 760 case tok::pp_error: 761 case tok::pp_warning: 762 parseWarningOrError(); 763 break; 764 case tok::pp_pragma: 765 parsePragma(); 766 break; 767 case tok::pp_if: 768 case tok::pp_elif: 769 Contexts.back().IsExpression = true; 770 parseLine(); 771 break; 772 default: 773 break; 774 } 775 while (CurrentToken) 776 next(); 777 return Type; 778 } 779 780 public: 781 LineType parseLine() { 782 NonTemplateLess.clear(); 783 if (CurrentToken->is(tok::hash)) 784 return parsePreprocessorDirective(); 785 786 // Directly allow to 'import <string-literal>' to support protocol buffer 787 // definitions (code.google.com/p/protobuf) or missing "#" (either way we 788 // should not break the line). 789 IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo(); 790 if ((Style.Language == FormatStyle::LK_Java && 791 CurrentToken->is(Keywords.kw_package)) || 792 (Info && Info->getPPKeywordID() == tok::pp_import && 793 CurrentToken->Next && 794 CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier, 795 tok::kw_static))) { 796 next(); 797 parseIncludeDirective(); 798 return LT_ImportStatement; 799 } 800 801 // If this line starts and ends in '<' and '>', respectively, it is likely 802 // part of "#define <a/b.h>". 803 if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) { 804 parseIncludeDirective(); 805 return LT_ImportStatement; 806 } 807 808 // In .proto files, top-level options are very similar to import statements 809 // and should not be line-wrapped. 810 if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 && 811 CurrentToken->is(Keywords.kw_option)) { 812 next(); 813 if (CurrentToken && CurrentToken->is(tok::identifier)) 814 return LT_ImportStatement; 815 } 816 817 bool KeywordVirtualFound = false; 818 bool ImportStatement = false; 819 820 // import {...} from '...'; 821 if (Style.Language == FormatStyle::LK_JavaScript && 822 CurrentToken->is(Keywords.kw_import)) 823 ImportStatement = true; 824 825 while (CurrentToken) { 826 if (CurrentToken->is(tok::kw_virtual)) 827 KeywordVirtualFound = true; 828 if (Style.Language == FormatStyle::LK_JavaScript) { 829 // export {...} from '...'; 830 // An export followed by "from 'some string';" is a re-export from 831 // another module identified by a URI and is treated as a 832 // LT_ImportStatement (i.e. prevent wraps on it for long URIs). 833 // Just "export {...};" or "export class ..." should not be treated as 834 // an import in this sense. 835 if (Line.First->is(tok::kw_export) && 836 CurrentToken->is(Keywords.kw_from) && CurrentToken->Next && 837 CurrentToken->Next->isStringLiteral()) 838 ImportStatement = true; 839 if (isClosureImportStatement(*CurrentToken)) 840 ImportStatement = true; 841 } 842 if (!consumeToken()) 843 return LT_Invalid; 844 } 845 if (KeywordVirtualFound) 846 return LT_VirtualFunctionDecl; 847 if (ImportStatement) 848 return LT_ImportStatement; 849 850 if (Line.startsWith(TT_ObjCMethodSpecifier)) { 851 if (Contexts.back().FirstObjCSelectorName) 852 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 853 Contexts.back().LongestObjCSelectorName; 854 return LT_ObjCMethodDecl; 855 } 856 857 return LT_Other; 858 } 859 860 private: 861 bool isClosureImportStatement(const FormatToken &Tok) { 862 // FIXME: Closure-library specific stuff should not be hard-coded but be 863 // configurable. 864 return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) && 865 Tok.Next->Next && (Tok.Next->Next->TokenText == "module" || 866 Tok.Next->Next->TokenText == "provide" || 867 Tok.Next->Next->TokenText == "require" || 868 Tok.Next->Next->TokenText == "setTestOnly" || 869 Tok.Next->Next->TokenText == "forwardDeclare") && 870 Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren); 871 } 872 873 void resetTokenMetadata(FormatToken *Token) { 874 if (!Token) 875 return; 876 877 // Reset token type in case we have already looked at it and then 878 // recovered from an error (e.g. failure to find the matching >). 879 if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro, 880 TT_FunctionLBrace, TT_ImplicitStringLiteral, 881 TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow, 882 TT_OverloadedOperator, TT_RegexLiteral, 883 TT_TemplateString)) 884 CurrentToken->Type = TT_Unknown; 885 CurrentToken->Role.reset(); 886 CurrentToken->MatchingParen = nullptr; 887 CurrentToken->FakeLParens.clear(); 888 CurrentToken->FakeRParens = 0; 889 } 890 891 void next() { 892 if (CurrentToken) { 893 CurrentToken->NestingLevel = Contexts.size() - 1; 894 CurrentToken->BindingStrength = Contexts.back().BindingStrength; 895 modifyContext(*CurrentToken); 896 determineTokenType(*CurrentToken); 897 CurrentToken = CurrentToken->Next; 898 } 899 900 resetTokenMetadata(CurrentToken); 901 } 902 903 /// \brief A struct to hold information valid in a specific context, e.g. 904 /// a pair of parenthesis. 905 struct Context { 906 Context(tok::TokenKind ContextKind, unsigned BindingStrength, 907 bool IsExpression) 908 : ContextKind(ContextKind), BindingStrength(BindingStrength), 909 IsExpression(IsExpression) {} 910 911 tok::TokenKind ContextKind; 912 unsigned BindingStrength; 913 bool IsExpression; 914 unsigned LongestObjCSelectorName = 0; 915 bool ColonIsForRangeExpr = false; 916 bool ColonIsDictLiteral = false; 917 bool ColonIsObjCMethodExpr = false; 918 FormatToken *FirstObjCSelectorName = nullptr; 919 FormatToken *FirstStartOfName = nullptr; 920 bool CanBeExpression = true; 921 bool InTemplateArgument = false; 922 bool InCtorInitializer = false; 923 bool CaretFound = false; 924 bool IsForEachMacro = false; 925 }; 926 927 /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime 928 /// of each instance. 929 struct ScopedContextCreator { 930 AnnotatingParser &P; 931 932 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind, 933 unsigned Increase) 934 : P(P) { 935 P.Contexts.push_back(Context(ContextKind, 936 P.Contexts.back().BindingStrength + Increase, 937 P.Contexts.back().IsExpression)); 938 } 939 940 ~ScopedContextCreator() { P.Contexts.pop_back(); } 941 }; 942 943 void modifyContext(const FormatToken &Current) { 944 if (Current.getPrecedence() == prec::Assignment && 945 !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) && 946 // Type aliases use `type X = ...;` in TypeScript. 947 !(Style.Language == FormatStyle::LK_JavaScript && 948 Line.startsWith(Keywords.kw_type, tok::identifier)) && 949 (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) { 950 Contexts.back().IsExpression = true; 951 if (!Line.startsWith(TT_UnaryOperator)) { 952 for (FormatToken *Previous = Current.Previous; 953 Previous && Previous->Previous && 954 !Previous->Previous->isOneOf(tok::comma, tok::semi); 955 Previous = Previous->Previous) { 956 if (Previous->isOneOf(tok::r_square, tok::r_paren)) { 957 Previous = Previous->MatchingParen; 958 if (!Previous) 959 break; 960 } 961 if (Previous->opensScope()) 962 break; 963 if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) && 964 Previous->isOneOf(tok::star, tok::amp, tok::ampamp) && 965 Previous->Previous && Previous->Previous->isNot(tok::equal)) 966 Previous->Type = TT_PointerOrReference; 967 } 968 } 969 } else if (Current.is(tok::lessless) && 970 (!Current.Previous || !Current.Previous->is(tok::kw_operator))) { 971 Contexts.back().IsExpression = true; 972 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) { 973 Contexts.back().IsExpression = true; 974 } else if (Current.is(TT_TrailingReturnArrow)) { 975 Contexts.back().IsExpression = false; 976 } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) { 977 Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java; 978 } else if (Current.Previous && 979 Current.Previous->is(TT_CtorInitializerColon)) { 980 Contexts.back().IsExpression = true; 981 Contexts.back().InCtorInitializer = true; 982 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { 983 for (FormatToken *Previous = Current.Previous; 984 Previous && Previous->isOneOf(tok::star, tok::amp); 985 Previous = Previous->Previous) 986 Previous->Type = TT_PointerOrReference; 987 if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer) 988 Contexts.back().IsExpression = false; 989 } else if (Current.is(tok::kw_new)) { 990 Contexts.back().CanBeExpression = false; 991 } else if (Current.isOneOf(tok::semi, tok::exclaim)) { 992 // This should be the condition or increment in a for-loop. 993 Contexts.back().IsExpression = true; 994 } 995 } 996 997 void determineTokenType(FormatToken &Current) { 998 if (!Current.is(TT_Unknown)) 999 // The token type is already known. 1000 return; 1001 1002 // Line.MightBeFunctionDecl can only be true after the parentheses of a 1003 // function declaration have been found. In this case, 'Current' is a 1004 // trailing token of this declaration and thus cannot be a name. 1005 if (Current.is(Keywords.kw_instanceof)) { 1006 Current.Type = TT_BinaryOperator; 1007 } else if (isStartOfName(Current) && 1008 (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) { 1009 Contexts.back().FirstStartOfName = &Current; 1010 Current.Type = TT_StartOfName; 1011 } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) { 1012 AutoFound = true; 1013 } else if (Current.is(tok::arrow) && 1014 Style.Language == FormatStyle::LK_Java) { 1015 Current.Type = TT_LambdaArrow; 1016 } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration && 1017 Current.NestingLevel == 0) { 1018 Current.Type = TT_TrailingReturnArrow; 1019 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { 1020 Current.Type = 1021 determineStarAmpUsage(Current, Contexts.back().CanBeExpression && 1022 Contexts.back().IsExpression, 1023 Contexts.back().InTemplateArgument); 1024 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { 1025 Current.Type = determinePlusMinusCaretUsage(Current); 1026 if (Current.is(TT_UnaryOperator) && Current.is(tok::caret)) 1027 Contexts.back().CaretFound = true; 1028 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { 1029 Current.Type = determineIncrementUsage(Current); 1030 } else if (Current.isOneOf(tok::exclaim, tok::tilde)) { 1031 Current.Type = TT_UnaryOperator; 1032 } else if (Current.is(tok::question)) { 1033 if (Style.Language == FormatStyle::LK_JavaScript && 1034 Line.MustBeDeclaration && !Contexts.back().IsExpression) { 1035 // In JavaScript, `interface X { foo?(): bar; }` is an optional method 1036 // on the interface, not a ternary expression. 1037 Current.Type = TT_JsTypeOptionalQuestion; 1038 } else { 1039 Current.Type = TT_ConditionalExpr; 1040 } 1041 } else if (Current.isBinaryOperator() && 1042 (!Current.Previous || Current.Previous->isNot(tok::l_square))) { 1043 Current.Type = TT_BinaryOperator; 1044 } else if (Current.is(tok::comment)) { 1045 if (Current.TokenText.startswith("/*")) { 1046 if (Current.TokenText.endswith("*/")) 1047 Current.Type = TT_BlockComment; 1048 else 1049 // The lexer has for some reason determined a comment here. But we 1050 // cannot really handle it, if it isn't properly terminated. 1051 Current.Tok.setKind(tok::unknown); 1052 } else { 1053 Current.Type = TT_LineComment; 1054 } 1055 } else if (Current.is(tok::r_paren)) { 1056 if (rParenEndsCast(Current)) 1057 Current.Type = TT_CastRParen; 1058 if (Current.MatchingParen && Current.Next && 1059 !Current.Next->isBinaryOperator() && 1060 !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace, 1061 tok::period, tok::arrow, tok::coloncolon)) 1062 if (FormatToken *AfterParen = Current.MatchingParen->Next) { 1063 // Make sure this isn't the return type of an Obj-C block declaration 1064 if (AfterParen->Tok.isNot(tok::caret)) { 1065 if (FormatToken *BeforeParen = Current.MatchingParen->Previous) 1066 if (BeforeParen->is(tok::identifier) && 1067 BeforeParen->TokenText == BeforeParen->TokenText.upper() && 1068 (!BeforeParen->Previous || 1069 BeforeParen->Previous->ClosesTemplateDeclaration)) 1070 Current.Type = TT_FunctionAnnotationRParen; 1071 } 1072 } 1073 } else if (Current.is(tok::at) && Current.Next) { 1074 if (Current.Next->isStringLiteral()) { 1075 Current.Type = TT_ObjCStringLiteral; 1076 } else { 1077 switch (Current.Next->Tok.getObjCKeywordID()) { 1078 case tok::objc_interface: 1079 case tok::objc_implementation: 1080 case tok::objc_protocol: 1081 Current.Type = TT_ObjCDecl; 1082 break; 1083 case tok::objc_property: 1084 Current.Type = TT_ObjCProperty; 1085 break; 1086 default: 1087 break; 1088 } 1089 } 1090 } else if (Current.is(tok::period)) { 1091 FormatToken *PreviousNoComment = Current.getPreviousNonComment(); 1092 if (PreviousNoComment && 1093 PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) 1094 Current.Type = TT_DesignatedInitializerPeriod; 1095 else if (Style.Language == FormatStyle::LK_Java && Current.Previous && 1096 Current.Previous->isOneOf(TT_JavaAnnotation, 1097 TT_LeadingJavaAnnotation)) { 1098 Current.Type = Current.Previous->Type; 1099 } 1100 } else if (Current.isOneOf(tok::identifier, tok::kw_const) && 1101 Current.Previous && 1102 !Current.Previous->isOneOf(tok::equal, tok::at) && 1103 Line.MightBeFunctionDecl && Contexts.size() == 1) { 1104 // Line.MightBeFunctionDecl can only be true after the parentheses of a 1105 // function declaration have been found. 1106 Current.Type = TT_TrailingAnnotation; 1107 } else if ((Style.Language == FormatStyle::LK_Java || 1108 Style.Language == FormatStyle::LK_JavaScript) && 1109 Current.Previous) { 1110 if (Current.Previous->is(tok::at) && 1111 Current.isNot(Keywords.kw_interface)) { 1112 const FormatToken &AtToken = *Current.Previous; 1113 const FormatToken *Previous = AtToken.getPreviousNonComment(); 1114 if (!Previous || Previous->is(TT_LeadingJavaAnnotation)) 1115 Current.Type = TT_LeadingJavaAnnotation; 1116 else 1117 Current.Type = TT_JavaAnnotation; 1118 } else if (Current.Previous->is(tok::period) && 1119 Current.Previous->isOneOf(TT_JavaAnnotation, 1120 TT_LeadingJavaAnnotation)) { 1121 Current.Type = Current.Previous->Type; 1122 } 1123 } 1124 } 1125 1126 /// \brief Take a guess at whether \p Tok starts a name of a function or 1127 /// variable declaration. 1128 /// 1129 /// This is a heuristic based on whether \p Tok is an identifier following 1130 /// something that is likely a type. 1131 bool isStartOfName(const FormatToken &Tok) { 1132 if (Tok.isNot(tok::identifier) || !Tok.Previous) 1133 return false; 1134 1135 if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof)) 1136 return false; 1137 if (Style.Language == FormatStyle::LK_JavaScript && 1138 Tok.Previous->is(Keywords.kw_in)) 1139 return false; 1140 1141 // Skip "const" as it does not have an influence on whether this is a name. 1142 FormatToken *PreviousNotConst = Tok.Previous; 1143 while (PreviousNotConst && PreviousNotConst->is(tok::kw_const)) 1144 PreviousNotConst = PreviousNotConst->Previous; 1145 1146 if (!PreviousNotConst) 1147 return false; 1148 1149 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) && 1150 PreviousNotConst->Previous && 1151 PreviousNotConst->Previous->is(tok::hash); 1152 1153 if (PreviousNotConst->is(TT_TemplateCloser)) 1154 return PreviousNotConst && PreviousNotConst->MatchingParen && 1155 PreviousNotConst->MatchingParen->Previous && 1156 PreviousNotConst->MatchingParen->Previous->isNot(tok::period) && 1157 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template); 1158 1159 if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen && 1160 PreviousNotConst->MatchingParen->Previous && 1161 PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype)) 1162 return true; 1163 1164 return (!IsPPKeyword && 1165 PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) || 1166 PreviousNotConst->is(TT_PointerOrReference) || 1167 PreviousNotConst->isSimpleTypeSpecifier(); 1168 } 1169 1170 /// \brief Determine whether ')' is ending a cast. 1171 bool rParenEndsCast(const FormatToken &Tok) { 1172 // C-style casts are only used in C++ and Java. 1173 if (Style.Language != FormatStyle::LK_Cpp && 1174 Style.Language != FormatStyle::LK_Java) 1175 return false; 1176 1177 // Empty parens aren't casts and there are no casts at the end of the line. 1178 if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen) 1179 return false; 1180 1181 FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment(); 1182 if (LeftOfParens) { 1183 // If there is a closing parenthesis left of the current parentheses, 1184 // look past it as these might be chained casts. 1185 if (LeftOfParens->is(tok::r_paren)) { 1186 if (!LeftOfParens->MatchingParen || 1187 !LeftOfParens->MatchingParen->Previous) 1188 return false; 1189 LeftOfParens = LeftOfParens->MatchingParen->Previous; 1190 } 1191 1192 // If there is an identifier (or with a few exceptions a keyword) right 1193 // before the parentheses, this is unlikely to be a cast. 1194 if (LeftOfParens->Tok.getIdentifierInfo() && 1195 !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case, 1196 tok::kw_delete)) 1197 return false; 1198 1199 // Certain other tokens right before the parentheses are also signals that 1200 // this cannot be a cast. 1201 if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator, 1202 TT_TemplateCloser, tok::ellipsis)) 1203 return false; 1204 } 1205 1206 if (Tok.Next->is(tok::question)) 1207 return false; 1208 1209 // As Java has no function types, a "(" after the ")" likely means that this 1210 // is a cast. 1211 if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren)) 1212 return true; 1213 1214 // If a (non-string) literal follows, this is likely a cast. 1215 if (Tok.Next->isNot(tok::string_literal) && 1216 (Tok.Next->Tok.isLiteral() || 1217 Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) 1218 return true; 1219 1220 // Heuristically try to determine whether the parentheses contain a type. 1221 bool ParensAreType = 1222 !Tok.Previous || 1223 Tok.Previous->isOneOf(TT_PointerOrReference, TT_TemplateCloser) || 1224 Tok.Previous->isSimpleTypeSpecifier(); 1225 bool ParensCouldEndDecl = 1226 Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater); 1227 if (ParensAreType && !ParensCouldEndDecl) 1228 return true; 1229 1230 // At this point, we heuristically assume that there are no casts at the 1231 // start of the line. We assume that we have found most cases where there 1232 // are by the logic above, e.g. "(void)x;". 1233 if (!LeftOfParens) 1234 return false; 1235 1236 // Certain token types inside the parentheses mean that this can't be a 1237 // cast. 1238 for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok; 1239 Token = Token->Next) 1240 if (Token->is(TT_BinaryOperator)) 1241 return false; 1242 1243 // If the following token is an identifier or 'this', this is a cast. All 1244 // cases where this can be something else are handled above. 1245 if (Tok.Next->isOneOf(tok::identifier, tok::kw_this)) 1246 return true; 1247 1248 if (!Tok.Next->Next) 1249 return false; 1250 1251 // If the next token after the parenthesis is a unary operator, assume 1252 // that this is cast, unless there are unexpected tokens inside the 1253 // parenthesis. 1254 bool NextIsUnary = 1255 Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star); 1256 if (!NextIsUnary || Tok.Next->is(tok::plus) || 1257 !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant)) 1258 return false; 1259 // Search for unexpected tokens. 1260 for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen; 1261 Prev = Prev->Previous) { 1262 if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon)) 1263 return false; 1264 } 1265 return true; 1266 } 1267 1268 /// \brief Return the type of the given token assuming it is * or &. 1269 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression, 1270 bool InTemplateArgument) { 1271 if (Style.Language == FormatStyle::LK_JavaScript) 1272 return TT_BinaryOperator; 1273 1274 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 1275 if (!PrevToken) 1276 return TT_UnaryOperator; 1277 1278 const FormatToken *NextToken = Tok.getNextNonComment(); 1279 if (!NextToken || 1280 NextToken->isOneOf(tok::arrow, Keywords.kw_final, tok::equal, 1281 Keywords.kw_override) || 1282 (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) 1283 return TT_PointerOrReference; 1284 1285 if (PrevToken->is(tok::coloncolon)) 1286 return TT_PointerOrReference; 1287 1288 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, 1289 tok::comma, tok::semi, tok::kw_return, tok::colon, 1290 tok::equal, tok::kw_delete, tok::kw_sizeof) || 1291 PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr, 1292 TT_UnaryOperator, TT_CastRParen)) 1293 return TT_UnaryOperator; 1294 1295 if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare)) 1296 return TT_PointerOrReference; 1297 if (NextToken->is(tok::kw_operator) && !IsExpression) 1298 return TT_PointerOrReference; 1299 if (NextToken->isOneOf(tok::comma, tok::semi)) 1300 return TT_PointerOrReference; 1301 1302 if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen && 1303 PrevToken->MatchingParen->Previous && 1304 PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof, 1305 tok::kw_decltype)) 1306 return TT_PointerOrReference; 1307 1308 if (PrevToken->Tok.isLiteral() || 1309 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true, 1310 tok::kw_false, tok::r_brace) || 1311 NextToken->Tok.isLiteral() || 1312 NextToken->isOneOf(tok::kw_true, tok::kw_false) || 1313 NextToken->isUnaryOperator() || 1314 // If we know we're in a template argument, there are no named 1315 // declarations. Thus, having an identifier on the right-hand side 1316 // indicates a binary operator. 1317 (InTemplateArgument && NextToken->Tok.isAnyIdentifier())) 1318 return TT_BinaryOperator; 1319 1320 // "&&(" is quite unlikely to be two successive unary "&". 1321 if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren)) 1322 return TT_BinaryOperator; 1323 1324 // This catches some cases where evaluation order is used as control flow: 1325 // aaa && aaa->f(); 1326 const FormatToken *NextNextToken = NextToken->getNextNonComment(); 1327 if (NextNextToken && NextNextToken->is(tok::arrow)) 1328 return TT_BinaryOperator; 1329 1330 // It is very unlikely that we are going to find a pointer or reference type 1331 // definition on the RHS of an assignment. 1332 if (IsExpression && !Contexts.back().CaretFound) 1333 return TT_BinaryOperator; 1334 1335 return TT_PointerOrReference; 1336 } 1337 1338 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) { 1339 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 1340 if (!PrevToken) 1341 return TT_UnaryOperator; 1342 1343 if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator) && 1344 !PrevToken->is(tok::exclaim)) 1345 // There aren't any trailing unary operators except for TypeScript's 1346 // non-null operator (!). Thus, this must be squence of leading operators. 1347 return TT_UnaryOperator; 1348 1349 // Use heuristics to recognize unary operators. 1350 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square, 1351 tok::question, tok::colon, tok::kw_return, 1352 tok::kw_case, tok::at, tok::l_brace)) 1353 return TT_UnaryOperator; 1354 1355 // There can't be two consecutive binary operators. 1356 if (PrevToken->is(TT_BinaryOperator)) 1357 return TT_UnaryOperator; 1358 1359 // Fall back to marking the token as binary operator. 1360 return TT_BinaryOperator; 1361 } 1362 1363 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 1364 TokenType determineIncrementUsage(const FormatToken &Tok) { 1365 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 1366 if (!PrevToken || PrevToken->is(TT_CastRParen)) 1367 return TT_UnaryOperator; 1368 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier)) 1369 return TT_TrailingUnaryOperator; 1370 1371 return TT_UnaryOperator; 1372 } 1373 1374 SmallVector<Context, 8> Contexts; 1375 1376 const FormatStyle &Style; 1377 AnnotatedLine &Line; 1378 FormatToken *CurrentToken; 1379 bool AutoFound; 1380 const AdditionalKeywords &Keywords; 1381 1382 // Set of "<" tokens that do not open a template parameter list. If parseAngle 1383 // determines that a specific token can't be a template opener, it will make 1384 // same decision irrespective of the decisions for tokens leading up to it. 1385 // Store this information to prevent this from causing exponential runtime. 1386 llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess; 1387 }; 1388 1389 static const int PrecedenceUnaryOperator = prec::PointerToMember + 1; 1390 static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2; 1391 1392 /// \brief Parses binary expressions by inserting fake parenthesis based on 1393 /// operator precedence. 1394 class ExpressionParser { 1395 public: 1396 ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, 1397 AnnotatedLine &Line) 1398 : Style(Style), Keywords(Keywords), Current(Line.First) {} 1399 1400 /// \brief Parse expressions with the given operatore precedence. 1401 void parse(int Precedence = 0) { 1402 // Skip 'return' and ObjC selector colons as they are not part of a binary 1403 // expression. 1404 while (Current && (Current->is(tok::kw_return) || 1405 (Current->is(tok::colon) && 1406 Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)))) 1407 next(); 1408 1409 if (!Current || Precedence > PrecedenceArrowAndPeriod) 1410 return; 1411 1412 // Conditional expressions need to be parsed separately for proper nesting. 1413 if (Precedence == prec::Conditional) { 1414 parseConditionalExpr(); 1415 return; 1416 } 1417 1418 // Parse unary operators, which all have a higher precedence than binary 1419 // operators. 1420 if (Precedence == PrecedenceUnaryOperator) { 1421 parseUnaryOperator(); 1422 return; 1423 } 1424 1425 FormatToken *Start = Current; 1426 FormatToken *LatestOperator = nullptr; 1427 unsigned OperatorIndex = 0; 1428 1429 while (Current) { 1430 // Consume operators with higher precedence. 1431 parse(Precedence + 1); 1432 1433 int CurrentPrecedence = getCurrentPrecedence(); 1434 1435 if (Current && Current->is(TT_SelectorName) && 1436 Precedence == CurrentPrecedence) { 1437 if (LatestOperator) 1438 addFakeParenthesis(Start, prec::Level(Precedence)); 1439 Start = Current; 1440 } 1441 1442 // At the end of the line or when an operator with higher precedence is 1443 // found, insert fake parenthesis and return. 1444 if (!Current || (Current->closesScope() && Current->MatchingParen) || 1445 (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) || 1446 (CurrentPrecedence == prec::Conditional && 1447 Precedence == prec::Assignment && Current->is(tok::colon))) { 1448 break; 1449 } 1450 1451 // Consume scopes: (), [], <> and {} 1452 if (Current->opensScope()) { 1453 while (Current && !Current->closesScope()) { 1454 next(); 1455 parse(); 1456 } 1457 next(); 1458 } else { 1459 // Operator found. 1460 if (CurrentPrecedence == Precedence) { 1461 if (LatestOperator) 1462 LatestOperator->NextOperator = Current; 1463 LatestOperator = Current; 1464 Current->OperatorIndex = OperatorIndex; 1465 ++OperatorIndex; 1466 } 1467 next(/*SkipPastLeadingComments=*/Precedence > 0); 1468 } 1469 } 1470 1471 if (LatestOperator && (Current || Precedence > 0)) { 1472 // LatestOperator->LastOperator = true; 1473 if (Precedence == PrecedenceArrowAndPeriod) { 1474 // Call expressions don't have a binary operator precedence. 1475 addFakeParenthesis(Start, prec::Unknown); 1476 } else { 1477 addFakeParenthesis(Start, prec::Level(Precedence)); 1478 } 1479 } 1480 } 1481 1482 private: 1483 /// \brief Gets the precedence (+1) of the given token for binary operators 1484 /// and other tokens that we treat like binary operators. 1485 int getCurrentPrecedence() { 1486 if (Current) { 1487 const FormatToken *NextNonComment = Current->getNextNonComment(); 1488 if (Current->is(TT_ConditionalExpr)) 1489 return prec::Conditional; 1490 if (NextNonComment && NextNonComment->is(tok::colon) && 1491 NextNonComment->is(TT_DictLiteral)) 1492 return prec::Comma; 1493 if (Current->is(TT_LambdaArrow)) 1494 return prec::Comma; 1495 if (Current->is(TT_JsFatArrow)) 1496 return prec::Assignment; 1497 if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName, 1498 TT_JsComputedPropertyName) || 1499 (Current->is(tok::comment) && NextNonComment && 1500 NextNonComment->is(TT_SelectorName))) 1501 return 0; 1502 if (Current->is(TT_RangeBasedForLoopColon)) 1503 return prec::Comma; 1504 if ((Style.Language == FormatStyle::LK_Java || 1505 Style.Language == FormatStyle::LK_JavaScript) && 1506 Current->is(Keywords.kw_instanceof)) 1507 return prec::Relational; 1508 if (Style.Language == FormatStyle::LK_JavaScript && 1509 Current->is(Keywords.kw_in)) 1510 return prec::Relational; 1511 if (Current->is(TT_BinaryOperator) || Current->is(tok::comma)) 1512 return Current->getPrecedence(); 1513 if (Current->isOneOf(tok::period, tok::arrow)) 1514 return PrecedenceArrowAndPeriod; 1515 if ((Style.Language == FormatStyle::LK_Java || 1516 Style.Language == FormatStyle::LK_JavaScript) && 1517 Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements, 1518 Keywords.kw_throws)) 1519 return 0; 1520 } 1521 return -1; 1522 } 1523 1524 void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) { 1525 Start->FakeLParens.push_back(Precedence); 1526 if (Precedence > prec::Unknown) 1527 Start->StartsBinaryExpression = true; 1528 if (Current) { 1529 FormatToken *Previous = Current->Previous; 1530 while (Previous->is(tok::comment) && Previous->Previous) 1531 Previous = Previous->Previous; 1532 ++Previous->FakeRParens; 1533 if (Precedence > prec::Unknown) 1534 Previous->EndsBinaryExpression = true; 1535 } 1536 } 1537 1538 /// \brief Parse unary operator expressions and surround them with fake 1539 /// parentheses if appropriate. 1540 void parseUnaryOperator() { 1541 if (!Current || Current->isNot(TT_UnaryOperator)) { 1542 parse(PrecedenceArrowAndPeriod); 1543 return; 1544 } 1545 1546 FormatToken *Start = Current; 1547 next(); 1548 parseUnaryOperator(); 1549 1550 // The actual precedence doesn't matter. 1551 addFakeParenthesis(Start, prec::Unknown); 1552 } 1553 1554 void parseConditionalExpr() { 1555 while (Current && Current->isTrailingComment()) { 1556 next(); 1557 } 1558 FormatToken *Start = Current; 1559 parse(prec::LogicalOr); 1560 if (!Current || !Current->is(tok::question)) 1561 return; 1562 next(); 1563 parse(prec::Assignment); 1564 if (!Current || Current->isNot(TT_ConditionalExpr)) 1565 return; 1566 next(); 1567 parse(prec::Assignment); 1568 addFakeParenthesis(Start, prec::Conditional); 1569 } 1570 1571 void next(bool SkipPastLeadingComments = true) { 1572 if (Current) 1573 Current = Current->Next; 1574 while (Current && 1575 (Current->NewlinesBefore == 0 || SkipPastLeadingComments) && 1576 Current->isTrailingComment()) 1577 Current = Current->Next; 1578 } 1579 1580 const FormatStyle &Style; 1581 const AdditionalKeywords &Keywords; 1582 FormatToken *Current; 1583 }; 1584 1585 } // end anonymous namespace 1586 1587 void TokenAnnotator::setCommentLineLevels( 1588 SmallVectorImpl<AnnotatedLine *> &Lines) { 1589 const AnnotatedLine *NextNonCommentLine = nullptr; 1590 for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(), 1591 E = Lines.rend(); 1592 I != E; ++I) { 1593 if (NextNonCommentLine && (*I)->First->is(tok::comment) && 1594 (*I)->First->Next == nullptr) 1595 (*I)->Level = NextNonCommentLine->Level; 1596 else 1597 NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr; 1598 1599 setCommentLineLevels((*I)->Children); 1600 } 1601 } 1602 1603 static unsigned maxNestingDepth(const AnnotatedLine &Line) { 1604 unsigned Result = 0; 1605 for (const auto* Tok = Line.First; Tok != nullptr; Tok = Tok->Next) 1606 Result = std::max(Result, Tok->NestingLevel); 1607 return Result; 1608 } 1609 1610 void TokenAnnotator::annotate(AnnotatedLine &Line) { 1611 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), 1612 E = Line.Children.end(); 1613 I != E; ++I) { 1614 annotate(**I); 1615 } 1616 AnnotatingParser Parser(Style, Line, Keywords); 1617 Line.Type = Parser.parseLine(); 1618 1619 // With very deep nesting, ExpressionParser uses lots of stack and the 1620 // formatting algorithm is very slow. We're not going to do a good job here 1621 // anyway - it's probably generated code being formatted by mistake. 1622 // Just skip the whole line. 1623 if (maxNestingDepth(Line) > 50) 1624 Line.Type = LT_Invalid; 1625 1626 if (Line.Type == LT_Invalid) 1627 return; 1628 1629 ExpressionParser ExprParser(Style, Keywords, Line); 1630 ExprParser.parse(); 1631 1632 if (Line.startsWith(TT_ObjCMethodSpecifier)) 1633 Line.Type = LT_ObjCMethodDecl; 1634 else if (Line.startsWith(TT_ObjCDecl)) 1635 Line.Type = LT_ObjCDecl; 1636 else if (Line.startsWith(TT_ObjCProperty)) 1637 Line.Type = LT_ObjCProperty; 1638 1639 Line.First->SpacesRequiredBefore = 1; 1640 Line.First->CanBreakBefore = Line.First->MustBreakBefore; 1641 } 1642 1643 // This function heuristically determines whether 'Current' starts the name of a 1644 // function declaration. 1645 static bool isFunctionDeclarationName(const FormatToken &Current, 1646 const AnnotatedLine &Line) { 1647 auto skipOperatorName = [](const FormatToken* Next) -> const FormatToken* { 1648 for (; Next; Next = Next->Next) { 1649 if (Next->is(TT_OverloadedOperatorLParen)) 1650 return Next; 1651 if (Next->is(TT_OverloadedOperator)) 1652 continue; 1653 if (Next->isOneOf(tok::kw_new, tok::kw_delete)) { 1654 // For 'new[]' and 'delete[]'. 1655 if (Next->Next && Next->Next->is(tok::l_square) && 1656 Next->Next->Next && Next->Next->Next->is(tok::r_square)) 1657 Next = Next->Next->Next; 1658 continue; 1659 } 1660 1661 break; 1662 } 1663 return nullptr; 1664 }; 1665 1666 // Find parentheses of parameter list. 1667 const FormatToken *Next = Current.Next; 1668 if (Current.is(tok::kw_operator)) { 1669 if (Current.Previous && Current.Previous->is(tok::coloncolon)) 1670 return false; 1671 Next = skipOperatorName(Next); 1672 } else { 1673 if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0) 1674 return false; 1675 for (; Next; Next = Next->Next) { 1676 if (Next->is(TT_TemplateOpener)) { 1677 Next = Next->MatchingParen; 1678 } else if (Next->is(tok::coloncolon)) { 1679 Next = Next->Next; 1680 if (!Next) 1681 return false; 1682 if (Next->is(tok::kw_operator)) { 1683 Next = skipOperatorName(Next->Next); 1684 break; 1685 } 1686 if (!Next->is(tok::identifier)) 1687 return false; 1688 } else if (Next->is(tok::l_paren)) { 1689 break; 1690 } else { 1691 return false; 1692 } 1693 } 1694 } 1695 1696 // Check whether parameter list can be long to a function declaration. 1697 if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen) 1698 return false; 1699 // If the lines ends with "{", this is likely an function definition. 1700 if (Line.Last->is(tok::l_brace)) 1701 return true; 1702 if (Next->Next == Next->MatchingParen) 1703 return true; // Empty parentheses. 1704 // If there is an &/&& after the r_paren, this is likely a function. 1705 if (Next->MatchingParen->Next && 1706 Next->MatchingParen->Next->is(TT_PointerOrReference)) 1707 return true; 1708 for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen; 1709 Tok = Tok->Next) { 1710 if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() || 1711 Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis)) 1712 return true; 1713 if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) || 1714 Tok->Tok.isLiteral()) 1715 return false; 1716 } 1717 return false; 1718 } 1719 1720 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const { 1721 assert(Line.MightBeFunctionDecl); 1722 1723 if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel || 1724 Style.AlwaysBreakAfterReturnType == 1725 FormatStyle::RTBS_TopLevelDefinitions) && 1726 Line.Level > 0) 1727 return false; 1728 1729 switch (Style.AlwaysBreakAfterReturnType) { 1730 case FormatStyle::RTBS_None: 1731 return false; 1732 case FormatStyle::RTBS_All: 1733 case FormatStyle::RTBS_TopLevel: 1734 return true; 1735 case FormatStyle::RTBS_AllDefinitions: 1736 case FormatStyle::RTBS_TopLevelDefinitions: 1737 return Line.mightBeFunctionDefinition(); 1738 } 1739 1740 return false; 1741 } 1742 1743 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { 1744 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), 1745 E = Line.Children.end(); 1746 I != E; ++I) { 1747 calculateFormattingInformation(**I); 1748 } 1749 1750 Line.First->TotalLength = 1751 Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth; 1752 if (!Line.First->Next) 1753 return; 1754 FormatToken *Current = Line.First->Next; 1755 bool InFunctionDecl = Line.MightBeFunctionDecl; 1756 while (Current) { 1757 if (isFunctionDeclarationName(*Current, Line)) 1758 Current->Type = TT_FunctionDeclarationName; 1759 if (Current->is(TT_LineComment)) { 1760 if (Current->Previous->BlockKind == BK_BracedInit && 1761 Current->Previous->opensScope()) 1762 Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1; 1763 else 1764 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; 1765 1766 // If we find a trailing comment, iterate backwards to determine whether 1767 // it seems to relate to a specific parameter. If so, break before that 1768 // parameter to avoid changing the comment's meaning. E.g. don't move 'b' 1769 // to the previous line in: 1770 // SomeFunction(a, 1771 // b, // comment 1772 // c); 1773 if (!Current->HasUnescapedNewline) { 1774 for (FormatToken *Parameter = Current->Previous; Parameter; 1775 Parameter = Parameter->Previous) { 1776 if (Parameter->isOneOf(tok::comment, tok::r_brace)) 1777 break; 1778 if (Parameter->Previous && Parameter->Previous->is(tok::comma)) { 1779 if (!Parameter->Previous->is(TT_CtorInitializerComma) && 1780 Parameter->HasUnescapedNewline) 1781 Parameter->MustBreakBefore = true; 1782 break; 1783 } 1784 } 1785 } 1786 } else if (Current->SpacesRequiredBefore == 0 && 1787 spaceRequiredBefore(Line, *Current)) { 1788 Current->SpacesRequiredBefore = 1; 1789 } 1790 1791 Current->MustBreakBefore = 1792 Current->MustBreakBefore || mustBreakBefore(Line, *Current); 1793 1794 if (!Current->MustBreakBefore && InFunctionDecl && 1795 Current->is(TT_FunctionDeclarationName)) 1796 Current->MustBreakBefore = mustBreakForReturnType(Line); 1797 1798 Current->CanBreakBefore = 1799 Current->MustBreakBefore || canBreakBefore(Line, *Current); 1800 unsigned ChildSize = 0; 1801 if (Current->Previous->Children.size() == 1) { 1802 FormatToken &LastOfChild = *Current->Previous->Children[0]->Last; 1803 ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit 1804 : LastOfChild.TotalLength + 1; 1805 } 1806 const FormatToken *Prev = Current->Previous; 1807 if (Current->MustBreakBefore || Prev->Children.size() > 1 || 1808 (Prev->Children.size() == 1 && 1809 Prev->Children[0]->First->MustBreakBefore) || 1810 Current->IsMultiline) 1811 Current->TotalLength = Prev->TotalLength + Style.ColumnLimit; 1812 else 1813 Current->TotalLength = Prev->TotalLength + Current->ColumnWidth + 1814 ChildSize + Current->SpacesRequiredBefore; 1815 1816 if (Current->is(TT_CtorInitializerColon)) 1817 InFunctionDecl = false; 1818 1819 // FIXME: Only calculate this if CanBreakBefore is true once static 1820 // initializers etc. are sorted out. 1821 // FIXME: Move magic numbers to a better place. 1822 Current->SplitPenalty = 20 * Current->BindingStrength + 1823 splitPenalty(Line, *Current, InFunctionDecl); 1824 1825 Current = Current->Next; 1826 } 1827 1828 calculateUnbreakableTailLengths(Line); 1829 for (Current = Line.First; Current != nullptr; Current = Current->Next) { 1830 if (Current->Role) 1831 Current->Role->precomputeFormattingInfos(Current); 1832 } 1833 1834 DEBUG({ printDebugInfo(Line); }); 1835 } 1836 1837 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) { 1838 unsigned UnbreakableTailLength = 0; 1839 FormatToken *Current = Line.Last; 1840 while (Current) { 1841 Current->UnbreakableTailLength = UnbreakableTailLength; 1842 if (Current->CanBreakBefore || 1843 Current->isOneOf(tok::comment, tok::string_literal)) { 1844 UnbreakableTailLength = 0; 1845 } else { 1846 UnbreakableTailLength += 1847 Current->ColumnWidth + Current->SpacesRequiredBefore; 1848 } 1849 Current = Current->Previous; 1850 } 1851 } 1852 1853 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, 1854 const FormatToken &Tok, 1855 bool InFunctionDecl) { 1856 const FormatToken &Left = *Tok.Previous; 1857 const FormatToken &Right = Tok; 1858 1859 if (Left.is(tok::semi)) 1860 return 0; 1861 1862 if (Style.Language == FormatStyle::LK_Java) { 1863 if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws)) 1864 return 1; 1865 if (Right.is(Keywords.kw_implements)) 1866 return 2; 1867 if (Left.is(tok::comma) && Left.NestingLevel == 0) 1868 return 3; 1869 } else if (Style.Language == FormatStyle::LK_JavaScript) { 1870 if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma)) 1871 return 100; 1872 if (Left.is(TT_JsTypeColon)) 1873 return 35; 1874 if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || 1875 (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) 1876 return 100; 1877 } 1878 1879 if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next && 1880 Right.Next->is(TT_DictLiteral))) 1881 return 1; 1882 if (Right.is(tok::l_square)) { 1883 if (Style.Language == FormatStyle::LK_Proto) 1884 return 1; 1885 if (Left.is(tok::r_square)) 1886 return 200; 1887 // Slightly prefer formatting local lambda definitions like functions. 1888 if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal)) 1889 return 35; 1890 if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare, 1891 TT_ArrayInitializerLSquare)) 1892 return 500; 1893 } 1894 1895 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) || 1896 Right.is(tok::kw_operator)) { 1897 if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt) 1898 return 3; 1899 if (Left.is(TT_StartOfName)) 1900 return 110; 1901 if (InFunctionDecl && Right.NestingLevel == 0) 1902 return Style.PenaltyReturnTypeOnItsOwnLine; 1903 return 200; 1904 } 1905 if (Right.is(TT_PointerOrReference)) 1906 return 190; 1907 if (Right.is(TT_LambdaArrow)) 1908 return 110; 1909 if (Left.is(tok::equal) && Right.is(tok::l_brace)) 1910 return 150; 1911 if (Left.is(TT_CastRParen)) 1912 return 100; 1913 if (Left.is(tok::coloncolon) || 1914 (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto)) 1915 return 500; 1916 if (Left.isOneOf(tok::kw_class, tok::kw_struct)) 1917 return 5000; 1918 if (Left.is(tok::comment)) 1919 return 1000; 1920 1921 if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon)) 1922 return 2; 1923 1924 if (Right.isMemberAccess()) { 1925 // Breaking before the "./->" of a chained call/member access is reasonably 1926 // cheap, as formatting those with one call per line is generally 1927 // desirable. In particular, it should be cheaper to break before the call 1928 // than it is to break inside a call's parameters, which could lead to weird 1929 // "hanging" indents. The exception is the very last "./->" to support this 1930 // frequent pattern: 1931 // 1932 // aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc( 1933 // dddddddd); 1934 // 1935 // which might otherwise be blown up onto many lines. Here, clang-format 1936 // won't produce "hanging" indents anyway as there is no other trailing 1937 // call. 1938 // 1939 // Also apply higher penalty is not a call as that might lead to a wrapping 1940 // like: 1941 // 1942 // aaaaaaa 1943 // .aaaaaaaaa.bbbbbbbb(cccccccc); 1944 return !Right.NextOperator || !Right.NextOperator->Previous->closesScope() 1945 ? 150 1946 : 35; 1947 } 1948 1949 if (Right.is(TT_TrailingAnnotation) && 1950 (!Right.Next || Right.Next->isNot(tok::l_paren))) { 1951 // Moving trailing annotations to the next line is fine for ObjC method 1952 // declarations. 1953 if (Line.startsWith(TT_ObjCMethodSpecifier)) 1954 return 10; 1955 // Generally, breaking before a trailing annotation is bad unless it is 1956 // function-like. It seems to be especially preferable to keep standard 1957 // annotations (i.e. "const", "final" and "override") on the same line. 1958 // Use a slightly higher penalty after ")" so that annotations like 1959 // "const override" are kept together. 1960 bool is_short_annotation = Right.TokenText.size() < 10; 1961 return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0); 1962 } 1963 1964 // In for-loops, prefer breaking at ',' and ';'. 1965 if (Line.startsWith(tok::kw_for) && Left.is(tok::equal)) 1966 return 4; 1967 1968 // In Objective-C method expressions, prefer breaking before "param:" over 1969 // breaking after it. 1970 if (Right.is(TT_SelectorName)) 1971 return 0; 1972 if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr)) 1973 return Line.MightBeFunctionDecl ? 50 : 500; 1974 1975 if (Left.is(tok::l_paren) && InFunctionDecl && 1976 Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) 1977 return 100; 1978 if (Left.is(tok::l_paren) && Left.Previous && 1979 Left.Previous->isOneOf(tok::kw_if, tok::kw_for)) 1980 return 1000; 1981 if (Left.is(tok::equal) && InFunctionDecl) 1982 return 110; 1983 if (Right.is(tok::r_brace)) 1984 return 1; 1985 if (Left.is(TT_TemplateOpener)) 1986 return 100; 1987 if (Left.opensScope()) { 1988 if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) 1989 return 0; 1990 return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter 1991 : 19; 1992 } 1993 if (Left.is(TT_JavaAnnotation)) 1994 return 50; 1995 1996 if (Right.is(tok::lessless)) { 1997 if (Left.is(tok::string_literal) && 1998 (Right.NextOperator || Right.OperatorIndex != 1)) { 1999 StringRef Content = Left.TokenText; 2000 if (Content.startswith("\"")) 2001 Content = Content.drop_front(1); 2002 if (Content.endswith("\"")) 2003 Content = Content.drop_back(1); 2004 Content = Content.trim(); 2005 if (Content.size() > 1 && 2006 (Content.back() == ':' || Content.back() == '=')) 2007 return 25; 2008 } 2009 return 1; // Breaking at a << is really cheap. 2010 } 2011 if (Left.is(TT_ConditionalExpr)) 2012 return prec::Conditional; 2013 prec::Level Level = Left.getPrecedence(); 2014 if (Level != prec::Unknown) 2015 return Level; 2016 Level = Right.getPrecedence(); 2017 if (Level != prec::Unknown) 2018 return Level; 2019 2020 return 3; 2021 } 2022 2023 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, 2024 const FormatToken &Left, 2025 const FormatToken &Right) { 2026 if (Left.is(tok::kw_return) && Right.isNot(tok::semi)) 2027 return true; 2028 if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty && 2029 Left.Tok.getObjCKeywordID() == tok::objc_property) 2030 return true; 2031 if (Right.is(tok::hashhash)) 2032 return Left.is(tok::hash); 2033 if (Left.isOneOf(tok::hashhash, tok::hash)) 2034 return Right.is(tok::hash); 2035 if (Left.is(tok::l_paren) && Right.is(tok::r_paren)) 2036 return Style.SpaceInEmptyParentheses; 2037 if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) 2038 return (Right.is(TT_CastRParen) || 2039 (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen))) 2040 ? Style.SpacesInCStyleCastParentheses 2041 : Style.SpacesInParentheses; 2042 if (Right.isOneOf(tok::semi, tok::comma)) 2043 return false; 2044 if (Right.is(tok::less) && 2045 Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList) 2046 return true; 2047 if (Right.is(tok::less) && Left.is(tok::kw_template)) 2048 return Style.SpaceAfterTemplateKeyword; 2049 if (Left.isOneOf(tok::exclaim, tok::tilde)) 2050 return false; 2051 if (Left.is(tok::at) && 2052 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant, 2053 tok::numeric_constant, tok::l_paren, tok::l_brace, 2054 tok::kw_true, tok::kw_false)) 2055 return false; 2056 if (Left.is(tok::colon)) 2057 return !Left.is(TT_ObjCMethodExpr); 2058 if (Left.is(tok::coloncolon)) 2059 return false; 2060 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) 2061 return false; 2062 if (Right.is(tok::ellipsis)) 2063 return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous && 2064 Left.Previous->is(tok::kw_case)); 2065 if (Left.is(tok::l_square) && Right.is(tok::amp)) 2066 return false; 2067 if (Right.is(TT_PointerOrReference)) 2068 return (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) || 2069 (Left.Tok.isLiteral() || (Left.is(tok::kw_const) && Left.Previous && 2070 Left.Previous->is(tok::r_paren)) || 2071 (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) && 2072 (Style.PointerAlignment != FormatStyle::PAS_Left || 2073 (Line.IsMultiVariableDeclStmt && 2074 (Left.NestingLevel == 0 || 2075 (Left.NestingLevel == 1 && Line.First->is(tok::kw_for))))))); 2076 if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) && 2077 (!Left.is(TT_PointerOrReference) || 2078 (Style.PointerAlignment != FormatStyle::PAS_Right && 2079 !Line.IsMultiVariableDeclStmt))) 2080 return true; 2081 if (Left.is(TT_PointerOrReference)) 2082 return Right.Tok.isLiteral() || 2083 Right.isOneOf(TT_BlockComment, Keywords.kw_final, 2084 Keywords.kw_override) || 2085 (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) || 2086 (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare, 2087 tok::l_paren) && 2088 (Style.PointerAlignment != FormatStyle::PAS_Right && 2089 !Line.IsMultiVariableDeclStmt) && 2090 Left.Previous && 2091 !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon)); 2092 if (Right.is(tok::star) && Left.is(tok::l_paren)) 2093 return false; 2094 if (Left.is(tok::l_square)) 2095 return (Left.is(TT_ArrayInitializerLSquare) && 2096 Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) || 2097 (Left.is(TT_ArraySubscriptLSquare) && Style.SpacesInSquareBrackets && 2098 Right.isNot(tok::r_square)); 2099 if (Right.is(tok::r_square)) 2100 return Right.MatchingParen && 2101 ((Style.SpacesInContainerLiterals && 2102 Right.MatchingParen->is(TT_ArrayInitializerLSquare)) || 2103 (Style.SpacesInSquareBrackets && 2104 Right.MatchingParen->is(TT_ArraySubscriptLSquare))); 2105 if (Right.is(tok::l_square) && 2106 !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare) && 2107 !Left.isOneOf(tok::numeric_constant, TT_DictLiteral)) 2108 return false; 2109 if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) 2110 return !Left.Children.empty(); // No spaces in "{}". 2111 if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) || 2112 (Right.is(tok::r_brace) && Right.MatchingParen && 2113 Right.MatchingParen->BlockKind != BK_Block)) 2114 return !Style.Cpp11BracedListStyle; 2115 if (Left.is(TT_BlockComment)) 2116 return !Left.TokenText.endswith("=*/"); 2117 if (Right.is(tok::l_paren)) { 2118 if (Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) 2119 return true; 2120 return Line.Type == LT_ObjCDecl || Left.is(tok::semi) || 2121 (Style.SpaceBeforeParens != FormatStyle::SBPO_Never && 2122 (Left.isOneOf(tok::kw_if, tok::pp_elif, tok::kw_for, tok::kw_while, 2123 tok::kw_switch, tok::kw_case, TT_ForEachMacro, 2124 TT_ObjCForIn) || 2125 (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch, 2126 tok::kw_new, tok::kw_delete) && 2127 (!Left.Previous || Left.Previous->isNot(tok::period))))) || 2128 (Style.SpaceBeforeParens == FormatStyle::SBPO_Always && 2129 (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() || 2130 Left.is(tok::r_paren)) && 2131 Line.Type != LT_PreprocessorDirective); 2132 } 2133 if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword) 2134 return false; 2135 if (Right.is(TT_UnaryOperator)) 2136 return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) && 2137 (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr)); 2138 if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square, 2139 tok::r_paren) || 2140 Left.isSimpleTypeSpecifier()) && 2141 Right.is(tok::l_brace) && Right.getNextNonComment() && 2142 Right.BlockKind != BK_Block) 2143 return false; 2144 if (Left.is(tok::period) || Right.is(tok::period)) 2145 return false; 2146 if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L") 2147 return false; 2148 if (Left.is(TT_TemplateCloser) && Left.MatchingParen && 2149 Left.MatchingParen->Previous && 2150 Left.MatchingParen->Previous->is(tok::period)) 2151 // A.<B>DoSomething(); 2152 return false; 2153 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square)) 2154 return false; 2155 return true; 2156 } 2157 2158 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, 2159 const FormatToken &Right) { 2160 const FormatToken &Left = *Right.Previous; 2161 if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo()) 2162 return true; // Never ever merge two identifiers. 2163 if (Style.Language == FormatStyle::LK_Cpp) { 2164 if (Left.is(tok::kw_operator)) 2165 return Right.is(tok::coloncolon); 2166 } else if (Style.Language == FormatStyle::LK_Proto) { 2167 if (Right.is(tok::period) && 2168 Left.isOneOf(Keywords.kw_optional, Keywords.kw_required, 2169 Keywords.kw_repeated, Keywords.kw_extend)) 2170 return true; 2171 if (Right.is(tok::l_paren) && 2172 Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) 2173 return true; 2174 } else if (Style.Language == FormatStyle::LK_JavaScript) { 2175 if (Left.is(TT_JsFatArrow)) 2176 return true; 2177 if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || 2178 (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) 2179 return false; 2180 if (Left.is(tok::identifier) && Right.is(TT_TemplateString)) 2181 return false; 2182 if (Right.is(tok::star) && 2183 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) 2184 return false; 2185 if (Right.isOneOf(tok::l_brace, tok::l_square) && 2186 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) 2187 return true; 2188 // JS methods can use some keywords as names (e.g. `delete()`). 2189 if (Right.is(tok::l_paren) && Line.MustBeDeclaration && 2190 Left.Tok.getIdentifierInfo()) 2191 return false; 2192 if (Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in, 2193 Keywords.kw_of, tok::kw_const) && 2194 (!Left.Previous || !Left.Previous->is(tok::period))) 2195 return true; 2196 if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous && 2197 Left.Previous->is(tok::period) && Right.is(tok::l_paren)) 2198 return false; 2199 if (Left.is(Keywords.kw_as) && 2200 Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren)) 2201 return true; 2202 if (Left.is(tok::kw_default) && Left.Previous && 2203 Left.Previous->is(tok::kw_export)) 2204 return true; 2205 if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace)) 2206 return true; 2207 if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion)) 2208 return false; 2209 if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator)) 2210 return false; 2211 if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) && 2212 Line.First->isOneOf(Keywords.kw_import, tok::kw_export)) 2213 return false; 2214 if (Left.is(tok::ellipsis)) 2215 return false; 2216 if (Left.is(TT_TemplateCloser) && 2217 !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square, 2218 Keywords.kw_implements, Keywords.kw_extends)) 2219 // Type assertions ('<type>expr') are not followed by whitespace. Other 2220 // locations that should have whitespace following are identified by the 2221 // above set of follower tokens. 2222 return false; 2223 // Postfix non-null assertion operator, as in `foo!.bar()`. 2224 if (Right.is(tok::exclaim) && (Left.isOneOf(tok::identifier, tok::r_paren, 2225 tok::r_square, tok::r_brace) || 2226 Left.Tok.isLiteral())) 2227 return false; 2228 if (Left.is(tok::exclaim) && Right.is(Keywords.kw_as)) 2229 return true; // "x! as string" 2230 } else if (Style.Language == FormatStyle::LK_Java) { 2231 if (Left.is(tok::r_square) && Right.is(tok::l_brace)) 2232 return true; 2233 if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) 2234 return Style.SpaceBeforeParens != FormatStyle::SBPO_Never; 2235 if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private, 2236 tok::kw_protected) || 2237 Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract, 2238 Keywords.kw_native)) && 2239 Right.is(TT_TemplateOpener)) 2240 return true; 2241 } 2242 if (Left.is(TT_ImplicitStringLiteral)) 2243 return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd(); 2244 if (Line.Type == LT_ObjCMethodDecl) { 2245 if (Left.is(TT_ObjCMethodSpecifier)) 2246 return true; 2247 if (Left.is(tok::r_paren) && Right.is(tok::identifier)) 2248 // Don't space between ')' and <id> 2249 return false; 2250 } 2251 if (Line.Type == LT_ObjCProperty && 2252 (Right.is(tok::equal) || Left.is(tok::equal))) 2253 return false; 2254 2255 if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) || 2256 Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) 2257 return true; 2258 if (Right.is(TT_OverloadedOperatorLParen)) 2259 return Style.SpaceBeforeParens == FormatStyle::SBPO_Always; 2260 if (Left.is(tok::comma)) 2261 return true; 2262 if (Right.is(tok::comma)) 2263 return false; 2264 if (Right.isOneOf(TT_CtorInitializerColon, TT_ObjCBlockLParen)) 2265 return true; 2266 if (Right.is(tok::colon)) { 2267 if (Line.First->isOneOf(tok::kw_case, tok::kw_default) || 2268 !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi)) 2269 return false; 2270 if (Right.is(TT_ObjCMethodExpr)) 2271 return false; 2272 if (Left.is(tok::question)) 2273 return false; 2274 if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon)) 2275 return false; 2276 if (Right.is(TT_DictLiteral)) 2277 return Style.SpacesInContainerLiterals; 2278 return true; 2279 } 2280 if (Left.is(TT_UnaryOperator)) 2281 return Right.is(TT_BinaryOperator); 2282 2283 // If the next token is a binary operator or a selector name, we have 2284 // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly. 2285 if (Left.is(TT_CastRParen)) 2286 return Style.SpaceAfterCStyleCast || 2287 Right.isOneOf(TT_BinaryOperator, TT_SelectorName); 2288 2289 if (Left.is(tok::greater) && Right.is(tok::greater)) 2290 return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) && 2291 (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles); 2292 if (Right.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) || 2293 Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar)) 2294 return false; 2295 if (!Style.SpaceBeforeAssignmentOperators && 2296 Right.getPrecedence() == prec::Assignment) 2297 return false; 2298 if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment)) 2299 return (Left.is(TT_TemplateOpener) && 2300 Style.Standard == FormatStyle::LS_Cpp03) || 2301 !(Left.isOneOf(tok::identifier, tok::l_paren, tok::r_paren, 2302 tok::l_square) || 2303 Left.isOneOf(TT_TemplateCloser, TT_TemplateOpener)); 2304 if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser))) 2305 return Style.SpacesInAngles; 2306 if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) || 2307 (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && 2308 !Right.is(tok::r_paren))) 2309 return true; 2310 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) && 2311 Right.isNot(TT_FunctionTypeLParen)) 2312 return Style.SpaceBeforeParens == FormatStyle::SBPO_Always; 2313 if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) && 2314 Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen)) 2315 return false; 2316 if (Right.is(tok::less) && Left.isNot(tok::l_paren) && 2317 Line.startsWith(tok::hash)) 2318 return true; 2319 if (Right.is(TT_TrailingUnaryOperator)) 2320 return false; 2321 if (Left.is(TT_RegexLiteral)) 2322 return false; 2323 return spaceRequiredBetween(Line, Left, Right); 2324 } 2325 2326 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style. 2327 static bool isAllmanBrace(const FormatToken &Tok) { 2328 return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block && 2329 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral); 2330 } 2331 2332 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, 2333 const FormatToken &Right) { 2334 const FormatToken &Left = *Right.Previous; 2335 if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0) 2336 return true; 2337 2338 if (Style.Language == FormatStyle::LK_JavaScript) { 2339 // FIXME: This might apply to other languages and token kinds. 2340 if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous && 2341 Left.Previous->is(tok::string_literal)) 2342 return true; 2343 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 && 2344 Left.Previous && Left.Previous->is(tok::equal) && 2345 Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export, 2346 tok::kw_const) && 2347 // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match 2348 // above. 2349 !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let)) 2350 // Object literals on the top level of a file are treated as "enum-style". 2351 // Each key/value pair is put on a separate line, instead of bin-packing. 2352 return true; 2353 if (Left.is(tok::l_brace) && Line.Level == 0 && 2354 (Line.startsWith(tok::kw_enum) || 2355 Line.startsWith(tok::kw_export, tok::kw_enum))) 2356 // JavaScript top-level enum key/value pairs are put on separate lines 2357 // instead of bin-packing. 2358 return true; 2359 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && 2360 !Left.Children.empty()) 2361 // Support AllowShortFunctionsOnASingleLine for JavaScript. 2362 return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None || 2363 Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty || 2364 (Left.NestingLevel == 0 && Line.Level == 0 && 2365 Style.AllowShortFunctionsOnASingleLine == 2366 FormatStyle::SFS_Inline); 2367 } else if (Style.Language == FormatStyle::LK_Java) { 2368 if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next && 2369 Right.Next->is(tok::string_literal)) 2370 return true; 2371 } 2372 2373 // If the last token before a '}' is a comma or a trailing comment, the 2374 // intention is to insert a line break after it in order to make shuffling 2375 // around entries easier. 2376 const FormatToken *BeforeClosingBrace = nullptr; 2377 if (Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) && 2378 Left.BlockKind != BK_Block && Left.MatchingParen) 2379 BeforeClosingBrace = Left.MatchingParen->Previous; 2380 else if (Right.MatchingParen && 2381 Right.MatchingParen->isOneOf(tok::l_brace, 2382 TT_ArrayInitializerLSquare)) 2383 BeforeClosingBrace = &Left; 2384 if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) || 2385 BeforeClosingBrace->isTrailingComment())) 2386 return true; 2387 2388 if (Right.is(tok::comment)) 2389 return Left.BlockKind != BK_BracedInit && 2390 Left.isNot(TT_CtorInitializerColon) && 2391 (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline); 2392 if (Left.isTrailingComment()) 2393 return true; 2394 if (Left.isStringLiteral() && 2395 (Right.isStringLiteral() || Right.is(TT_ObjCStringLiteral))) 2396 return true; 2397 if (Right.Previous->IsUnterminatedLiteral) 2398 return true; 2399 if (Right.is(tok::lessless) && Right.Next && 2400 Right.Previous->is(tok::string_literal) && 2401 Right.Next->is(tok::string_literal)) 2402 return true; 2403 if (Right.Previous->ClosesTemplateDeclaration && 2404 Right.Previous->MatchingParen && 2405 Right.Previous->MatchingParen->NestingLevel == 0 && 2406 Style.AlwaysBreakTemplateDeclarations) 2407 return true; 2408 if ((Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) && 2409 Style.BreakConstructorInitializersBeforeComma && 2410 !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) 2411 return true; 2412 if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\"")) 2413 // Raw string literals are special wrt. line breaks. The author has made a 2414 // deliberate choice and might have aligned the contents of the string 2415 // literal accordingly. Thus, we try keep existing line breaks. 2416 return Right.NewlinesBefore > 0; 2417 if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 && 2418 Style.Language == FormatStyle::LK_Proto) 2419 // Don't put enums onto single lines in protocol buffers. 2420 return true; 2421 if (Right.is(TT_InlineASMBrace)) 2422 return Right.HasUnescapedNewline; 2423 if (isAllmanBrace(Left) || isAllmanBrace(Right)) 2424 return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) || 2425 (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) || 2426 (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct); 2427 if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine) 2428 return true; 2429 2430 if ((Style.Language == FormatStyle::LK_Java || 2431 Style.Language == FormatStyle::LK_JavaScript) && 2432 Left.is(TT_LeadingJavaAnnotation) && 2433 Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) && 2434 (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations)) 2435 return true; 2436 2437 return false; 2438 } 2439 2440 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, 2441 const FormatToken &Right) { 2442 const FormatToken &Left = *Right.Previous; 2443 2444 // Language-specific stuff. 2445 if (Style.Language == FormatStyle::LK_Java) { 2446 if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends, 2447 Keywords.kw_implements)) 2448 return false; 2449 if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends, 2450 Keywords.kw_implements)) 2451 return true; 2452 } else if (Style.Language == FormatStyle::LK_JavaScript) { 2453 const FormatToken *NonComment = Right.getPreviousNonComment(); 2454 if (Left.isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break, 2455 tok::kw_throw) || 2456 (NonComment && 2457 NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break, 2458 tok::kw_throw))) 2459 return false; // Otherwise a semicolon is inserted. 2460 if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace)) 2461 return false; 2462 if (Left.is(TT_JsTypeColon)) 2463 return true; 2464 if (Right.NestingLevel == 0 && Right.is(Keywords.kw_is)) 2465 return false; 2466 if (Left.is(Keywords.kw_in)) 2467 return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None; 2468 if (Right.is(Keywords.kw_in)) 2469 return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None; 2470 if (Right.is(Keywords.kw_as)) 2471 return false; // must not break before as in 'x as type' casts 2472 if (Left.is(Keywords.kw_declare) && 2473 Right.isOneOf(Keywords.kw_module, tok::kw_namespace, 2474 Keywords.kw_function, tok::kw_class, tok::kw_enum, 2475 Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var, 2476 Keywords.kw_let, tok::kw_const)) 2477 // See grammar for 'declare' statements at: 2478 // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10 2479 return false; 2480 if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) && 2481 Right.isOneOf(tok::identifier, tok::string_literal)) { 2482 return false; // must not break in "module foo { ...}" 2483 } 2484 } 2485 2486 if (Left.is(tok::at)) 2487 return false; 2488 if (Left.Tok.getObjCKeywordID() == tok::objc_interface) 2489 return false; 2490 if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation)) 2491 return !Right.is(tok::l_paren); 2492 if (Right.is(TT_PointerOrReference)) 2493 return Line.IsMultiVariableDeclStmt || 2494 (Style.PointerAlignment == FormatStyle::PAS_Right && 2495 (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName))); 2496 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) || 2497 Right.is(tok::kw_operator)) 2498 return true; 2499 if (Left.is(TT_PointerOrReference)) 2500 return false; 2501 if (Right.isTrailingComment()) 2502 // We rely on MustBreakBefore being set correctly here as we should not 2503 // change the "binding" behavior of a comment. 2504 // The first comment in a braced lists is always interpreted as belonging to 2505 // the first list element. Otherwise, it should be placed outside of the 2506 // list. 2507 return Left.BlockKind == BK_BracedInit; 2508 if (Left.is(tok::question) && Right.is(tok::colon)) 2509 return false; 2510 if (Right.is(TT_ConditionalExpr) || Right.is(tok::question)) 2511 return Style.BreakBeforeTernaryOperators; 2512 if (Left.is(TT_ConditionalExpr) || Left.is(tok::question)) 2513 return !Style.BreakBeforeTernaryOperators; 2514 if (Right.is(TT_InheritanceColon)) 2515 return true; 2516 if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) && 2517 Left.isNot(TT_SelectorName)) 2518 return true; 2519 if (Right.is(tok::colon) && 2520 !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) 2521 return false; 2522 if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) 2523 return true; 2524 if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next && 2525 Right.Next->is(TT_ObjCMethodExpr))) 2526 return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls. 2527 if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty) 2528 return true; 2529 if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen)) 2530 return true; 2531 if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen, 2532 TT_OverloadedOperator)) 2533 return false; 2534 if (Left.is(TT_RangeBasedForLoopColon)) 2535 return true; 2536 if (Right.is(TT_RangeBasedForLoopColon)) 2537 return false; 2538 if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener)) 2539 return true; 2540 if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) || 2541 Left.is(tok::kw_operator)) 2542 return false; 2543 if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) && 2544 Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0) 2545 return false; 2546 if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen)) 2547 return false; 2548 if (Left.is(tok::l_paren) && Left.Previous && 2549 (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) 2550 return false; 2551 if (Right.is(TT_ImplicitStringLiteral)) 2552 return false; 2553 2554 if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser)) 2555 return false; 2556 if (Right.is(tok::r_square) && Right.MatchingParen && 2557 Right.MatchingParen->is(TT_LambdaLSquare)) 2558 return false; 2559 2560 // We only break before r_brace if there was a corresponding break before 2561 // the l_brace, which is tracked by BreakBeforeClosingBrace. 2562 if (Right.is(tok::r_brace)) 2563 return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block; 2564 2565 // Allow breaking after a trailing annotation, e.g. after a method 2566 // declaration. 2567 if (Left.is(TT_TrailingAnnotation)) 2568 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren, 2569 tok::less, tok::coloncolon); 2570 2571 if (Right.is(tok::kw___attribute)) 2572 return true; 2573 2574 if (Left.is(tok::identifier) && Right.is(tok::string_literal)) 2575 return true; 2576 2577 if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral)) 2578 return true; 2579 2580 if (Left.is(TT_CtorInitializerComma) && 2581 Style.BreakConstructorInitializersBeforeComma) 2582 return false; 2583 if (Right.is(TT_CtorInitializerComma) && 2584 Style.BreakConstructorInitializersBeforeComma) 2585 return true; 2586 if ((Left.is(tok::greater) && Right.is(tok::greater)) || 2587 (Left.is(tok::less) && Right.is(tok::less))) 2588 return false; 2589 if (Right.is(TT_BinaryOperator) && 2590 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None && 2591 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All || 2592 Right.getPrecedence() != prec::Assignment)) 2593 return true; 2594 if (Left.is(TT_ArrayInitializerLSquare)) 2595 return true; 2596 if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const)) 2597 return true; 2598 if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) && 2599 !Left.isOneOf(tok::arrowstar, tok::lessless) && 2600 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All && 2601 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None || 2602 Left.getPrecedence() == prec::Assignment)) 2603 return true; 2604 return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, 2605 tok::kw_class, tok::kw_struct, tok::comment) || 2606 Right.isMemberAccess() || 2607 Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless, 2608 tok::colon, tok::l_square, tok::at) || 2609 (Left.is(tok::r_paren) && 2610 Right.isOneOf(tok::identifier, tok::kw_const)) || 2611 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)); 2612 } 2613 2614 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { 2615 llvm::errs() << "AnnotatedTokens:\n"; 2616 const FormatToken *Tok = Line.First; 2617 while (Tok) { 2618 llvm::errs() << " M=" << Tok->MustBreakBefore 2619 << " C=" << Tok->CanBreakBefore 2620 << " T=" << getTokenTypeName(Tok->Type) 2621 << " S=" << Tok->SpacesRequiredBefore 2622 << " B=" << Tok->BlockParameterCount 2623 << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName() 2624 << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind 2625 << " FakeLParens="; 2626 for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) 2627 llvm::errs() << Tok->FakeLParens[i] << "/"; 2628 llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n"; 2629 if (!Tok->Next) 2630 assert(Tok == Line.Last); 2631 Tok = Tok->Next; 2632 } 2633 llvm::errs() << "----\n"; 2634 } 2635 2636 } // namespace format 2637 } // namespace clang 2638