//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements a token annotator, i.e. creates
/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
///
//===----------------------------------------------------------------------===//

#include "TokenAnnotator.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "format-token-annotator"

namespace clang {
namespace format {

namespace {

/// \brief A parser that gathers additional information about tokens.
///
/// The \c TokenAnnotator tries to match parentheses and square brackets and
/// stores the parenthesis levels. It also tries to resolve matching "<" and
/// ">" into template parameter lists.
class AnnotatingParser {
public:
  /// \brief Creates a parser positioned on the first token of \p Line.
  ///
  /// Pushes the outermost (non-expression) context and resets the metadata of
  /// the first token.
  AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
                   const AdditionalKeywords &Keywords)
      : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
        Keywords(Keywords) {
    Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
    resetTokenMetadata(CurrentToken);
  }

private:
  /// \brief Tries to parse the tokens following a '<' as a bracketed list
  /// (template arguments/parameters or a proto dict literal).
  ///
  /// The '<' itself is expected to be \c CurrentToken->Previous. On success
  /// the '<' and the matching '>' are linked via \c MatchingParen, the '>' is
  /// typed as \c TT_TemplateCloser or \c TT_DictLiteral, and the '>' is
  /// consumed.
  ///
  /// \returns true if a matching '>' was found; false if the '<' was already
  /// recorded in \c NonTemplateLess, if the surrounding tokens rule out an
  /// opening angle bracket, or if the contents cannot be parsed as one.
  bool parseAngle() {
    if (!CurrentToken || !CurrentToken->Previous)
      return false;
    // This '<' has already been rejected once; do not try again.
    if (NonTemplateLess.count(CurrentToken->Previous))
      return false;

    const FormatToken &Previous = *CurrentToken->Previous; // The '<'.
    if (Previous.Previous) {
      // A literal directly before '<' means this is a comparison.
      if (Previous.Previous->Tok.isLiteral())
        return false;
      // Inside a nested context, ')' '<' is only accepted when the ')' closes
      // an overloaded operator's parentheses.
      if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
          (!Previous.Previous->MatchingParen ||
           !Previous.Previous->MatchingParen->is(TT_OverloadedOperatorLParen)))
        return false;
    }

    FormatToken *Left = CurrentToken->Previous;
    Left->ParentBracket = Contexts.back().ContextKind;
    ScopedContextCreator ContextCreator(*this, tok::less, 12);

    // If this angle is in the context of an expression, we need to be more
    // hesitant to detect it as opening template parameters.
    bool InExprContext = Contexts.back().IsExpression;

    Contexts.back().IsExpression = false;
    // If there's a template keyword before the opening angle bracket, this is a
    // template parameter, not an argument.
    Contexts.back().InTemplateArgument =
        Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);

    // Java: skip a leading '?' directly after the '<'.
    if (Style.Language == FormatStyle::LK_Java &&
        CurrentToken->is(tok::question))
      next();

    while (CurrentToken) {
      if (CurrentToken->is(tok::greater)) {
        Left->MatchingParen = CurrentToken;
        CurrentToken->MatchingParen = Left;
        // In TT_Proto, we must distinguish between:
        //   map<key, value>
        //   msg < item: data >
        //   msg: < item: data >
        // In TT_TextProto, map<key, value> does not occur.
        if (Style.Language == FormatStyle::LK_TextProto ||
            (Style.Language == FormatStyle::LK_Proto && Left->Previous &&
             Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral)))
          CurrentToken->Type = TT_DictLiteral;
        else
          CurrentToken->Type = TT_TemplateCloser;
        next();
        return true;
      }
      // Java: a '?' inside the angle brackets is skipped without further
      // annotation.
      if (CurrentToken->is(tok::question) &&
          Style.Language == FormatStyle::LK_Java) {
        next();
        continue;
      }
      // A closing bracket of another kind, or (outside of proto languages) a
      // ':' / '?' in an expression context, rules out an angle-bracket list.
      if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
          (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext &&
           Style.Language != FormatStyle::LK_Proto &&
           Style.Language != FormatStyle::LK_TextProto))
        return false;
      // If a && or || is found and interpreted as a binary operator, this set
      // of angles is likely part of something like "a < b && c > d". If the
      // angles are inside an expression, the ||/&& might also be a binary
      // operator that was misinterpreted because we are parsing template
      // parameters.
      // FIXME: This is getting out of hand, write a decent parser.
      if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
          CurrentToken->Previous->is(TT_BinaryOperator) &&
          Contexts[Contexts.size() - 2].IsExpression &&
          !Line.startsWith(tok::kw_template))
        return false;
      updateParameterCount(Left, CurrentToken);
      if (Style.Language == FormatStyle::LK_Proto) {
        // In proto, the token before a ':' (or before a '{' / '<' that does
        // not follow a ':') is marked as a selector name.
        if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
          if (CurrentToken->is(tok::colon) ||
              (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
               Previous->isNot(tok::colon)))
            Previous->Type = TT_SelectorName;
        }
      }
      if (!consumeToken())
        return false;
    }
    // Ran out of tokens without finding the closing '>'.
    return false;
  }

  /// \brief Parses the tokens following a '(' up to the matching ')'.
  ///
  /// The '(' is expected to be \c CurrentToken->Previous. Decides whether the
  /// contents form an expression, links the two parentheses via
  /// \c MatchingParen, detects function-type / ObjC-block parentheses and
  /// records the parameter packing kind on the '('.
  ///
  /// \param LookForDecls when true, an 'identifier */&/&& identifier'
  /// sequence not followed by '=' has its operator re-typed as a binary
  /// operator.
  /// \returns true if the matching ')' was found and consumed; false if there
  /// is no current token, a ']' or '}' is encountered first, or a nested
  /// token could not be consumed.
  bool parseParens(bool LookForDecls = false) {
    if (!CurrentToken)
      return false;
    FormatToken *Left = CurrentToken->Previous;
    Left->ParentBracket = Contexts.back().ContextKind;
    ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);

    // FIXME: This is a bit of a hack. Do better.
    Contexts.back().ColonIsForRangeExpr =
        Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;

    bool StartsObjCMethodExpr = false;
    if (FormatToken *MaybeSel = Left->Previous) {
      // @selector( starts a selector.
// The 'selector' keyword must itself be directly preceded by an '@' token.
      if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
          MaybeSel->Previous->is(tok::at)) {
        StartsObjCMethodExpr = true;
      }
    }

    // Decide whether the contents of the parentheses are an expression. The
    // first matching branch wins, so the order of these checks matters.
    if (Left->is(TT_OverloadedOperatorLParen)) {
      Contexts.back().IsExpression = false;
    } else if (Style.Language == FormatStyle::LK_JavaScript &&
               (Line.startsWith(Keywords.kw_type, tok::identifier) ||
                Line.startsWith(tok::kw_export, Keywords.kw_type,
                                tok::identifier))) {
      // type X = (...);
      // export type X = (...);
      Contexts.back().IsExpression = false;
    } else if (Left->Previous &&
               (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,
                                        tok::kw_if, tok::kw_while, tok::l_paren,
                                        tok::comma) ||
                Left->Previous->endsSequence(tok::kw_constexpr, tok::kw_if) ||
                Left->Previous->is(TT_BinaryOperator))) {
      // static_assert, if and while usually contain expressions.
      Contexts.back().IsExpression = true;
    } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
               (Left->Previous->is(Keywords.kw_function) ||
                (Left->Previous->endsSequence(tok::identifier,
                                              Keywords.kw_function)))) {
      // function(...) or function f(...)
      Contexts.back().IsExpression = false;
    } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
               Left->Previous->is(TT_JsTypeColon)) {
      // let x: (SomeType);
      Contexts.back().IsExpression = false;
    } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
               Left->Previous->MatchingParen &&
               Left->Previous->MatchingParen->is(TT_LambdaLSquare)) {
      // This is a parameter list of a lambda expression.
      Contexts.back().IsExpression = false;
    } else if (Line.InPPDirective &&
               (!Left->Previous || !Left->Previous->is(tok::identifier))) {
      Contexts.back().IsExpression = true;
    } else if (Contexts[Contexts.size() - 2].CaretFound) {
      // This is the parameter list of an ObjC block.
      Contexts.back().IsExpression = false;
    } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
      Left->Type = TT_AttributeParen;
    } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
      // The first argument to a foreach macro is a declaration.
      Contexts.back().IsForEachMacro = true;
      Contexts.back().IsExpression = false;
    } else if (Left->Previous && Left->Previous->MatchingParen &&
               Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) {
      Contexts.back().IsExpression = false;
    } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
      // Default outside of declarations and preprocessor directives:
      // everything except the head of a 'for' or 'catch' is an expression.
      bool IsForOrCatch =
          Left->Previous && Left->Previous->isOneOf(tok::kw_for, tok::kw_catch);
      Contexts.back().IsExpression = !IsForOrCatch;
    }

    if (StartsObjCMethodExpr) {
      Contexts.back().ColonIsObjCMethodExpr = true;
      Left->Type = TT_ObjCMethodExpr;
    }

    // MightBeFunctionType and ProbablyFunctionType are used for
    // function pointer and reference types as well as Objective-C
    // block types:
    //
    //   void (*FunctionPointer)(void);
    //   void (&FunctionReference)(void);
    //   void (^ObjCBlock)(void);
    bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
    bool ProbablyFunctionType =
        CurrentToken->isOneOf(tok::star, tok::amp, tok::caret);
    bool HasMultipleLines = false;
    bool HasMultipleParametersOnALine = false;
    bool MightBeObjCForRangeLoop =
        Left->Previous && Left->Previous->is(tok::kw_for);
    FormatToken *PossibleObjCForInToken = nullptr;
    while (CurrentToken) {
      // LookForDecls is set when "if (" has been seen. Check for
      // 'identifier' '*' 'identifier' followed by not '=' -- this
      // '*' has to be a binary operator but determineStarAmpUsage() will
      // categorize it as an unary operator, so set the right type here.
      if (LookForDecls && CurrentToken->Next) {
        FormatToken *Prev = CurrentToken->getPreviousNonComment();
        if (Prev) {
          FormatToken *PrevPrev = Prev->getPreviousNonComment();
          FormatToken *Next = CurrentToken->Next;
          if (PrevPrev && PrevPrev->is(tok::identifier) &&
              Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
              CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
            Prev->Type = TT_BinaryOperator;
            // Only the first such sequence is re-typed.
            LookForDecls = false;
          }
        }
      }

      if (CurrentToken->Previous->is(TT_PointerOrReference) &&
          CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
                                                    tok::coloncolon))
        ProbablyFunctionType = true;
      if (CurrentToken->is(tok::comma))
        MightBeFunctionType = false;
      if (CurrentToken->Previous->is(TT_BinaryOperator))
        Contexts.back().IsExpression = true;
      if (CurrentToken->is(tok::r_paren)) {
        if (MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next &&
            (CurrentToken->Next->is(tok::l_paren) ||
             (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration)))
          Left->Type = Left->Next->is(tok::caret) ? TT_ObjCBlockLParen
                                                   : TT_FunctionTypeLParen;
        Left->MatchingParen = CurrentToken;
        CurrentToken->MatchingParen = Left;

        if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
            Left->Previous && Left->Previous->is(tok::l_paren)) {
          // Detect the case where macros are used to generate lambdas or
          // function bodies, e.g.:
          //   auto my_lambda = MACRO((Type *type, int i) { .. body .. });
          for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) {
            if (Tok->is(TT_BinaryOperator) &&
                Tok->isOneOf(tok::star, tok::amp, tok::ampamp))
              Tok->Type = TT_PointerOrReference;
          }
        }

        if (StartsObjCMethodExpr) {
          CurrentToken->Type = TT_ObjCMethodExpr;
          if (Contexts.back().FirstObjCSelectorName) {
            Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
                Contexts.back().LongestObjCSelectorName;
          }
        }

        // The closing paren inherits attribute / annotation types from the
        // opening paren or from the token preceding it.
        if (Left->is(TT_AttributeParen))
          CurrentToken->Type = TT_AttributeParen;
        if (Left->Previous && Left->Previous->is(TT_JavaAnnotation))
          CurrentToken->Type = TT_JavaAnnotation;
        if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation))
          CurrentToken->Type = TT_LeadingJavaAnnotation;

        // Record how the parameters were laid out in the original source.
        if (!HasMultipleLines)
          Left->PackingKind = PPK_Inconclusive;
        else if (HasMultipleParametersOnALine)
          Left->PackingKind = PPK_BinPacked;
        else
          Left->PackingKind = PPK_OnePerLine;

        next();
        return true;
      }
      // A closing bracket of another kind means the parens are unbalanced.
      if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
        return false;

      if (CurrentToken->is(tok::l_brace))
        Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
      if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
          !CurrentToken->Next->HasUnescapedNewline &&
          !CurrentToken->Next->isTrailingComment())
        HasMultipleParametersOnALine = true;
      if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
           CurrentToken->Previous->isSimpleTypeSpecifier()) &&
          !CurrentToken->is(tok::l_brace))
        Contexts.back().IsExpression = false;
      if (CurrentToken->isOneOf(tok::semi, tok::colon)) {
        // A ';' or ':' rules out an ObjC 'for ... in' loop; undo a tentative
        // TT_ObjCForIn annotation.
        MightBeObjCForRangeLoop = false;
        if (PossibleObjCForInToken) {
          PossibleObjCForInToken->Type = TT_Unknown;
          PossibleObjCForInToken = nullptr;
        }
      }
      if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) {
        PossibleObjCForInToken = CurrentToken;
        PossibleObjCForInToken->Type = TT_ObjCForIn;
      }
      // When we discover a 'new', we set CanBeExpression to 'false' in order to
      // parse the type correctly. Reset that after a comma.
      if (CurrentToken->is(tok::comma))
        Contexts.back().CanBeExpression = true;

      // Remember the token before consuming it; consumeToken() advances
      // CurrentToken.
      FormatToken *Tok = CurrentToken;
      if (!consumeToken())
        return false;
      updateParameterCount(Left, Tok);
      if (CurrentToken && CurrentToken->HasUnescapedNewline)
        HasMultipleLines = true;
    }
    // Ran out of tokens without finding the closing ')'.
    return false;
  }

  /// \brief Checks whether \p Tok starts a C++11 attribute specifier
  /// ('[[' ... ']]').
  ///
  /// Always false for non-C++ styles or if \p Tok does not start a '[' '['
  /// sequence. The heuristic rejects sequences that look like ObjC message
  /// sends (a ':' or two consecutive identifiers before the closing ']]').
  bool isCpp11AttributeSpecifier(const FormatToken &Tok) {
    if (!Style.isCpp() || !Tok.startsSequence(tok::l_square, tok::l_square))
      return false;
    // First token after the '[['.
    const FormatToken *AttrTok = Tok.Next->Next;
    if (!AttrTok)
      return false;
    // C++17 '[[using ns: foo, bar(baz, blech)]]'
    // We assume nobody will name an ObjC variable 'using'.
    if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon))
      return true;
    if (AttrTok->isNot(tok::identifier))
      return false;
    while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) {
      // ObjC message send. We assume nobody will use : in a C++11 attribute
      // specifier parameter, although this is technically valid:
      // [[foo(:)]]
      if (AttrTok->is(tok::colon) ||
          AttrTok->startsSequence(tok::identifier, tok::identifier))
        return false;
      if (AttrTok->is(tok::ellipsis))
        return true;
      AttrTok = AttrTok->Next;
    }
    // True only if the scan stopped at a closing ']]' rather than at the end
    // of the token list.
    return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square);
  }

  /// \brief Parses the tokens following a '[' up to the matching ']'.
  ///
  /// The '[' is expected to be \c CurrentToken->Previous. Classifies the
  /// bracket (ObjC method expression, attribute specifier, array subscript,
  /// array initializer, ...), links the brackets via \c MatchingParen and
  /// consumes the ']'.
  ///
  /// \returns true if the matching ']' was found and consumed; false if there
  /// is no current token, a ')' or '}' is encountered first, or a nested
  /// token could not be consumed.
  bool parseSquare() {
    if (!CurrentToken)
      return false;

    // A '[' could be an index subscript (after an identifier or after
    // ')' or ']'), it could be the start of an Objective-C method
    // expression, it could be the start of an Objective-C array literal,
    // or it could be a C++ attribute specifier [[foo::bar]].
// Left is the token right before CurrentToken, i.e. the '[' being parsed.
    FormatToken *Left = CurrentToken->Previous;
    Left->ParentBracket = Contexts.back().ContextKind;
    FormatToken *Parent = Left->getPreviousNonComment();

    // Cases where '>' is followed by '['.
    // In C++, this can happen either in array of templates (foo<int>[10])
    // or when array is a nested template type (unique_ptr<type1<type2>[]>).
    bool CppArrayTemplates =
        Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) &&
        (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
         Contexts.back().InTemplateArgument);

    // Nested brackets inside an attribute specifier inherit the flag from the
    // enclosing context.
    bool IsCpp11AttributeSpecifier = isCpp11AttributeSpecifier(*Left) ||
                                     Contexts.back().InCpp11AttributeSpecifier;

    bool StartsObjCMethodExpr =
        !CppArrayTemplates && Style.isCpp() && !IsCpp11AttributeSpecifier &&
        Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
        !CurrentToken->isOneOf(tok::l_brace, tok::r_square) &&
        (!Parent ||
         Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
                         tok::kw_return, tok::kw_throw) ||
         Parent->isUnaryOperator() ||
         Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
         getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
    bool ColonFound = false;

    // Classify the '[' (only if it has not been typed already). The first
    // matching branch wins, so the order of these checks matters.
    unsigned BindingIncrease = 1;
    if (Left->isCppStructuredBinding(Style)) {
      Left->Type = TT_StructuredBindingLSquare;
    } else if (Left->is(TT_Unknown)) {
      if (StartsObjCMethodExpr) {
        Left->Type = TT_ObjCMethodExpr;
      } else if (IsCpp11AttributeSpecifier) {
        Left->Type = TT_AttributeSquare;
      } else if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
                 Contexts.back().ContextKind == tok::l_brace &&
                 Parent->isOneOf(tok::l_brace, tok::comma)) {
        Left->Type = TT_JsComputedPropertyName;
      } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace &&
                 Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
        Left->Type = TT_DesignatedInitializerLSquare;
      } else if (CurrentToken->is(tok::r_square) && Parent &&
                 Parent->is(TT_TemplateCloser)) {
        Left->Type = TT_ArraySubscriptLSquare;
      } else if (Style.Language == FormatStyle::LK_Proto ||
                 Style.Language == FormatStyle::LK_TextProto) {
        // Square braces in LK_Proto can either be message field attributes:
        //
        // optional Aaa aaa = 1 [
        //   (aaa) = aaa
        // ];
        //
        // extensions 123 [
        //   (aaa) = aaa
        // ];
        //
        // or text proto extensions (in options):
        //
        // option (Aaa.options) = {
        //   [type.type/type] {
        //     key: value
        //   }
        // }
        //
        // or repeated fields (in options):
        //
        // option (Aaa.options) = {
        //   keys: [ 1, 2, 3 ]
        // }
        //
        // In the first and the third case we want to spread the contents inside
        // the square braces; in the second we want to keep them inline.
        Left->Type = TT_ArrayInitializerLSquare;
        if (!Left->endsSequence(tok::l_square, tok::numeric_constant,
                                tok::equal) &&
            !Left->endsSequence(tok::l_square, tok::numeric_constant,
                                tok::identifier) &&
            !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) {
          Left->Type = TT_ProtoExtensionLSquare;
          BindingIncrease = 10;
        }
      } else if (!CppArrayTemplates && Parent &&
                 Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
                                 tok::comma, tok::l_paren, tok::l_square,
                                 tok::question, tok::colon, tok::kw_return,
                                 // Should only be relevant to JavaScript:
                                 tok::kw_default)) {
        Left->Type = TT_ArrayInitializerLSquare;
      } else {
        BindingIncrease = 10;
        Left->Type = TT_ArraySubscriptLSquare;
      }
    }

    ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
    Contexts.back().IsExpression = true;
    if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
        Parent->is(TT_JsTypeColon))
      Contexts.back().IsExpression = false;

    Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
    Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier;

    while (CurrentToken) {
      if (CurrentToken->is(tok::r_square)) {
        if (IsCpp11AttributeSpecifier)
          CurrentToken->Type = TT_AttributeSquare;
        else if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
                 Left->is(TT_ObjCMethodExpr)) {
          // An ObjC method call is rarely followed by an open parenthesis.
          // FIXME: Do we incorrectly label ":" with this?
          StartsObjCMethodExpr = false;
          Left->Type = TT_Unknown;
        }
        // An empty '[]' is never marked as a method expression.
        if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
          CurrentToken->Type = TT_ObjCMethodExpr;
          // determineStarAmpUsage() thinks that '*' '[' is allocating an
          // array of pointers, but if '[' starts a selector then '*' is a
          // binary operator.
          if (Parent && Parent->is(TT_PointerOrReference))
            Parent->Type = TT_BinaryOperator;
        }
        Left->MatchingParen = CurrentToken;
        CurrentToken->MatchingParen = Left;
        if (Contexts.back().FirstObjCSelectorName) {
          // Publish the selector statistics gathered in this context on the
          // first selector name token.
          Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
              Contexts.back().LongestObjCSelectorName;
          Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts =
              Left->ParameterCount;
          if (Left->BlockParameterCount > 1)
            Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
        }
        next();
        return true;
      }
      // A closing bracket of another kind means the brackets are unbalanced.
      if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
        return false;
      if (CurrentToken->is(tok::colon)) {
        if (IsCpp11AttributeSpecifier &&
            CurrentToken->endsSequence(tok::colon, tok::identifier,
                                       tok::kw_using)) {
          // Remember that this is a [[using ns: foo]] C++ attribute, so we
          // don't add a space before the colon (unlike other colons).
          CurrentToken->Type = TT_AttributeColon;
        } else if (Left->isOneOf(TT_ArraySubscriptLSquare,
                                 TT_DesignatedInitializerLSquare)) {
          // A ':' inside what looked like a subscript or designated
          // initializer re-classifies the '[' as an ObjC method expression.
          Left->Type = TT_ObjCMethodExpr;
          StartsObjCMethodExpr = true;
          // ParameterCount might have been set to 1 before expression was
          // recognized as ObjCMethodExpr (as '1 + number of commas' formula is
          // used for other expression types). Parameter counter has to be,
          // therefore, reset to 0.
          Left->ParameterCount = 0;
          Contexts.back().ColonIsObjCMethodExpr = true;
          if (Parent && Parent->is(tok::r_paren))
            Parent->Type = TT_CastRParen;
        }
        ColonFound = true;
      }
      // A ',' before any ':' means this is an array literal rather than a
      // method expression.
      if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
          !ColonFound)
        Left->Type = TT_ArrayInitializerLSquare;
      // Remember the token before consuming it; consumeToken() advances
      // CurrentToken.
      FormatToken *Tok = CurrentToken;
      if (!consumeToken())
        return false;
      updateParameterCount(Left, Tok);
    }
    // Ran out of tokens without finding the closing ']'.
    return false;
  }

  /// \brief Parses the tokens following a '{' up to the matching '}'.
  ///
  /// The '{' is expected to be \c CurrentToken->Previous. Links the braces via
  /// \c MatchingParen and marks the '{' as \c TT_DictLiteral (and keys as
  /// \c TT_SelectorName) where the contents look like a dictionary literal.
  ///
  /// \returns false only if a ')' or ']' is encountered before the matching
  /// '}' or a nested token could not be consumed. Returns true otherwise,
  /// including when there is no current token or the line ends before a '}'
  /// is found.
  bool parseBrace() {
    if (CurrentToken) {
      FormatToken *Left = CurrentToken->Previous;
      Left->ParentBracket = Contexts.back().ContextKind;

      // A '{' following a '^' in this context opens an ObjC block; the flag
      // is cleared once it has been used.
      if (Contexts.back().CaretFound)
        Left->Type = TT_ObjCBlockLBrace;
      Contexts.back().CaretFound = false;

      ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
      Contexts.back().ColonIsDictLiteral = true;
      if (Left->BlockKind == BK_BracedInit)
        Contexts.back().IsExpression = true;
      if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
          Left->Previous->is(TT_JsTypeColon))
        Contexts.back().IsExpression = false;

      while (CurrentToken) {
        if (CurrentToken->is(tok::r_brace)) {
          Left->MatchingParen = CurrentToken;
          CurrentToken->MatchingParen = Left;
          next();
          return true;
        }
        // A closing bracket of another kind means the braces are unbalanced.
        if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
          return false;
        updateParameterCount(Left, CurrentToken);
        if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
          FormatToken *Previous = CurrentToken->getPreviousNonComment();
571 if (Previous->is(TT_JsTypeOptionalQuestion)) 572 Previous = Previous->getPreviousNonComment(); 573 if ((CurrentToken->is(tok::colon) && 574 (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) || 575 Style.Language == FormatStyle::LK_Proto || 576 Style.Language == FormatStyle::LK_TextProto) { 577 Left->Type = TT_DictLiteral; 578 if (Previous->Tok.getIdentifierInfo() || 579 Previous->is(tok::string_literal)) 580 Previous->Type = TT_SelectorName; 581 } 582 if (CurrentToken->is(tok::colon) || 583 Style.Language == FormatStyle::LK_JavaScript) 584 Left->Type = TT_DictLiteral; 585 } 586 if (CurrentToken->is(tok::comma) && 587 Style.Language == FormatStyle::LK_JavaScript) 588 Left->Type = TT_DictLiteral; 589 if (!consumeToken()) 590 return false; 591 } 592 } 593 return true; 594 } 595 596 void updateParameterCount(FormatToken *Left, FormatToken *Current) { 597 if (Current->is(tok::l_brace) && Current->BlockKind == BK_Block) 598 ++Left->BlockParameterCount; 599 if (Left->Type == TT_ObjCMethodExpr) { 600 if (Current->is(tok::colon)) 601 ++Left->ParameterCount; 602 } else if (Current->is(tok::comma)) { 603 ++Left->ParameterCount; 604 if (!Left->Role) 605 Left->Role.reset(new CommaSeparatedList(Style)); 606 Left->Role->CommaFound(Current); 607 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) { 608 Left->ParameterCount = 1; 609 } 610 } 611 612 bool parseConditional() { 613 while (CurrentToken) { 614 if (CurrentToken->is(tok::colon)) { 615 CurrentToken->Type = TT_ConditionalExpr; 616 next(); 617 return true; 618 } 619 if (!consumeToken()) 620 return false; 621 } 622 return false; 623 } 624 625 bool parseTemplateDeclaration() { 626 if (CurrentToken && CurrentToken->is(tok::less)) { 627 CurrentToken->Type = TT_TemplateOpener; 628 next(); 629 if (!parseAngle()) 630 return false; 631 if (CurrentToken) 632 CurrentToken->Previous->ClosesTemplateDeclaration = true; 633 return true; 634 } 635 return false; 636 } 637 638 bool consumeToken() { 639 FormatToken 
*Tok = CurrentToken; 640 next(); 641 switch (Tok->Tok.getKind()) { 642 case tok::plus: 643 case tok::minus: 644 if (!Tok->Previous && Line.MustBeDeclaration) 645 Tok->Type = TT_ObjCMethodSpecifier; 646 break; 647 case tok::colon: 648 if (!Tok->Previous) 649 return false; 650 // Colons from ?: are handled in parseConditional(). 651 if (Style.Language == FormatStyle::LK_JavaScript) { 652 if (Contexts.back().ColonIsForRangeExpr || // colon in for loop 653 (Contexts.size() == 1 && // switch/case labels 654 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) || 655 Contexts.back().ContextKind == tok::l_paren || // function params 656 Contexts.back().ContextKind == tok::l_square || // array type 657 (!Contexts.back().IsExpression && 658 Contexts.back().ContextKind == tok::l_brace) || // object type 659 (Contexts.size() == 1 && 660 Line.MustBeDeclaration)) { // method/property declaration 661 Contexts.back().IsExpression = false; 662 Tok->Type = TT_JsTypeColon; 663 break; 664 } 665 } 666 if (Contexts.back().ColonIsDictLiteral || 667 Style.Language == FormatStyle::LK_Proto || 668 Style.Language == FormatStyle::LK_TextProto) { 669 Tok->Type = TT_DictLiteral; 670 if (Style.Language == FormatStyle::LK_TextProto) { 671 if (FormatToken *Previous = Tok->getPreviousNonComment()) 672 Previous->Type = TT_SelectorName; 673 } 674 } else if (Contexts.back().ColonIsObjCMethodExpr || 675 Line.startsWith(TT_ObjCMethodSpecifier)) { 676 Tok->Type = TT_ObjCMethodExpr; 677 const FormatToken *BeforePrevious = Tok->Previous->Previous; 678 if (!BeforePrevious || 679 !(BeforePrevious->is(TT_CastRParen) || 680 (BeforePrevious->is(TT_ObjCMethodExpr) && 681 BeforePrevious->is(tok::colon))) || 682 BeforePrevious->is(tok::r_square) || 683 Contexts.back().LongestObjCSelectorName == 0) { 684 Tok->Previous->Type = TT_SelectorName; 685 if (!Contexts.back().FirstObjCSelectorName) 686 Contexts.back().FirstObjCSelectorName = Tok->Previous; 687 else if (Tok->Previous->ColumnWidth > 688 
Contexts.back().LongestObjCSelectorName) 689 Contexts.back().LongestObjCSelectorName = 690 Tok->Previous->ColumnWidth; 691 } 692 } else if (Contexts.back().ColonIsForRangeExpr) { 693 Tok->Type = TT_RangeBasedForLoopColon; 694 } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) { 695 Tok->Type = TT_BitFieldColon; 696 } else if (Contexts.size() == 1 && 697 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) { 698 if (Tok->getPreviousNonComment()->isOneOf(tok::r_paren, 699 tok::kw_noexcept)) 700 Tok->Type = TT_CtorInitializerColon; 701 else 702 Tok->Type = TT_InheritanceColon; 703 } else if (Tok->Previous->is(tok::identifier) && Tok->Next && 704 (Tok->Next->isOneOf(tok::r_paren, tok::comma) || 705 Tok->Next->startsSequence(tok::identifier, tok::colon))) { 706 // This handles a special macro in ObjC code where selectors including 707 // the colon are passed as macro arguments. 708 Tok->Type = TT_ObjCMethodExpr; 709 } else if (Contexts.back().ContextKind == tok::l_paren) { 710 Tok->Type = TT_InlineASMColon; 711 } 712 break; 713 case tok::pipe: 714 case tok::amp: 715 // | and & in declarations/type expressions represent union and 716 // intersection types, respectively. 717 if (Style.Language == FormatStyle::LK_JavaScript && 718 !Contexts.back().IsExpression) 719 Tok->Type = TT_JsTypeOperator; 720 break; 721 case tok::kw_if: 722 case tok::kw_while: 723 if (Tok->is(tok::kw_if) && CurrentToken && 724 CurrentToken->is(tok::kw_constexpr)) 725 next(); 726 if (CurrentToken && CurrentToken->is(tok::l_paren)) { 727 next(); 728 if (!parseParens(/*LookForDecls=*/true)) 729 return false; 730 } 731 break; 732 case tok::kw_for: 733 if (Style.Language == FormatStyle::LK_JavaScript) { 734 // x.for and {for: ...} 735 if ((Tok->Previous && Tok->Previous->is(tok::period)) || 736 (Tok->Next && Tok->Next->is(tok::colon))) 737 break; 738 // JS' for await ( ... 
739 if (CurrentToken && CurrentToken->is(Keywords.kw_await)) 740 next(); 741 } 742 Contexts.back().ColonIsForRangeExpr = true; 743 next(); 744 if (!parseParens()) 745 return false; 746 break; 747 case tok::l_paren: 748 // When faced with 'operator()()', the kw_operator handler incorrectly 749 // marks the first l_paren as a OverloadedOperatorLParen. Here, we make 750 // the first two parens OverloadedOperators and the second l_paren an 751 // OverloadedOperatorLParen. 752 if (Tok->Previous && Tok->Previous->is(tok::r_paren) && 753 Tok->Previous->MatchingParen && 754 Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) { 755 Tok->Previous->Type = TT_OverloadedOperator; 756 Tok->Previous->MatchingParen->Type = TT_OverloadedOperator; 757 Tok->Type = TT_OverloadedOperatorLParen; 758 } 759 760 if (!parseParens()) 761 return false; 762 if (Line.MustBeDeclaration && Contexts.size() == 1 && 763 !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) && 764 (!Tok->Previous || 765 !Tok->Previous->isOneOf(tok::kw_decltype, tok::kw___attribute, 766 TT_LeadingJavaAnnotation))) 767 Line.MightBeFunctionDecl = true; 768 break; 769 case tok::l_square: 770 if (!parseSquare()) 771 return false; 772 break; 773 case tok::l_brace: 774 if (Style.Language == FormatStyle::LK_TextProto) { 775 FormatToken *Previous = Tok->getPreviousNonComment(); 776 if (Previous && Previous->Type != TT_DictLiteral) 777 Previous->Type = TT_SelectorName; 778 } 779 if (!parseBrace()) 780 return false; 781 break; 782 case tok::less: 783 if (parseAngle()) { 784 Tok->Type = TT_TemplateOpener; 785 // In TT_Proto, we must distignuish between: 786 // map<key, value> 787 // msg < item: data > 788 // msg: < item: data > 789 // In TT_TextProto, map<key, value> does not occur. 
790 if (Style.Language == FormatStyle::LK_TextProto || 791 (Style.Language == FormatStyle::LK_Proto && Tok->Previous && 792 Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) { 793 Tok->Type = TT_DictLiteral; 794 FormatToken *Previous = Tok->getPreviousNonComment(); 795 if (Previous && Previous->Type != TT_DictLiteral) 796 Previous->Type = TT_SelectorName; 797 } 798 } else { 799 Tok->Type = TT_BinaryOperator; 800 NonTemplateLess.insert(Tok); 801 CurrentToken = Tok; 802 next(); 803 } 804 break; 805 case tok::r_paren: 806 case tok::r_square: 807 return false; 808 case tok::r_brace: 809 // Lines can start with '}'. 810 if (Tok->Previous) 811 return false; 812 break; 813 case tok::greater: 814 if (Style.Language != FormatStyle::LK_TextProto) 815 Tok->Type = TT_BinaryOperator; 816 break; 817 case tok::kw_operator: 818 if (Style.Language == FormatStyle::LK_TextProto || 819 Style.Language == FormatStyle::LK_Proto) 820 break; 821 while (CurrentToken && 822 !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) { 823 if (CurrentToken->isOneOf(tok::star, tok::amp)) 824 CurrentToken->Type = TT_PointerOrReference; 825 consumeToken(); 826 if (CurrentToken && 827 CurrentToken->Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator, 828 tok::comma)) 829 CurrentToken->Previous->Type = TT_OverloadedOperator; 830 } 831 if (CurrentToken) { 832 CurrentToken->Type = TT_OverloadedOperatorLParen; 833 if (CurrentToken->Previous->is(TT_BinaryOperator)) 834 CurrentToken->Previous->Type = TT_OverloadedOperator; 835 } 836 break; 837 case tok::question: 838 if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next && 839 Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren, 840 tok::r_brace)) { 841 // Question marks before semicolons, colons, etc. indicate optional 842 // types (fields, parameters), e.g. 843 // function(x?: string, y?) 
{...} 844 // class X { y?; } 845 Tok->Type = TT_JsTypeOptionalQuestion; 846 break; 847 } 848 // Declarations cannot be conditional expressions, this can only be part 849 // of a type declaration. 850 if (Line.MustBeDeclaration && !Contexts.back().IsExpression && 851 Style.Language == FormatStyle::LK_JavaScript) 852 break; 853 parseConditional(); 854 break; 855 case tok::kw_template: 856 parseTemplateDeclaration(); 857 break; 858 case tok::comma: 859 if (Contexts.back().InCtorInitializer) 860 Tok->Type = TT_CtorInitializerComma; 861 else if (Contexts.back().InInheritanceList) 862 Tok->Type = TT_InheritanceComma; 863 else if (Contexts.back().FirstStartOfName && 864 (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) { 865 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; 866 Line.IsMultiVariableDeclStmt = true; 867 } 868 if (Contexts.back().IsForEachMacro) 869 Contexts.back().IsExpression = true; 870 break; 871 case tok::identifier: 872 if (Tok->isOneOf(Keywords.kw___has_include, 873 Keywords.kw___has_include_next)) { 874 parseHasInclude(); 875 } 876 break; 877 default: 878 break; 879 } 880 return true; 881 } 882 883 void parseIncludeDirective() { 884 if (CurrentToken && CurrentToken->is(tok::less)) { 885 next(); 886 while (CurrentToken) { 887 // Mark tokens up to the trailing line comments as implicit string 888 // literals. 889 if (CurrentToken->isNot(tok::comment) && 890 !CurrentToken->TokenText.startswith("//")) 891 CurrentToken->Type = TT_ImplicitStringLiteral; 892 next(); 893 } 894 } 895 } 896 897 void parseWarningOrError() { 898 next(); 899 // We still want to format the whitespace left of the first token of the 900 // warning or error. 901 next(); 902 while (CurrentToken) { 903 CurrentToken->Type = TT_ImplicitStringLiteral; 904 next(); 905 } 906 } 907 908 void parsePragma() { 909 next(); // Consume "pragma". 
910 if (CurrentToken && 911 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) { 912 bool IsMark = CurrentToken->is(Keywords.kw_mark); 913 next(); // Consume "mark". 914 next(); // Consume first token (so we fix leading whitespace). 915 while (CurrentToken) { 916 if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator)) 917 CurrentToken->Type = TT_ImplicitStringLiteral; 918 next(); 919 } 920 } 921 } 922 923 void parseHasInclude() { 924 if (!CurrentToken || !CurrentToken->is(tok::l_paren)) 925 return; 926 next(); // '(' 927 parseIncludeDirective(); 928 next(); // ')' 929 } 930 931 LineType parsePreprocessorDirective() { 932 bool IsFirstToken = CurrentToken->IsFirst; 933 LineType Type = LT_PreprocessorDirective; 934 next(); 935 if (!CurrentToken) 936 return Type; 937 938 if (Style.Language == FormatStyle::LK_JavaScript && IsFirstToken) { 939 // JavaScript files can contain shebang lines of the form: 940 // #!/usr/bin/env node 941 // Treat these like C++ #include directives. 942 while (CurrentToken) { 943 // Tokens cannot be comments here. 944 CurrentToken->Type = TT_ImplicitStringLiteral; 945 next(); 946 } 947 return LT_ImportStatement; 948 } 949 950 if (CurrentToken->Tok.is(tok::numeric_constant)) { 951 CurrentToken->SpacesRequiredBefore = 1; 952 return Type; 953 } 954 // Hashes in the middle of a line can lead to any strange token 955 // sequence. 
956 if (!CurrentToken->Tok.getIdentifierInfo()) 957 return Type; 958 switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) { 959 case tok::pp_include: 960 case tok::pp_include_next: 961 case tok::pp_import: 962 next(); 963 parseIncludeDirective(); 964 Type = LT_ImportStatement; 965 break; 966 case tok::pp_error: 967 case tok::pp_warning: 968 parseWarningOrError(); 969 break; 970 case tok::pp_pragma: 971 parsePragma(); 972 break; 973 case tok::pp_if: 974 case tok::pp_elif: 975 Contexts.back().IsExpression = true; 976 parseLine(); 977 break; 978 default: 979 break; 980 } 981 while (CurrentToken) { 982 FormatToken *Tok = CurrentToken; 983 next(); 984 if (Tok->is(tok::l_paren)) 985 parseParens(); 986 else if (Tok->isOneOf(Keywords.kw___has_include, 987 Keywords.kw___has_include_next)) 988 parseHasInclude(); 989 } 990 return Type; 991 } 992 993 public: 994 LineType parseLine() { 995 NonTemplateLess.clear(); 996 if (CurrentToken->is(tok::hash)) 997 return parsePreprocessorDirective(); 998 999 // Directly allow to 'import <string-literal>' to support protocol buffer 1000 // definitions (github.com/google/protobuf) or missing "#" (either way we 1001 // should not break the line). 1002 IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo(); 1003 if ((Style.Language == FormatStyle::LK_Java && 1004 CurrentToken->is(Keywords.kw_package)) || 1005 (Info && Info->getPPKeywordID() == tok::pp_import && 1006 CurrentToken->Next && 1007 CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier, 1008 tok::kw_static))) { 1009 next(); 1010 parseIncludeDirective(); 1011 return LT_ImportStatement; 1012 } 1013 1014 // If this line starts and ends in '<' and '>', respectively, it is likely 1015 // part of "#define <a/b.h>". 
1016 if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) { 1017 parseIncludeDirective(); 1018 return LT_ImportStatement; 1019 } 1020 1021 // In .proto files, top-level options are very similar to import statements 1022 // and should not be line-wrapped. 1023 if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 && 1024 CurrentToken->is(Keywords.kw_option)) { 1025 next(); 1026 if (CurrentToken && CurrentToken->is(tok::identifier)) 1027 return LT_ImportStatement; 1028 } 1029 1030 bool KeywordVirtualFound = false; 1031 bool ImportStatement = false; 1032 1033 // import {...} from '...'; 1034 if (Style.Language == FormatStyle::LK_JavaScript && 1035 CurrentToken->is(Keywords.kw_import)) 1036 ImportStatement = true; 1037 1038 while (CurrentToken) { 1039 if (CurrentToken->is(tok::kw_virtual)) 1040 KeywordVirtualFound = true; 1041 if (Style.Language == FormatStyle::LK_JavaScript) { 1042 // export {...} from '...'; 1043 // An export followed by "from 'some string';" is a re-export from 1044 // another module identified by a URI and is treated as a 1045 // LT_ImportStatement (i.e. prevent wraps on it for long URIs). 1046 // Just "export {...};" or "export class ..." should not be treated as 1047 // an import in this sense. 
1048 if (Line.First->is(tok::kw_export) && 1049 CurrentToken->is(Keywords.kw_from) && CurrentToken->Next && 1050 CurrentToken->Next->isStringLiteral()) 1051 ImportStatement = true; 1052 if (isClosureImportStatement(*CurrentToken)) 1053 ImportStatement = true; 1054 } 1055 if (!consumeToken()) 1056 return LT_Invalid; 1057 } 1058 if (KeywordVirtualFound) 1059 return LT_VirtualFunctionDecl; 1060 if (ImportStatement) 1061 return LT_ImportStatement; 1062 1063 if (Line.startsWith(TT_ObjCMethodSpecifier)) { 1064 if (Contexts.back().FirstObjCSelectorName) 1065 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 1066 Contexts.back().LongestObjCSelectorName; 1067 return LT_ObjCMethodDecl; 1068 } 1069 1070 return LT_Other; 1071 } 1072 1073 private: 1074 bool isClosureImportStatement(const FormatToken &Tok) { 1075 // FIXME: Closure-library specific stuff should not be hard-coded but be 1076 // configurable. 1077 return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) && 1078 Tok.Next->Next && 1079 (Tok.Next->Next->TokenText == "module" || 1080 Tok.Next->Next->TokenText == "provide" || 1081 Tok.Next->Next->TokenText == "require" || 1082 Tok.Next->Next->TokenText == "forwardDeclare") && 1083 Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren); 1084 } 1085 1086 void resetTokenMetadata(FormatToken *Token) { 1087 if (!Token) 1088 return; 1089 1090 // Reset token type in case we have already looked at it and then 1091 // recovered from an error (e.g. failure to find the matching >). 
1092 if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro, 1093 TT_FunctionLBrace, TT_ImplicitStringLiteral, 1094 TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow, 1095 TT_OverloadedOperator, TT_RegexLiteral, 1096 TT_TemplateString, TT_ObjCStringLiteral)) 1097 CurrentToken->Type = TT_Unknown; 1098 CurrentToken->Role.reset(); 1099 CurrentToken->MatchingParen = nullptr; 1100 CurrentToken->FakeLParens.clear(); 1101 CurrentToken->FakeRParens = 0; 1102 } 1103 1104 void next() { 1105 if (CurrentToken) { 1106 CurrentToken->NestingLevel = Contexts.size() - 1; 1107 CurrentToken->BindingStrength = Contexts.back().BindingStrength; 1108 modifyContext(*CurrentToken); 1109 determineTokenType(*CurrentToken); 1110 CurrentToken = CurrentToken->Next; 1111 } 1112 1113 resetTokenMetadata(CurrentToken); 1114 } 1115 1116 /// \brief A struct to hold information valid in a specific context, e.g. 1117 /// a pair of parenthesis. 1118 struct Context { 1119 Context(tok::TokenKind ContextKind, unsigned BindingStrength, 1120 bool IsExpression) 1121 : ContextKind(ContextKind), BindingStrength(BindingStrength), 1122 IsExpression(IsExpression) {} 1123 1124 tok::TokenKind ContextKind; 1125 unsigned BindingStrength; 1126 bool IsExpression; 1127 unsigned LongestObjCSelectorName = 0; 1128 bool ColonIsForRangeExpr = false; 1129 bool ColonIsDictLiteral = false; 1130 bool ColonIsObjCMethodExpr = false; 1131 FormatToken *FirstObjCSelectorName = nullptr; 1132 FormatToken *FirstStartOfName = nullptr; 1133 bool CanBeExpression = true; 1134 bool InTemplateArgument = false; 1135 bool InCtorInitializer = false; 1136 bool InInheritanceList = false; 1137 bool CaretFound = false; 1138 bool IsForEachMacro = false; 1139 bool InCpp11AttributeSpecifier = false; 1140 }; 1141 1142 /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime 1143 /// of each instance. 
struct ScopedContextCreator {
  AnnotatingParser &P;

  // Pushes a context of kind \p ContextKind whose binding strength is the
  // enclosing context's strength plus \p Increase; IsExpression is inherited
  // from the enclosing context.
  ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
                       unsigned Increase)
      : P(P) {
    P.Contexts.push_back(Context(ContextKind,
                                 P.Contexts.back().BindingStrength + Increase,
                                 P.Contexts.back().IsExpression));
  }

  // Pops the context pushed by the constructor.
  ~ScopedContextCreator() { P.Contexts.pop_back(); }
};

/// \brief Updates the innermost \c Context based on the token \p Current.
///
/// Mainly toggles \c IsExpression / \c CanBeExpression and the
/// constructor-initializer / inheritance-list flags. Some branches also
/// re-type preceding '*', '&' and '&&' tokens as \c TT_PointerOrReference.
/// Only the first matching branch of the chain is applied.
void modifyContext(const FormatToken &Current) {
  if (Current.getPrecedence() == prec::Assignment &&
      !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) &&
      // Type aliases use `type X = ...;` in TypeScript and can be exported
      // using `export type ...`.
      !(Style.Language == FormatStyle::LK_JavaScript &&
        (Line.startsWith(Keywords.kw_type, tok::identifier) ||
         Line.startsWith(tok::kw_export, Keywords.kw_type,
                         tok::identifier))) &&
      (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
    Contexts.back().IsExpression = true;
    if (!Line.startsWith(TT_UnaryOperator)) {
      // Walk backwards over the tokens left of the assignment (stopping after
      // a ',' or ';', or at a scope opener) and re-type '*', '&', '&&' that
      // were annotated as operators into pointer/reference tokens.
      for (FormatToken *Previous = Current.Previous;
           Previous && Previous->Previous &&
           !Previous->Previous->isOneOf(tok::comma, tok::semi);
           Previous = Previous->Previous) {
        if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
          // Jump over a bracketed range to its opening token.
          Previous = Previous->MatchingParen;
          if (!Previous)
            break;
        }
        if (Previous->opensScope())
          break;
        if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
            Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
            Previous->Previous && Previous->Previous->isNot(tok::equal))
          Previous->Type = TT_PointerOrReference;
      }
    }
  } else if (Current.is(tok::lessless) &&
             (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
    Contexts.back().IsExpression = true;
  } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
    Contexts.back().IsExpression = true;
  } else if (Current.is(TT_TrailingReturnArrow)) {
    Contexts.back().IsExpression = false;
  } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
    Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
  } else if (Current.Previous &&
             Current.Previous->is(TT_CtorInitializerColon)) {
    Contexts.back().IsExpression = true;
    Contexts.back().InCtorInitializer = true;
  } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {
    Contexts.back().InInheritanceList = true;
  } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
    // A run of '*' / '&' directly before ')', '>' or ',' is re-typed as
    // pointer/reference.
    for (FormatToken *Previous = Current.Previous;
         Previous && Previous->isOneOf(tok::star, tok::amp);
         Previous = Previous->Previous)
      Previous->Type = TT_PointerOrReference;
    if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer)
      Contexts.back().IsExpression = false;
  } else if (Current.is(tok::kw_new)) {
    Contexts.back().CanBeExpression = false;
  } else if (Current.isOneOf(tok::semi, tok::exclaim)) {
    // This should be the condition or increment in a for-loop.
    Contexts.back().IsExpression = true;
  }
}

/// \brief Assigns a \c TokenType to \p Current if it does not have one yet.
///
/// Tokens whose type is not \c TT_Unknown are left untouched. Besides setting
/// \c Current.Type, this may update \c FirstStartOfName and \c CaretFound of
/// the innermost context as well as \c AutoFound. Only the first matching
/// branch of the chain is applied.
void determineTokenType(FormatToken &Current) {
  if (!Current.is(TT_Unknown))
    // The token type is already known.
    return;

  if (Style.Language == FormatStyle::LK_JavaScript) {
    if (Current.is(tok::exclaim)) {
      // A '!' that follows an identifier, 'namespace', a closing bracket or a
      // literal is typed as a non-null assertion.
      if (Current.Previous &&
          (Current.Previous->isOneOf(tok::identifier, tok::kw_namespace,
                                     tok::r_paren, tok::r_square,
                                     tok::r_brace) ||
           Current.Previous->Tok.isLiteral())) {
        Current.Type = TT_JsNonNullAssertion;
        return;
      }
      // So is a '!' directly followed by a binary operator or 'as'.
      if (Current.Next &&
          Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
        Current.Type = TT_JsNonNullAssertion;
        return;
      }
    }
  }

  // Line.MightBeFunctionDecl can only be true after the parentheses of a
  // function declaration have been found. In this case, 'Current' is a
  // trailing token of this declaration and thus cannot be a name.
  if (Current.is(Keywords.kw_instanceof)) {
    Current.Type = TT_BinaryOperator;
  } else if (isStartOfName(Current) &&
             (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
    Contexts.back().FirstStartOfName = &Current;
    Current.Type = TT_StartOfName;
  } else if (Current.is(tok::semi)) {
    // Reset FirstStartOfName after finding a semicolon so that a for loop
    // with multiple increment statements is not confused with a for loop
    // having multiple variable declarations.
    Contexts.back().FirstStartOfName = nullptr;
  } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
    AutoFound = true;
  } else if (Current.is(tok::arrow) &&
             Style.Language == FormatStyle::LK_Java) {
    Current.Type = TT_LambdaArrow;
  } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration &&
             Current.NestingLevel == 0) {
    Current.Type = TT_TrailingReturnArrow;
  } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
    Current.Type = determineStarAmpUsage(Current,
                                         Contexts.back().CanBeExpression &&
                                             Contexts.back().IsExpression,
                                         Contexts.back().InTemplateArgument);
  } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
    Current.Type = determinePlusMinusCaretUsage(Current);
    // Remember a unary '^' in this context; determineStarAmpUsage consults
    // CaretFound.
    if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
      Contexts.back().CaretFound = true;
  } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
    Current.Type = determineIncrementUsage(Current);
  } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
    Current.Type = TT_UnaryOperator;
  } else if (Current.is(tok::question)) {
    if (Style.Language == FormatStyle::LK_JavaScript &&
        Line.MustBeDeclaration && !Contexts.back().IsExpression) {
      // In JavaScript, `interface X { foo?(): bar; }` is an optional method
      // on the interface, not a ternary expression.
      Current.Type = TT_JsTypeOptionalQuestion;
    } else {
      Current.Type = TT_ConditionalExpr;
    }
  } else if (Current.isBinaryOperator() &&
             (!Current.Previous || Current.Previous->isNot(tok::l_square)) &&
             (!Current.is(tok::greater) &&
              Style.Language != FormatStyle::LK_TextProto)) {
    Current.Type = TT_BinaryOperator;
  } else if (Current.is(tok::comment)) {
    if (Current.TokenText.startswith("/*")) {
      if (Current.TokenText.endswith("*/"))
        Current.Type = TT_BlockComment;
      else
        // The lexer has for some reason determined a comment here. But we
        // cannot really handle it, if it isn't properly terminated.
        Current.Tok.setKind(tok::unknown);
    } else {
      Current.Type = TT_LineComment;
    }
  } else if (Current.is(tok::r_paren)) {
    if (rParenEndsCast(Current))
      Current.Type = TT_CastRParen;
    // An all-uppercase identifier directly before the parentheses, at the
    // start of the line or right after a template declaration, makes this
    // ')' a function annotation paren. This may overwrite TT_CastRParen.
    if (Current.MatchingParen && Current.Next &&
        !Current.Next->isBinaryOperator() &&
        !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
                               tok::comma, tok::period, tok::arrow,
                               tok::coloncolon))
      if (FormatToken *AfterParen = Current.MatchingParen->Next) {
        // Make sure this isn't the return type of an Obj-C block declaration
        if (AfterParen->Tok.isNot(tok::caret)) {
          if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
            if (BeforeParen->is(tok::identifier) &&
                BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
                (!BeforeParen->Previous ||
                 BeforeParen->Previous->ClosesTemplateDeclaration))
              Current.Type = TT_FunctionAnnotationRParen;
        }
      }
  } else if (Current.is(tok::at) && Current.Next &&
             Style.Language != FormatStyle::LK_JavaScript &&
             Style.Language != FormatStyle::LK_Java) {
    // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
    // marks declarations and properties that need special formatting.
    switch (Current.Next->Tok.getObjCKeywordID()) {
    case tok::objc_interface:
    case tok::objc_implementation:
    case tok::objc_protocol:
      Current.Type = TT_ObjCDecl;
      break;
    case tok::objc_property:
      Current.Type = TT_ObjCProperty;
      break;
    default:
      break;
    }
  } else if (Current.is(tok::period)) {
    FormatToken *PreviousNoComment = Current.getPreviousNonComment();
    if (PreviousNoComment &&
        PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
      Current.Type = TT_DesignatedInitializerPeriod;
    else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
             Current.Previous->isOneOf(TT_JavaAnnotation,
                                       TT_LeadingJavaAnnotation)) {
      // A '.' inside a qualified annotation name inherits the annotation type.
      Current.Type = Current.Previous->Type;
    }
  } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
             Current.Previous &&
             !Current.Previous->isOneOf(tok::equal, tok::at) &&
             Line.MightBeFunctionDecl && Contexts.size() == 1) {
    // Line.MightBeFunctionDecl can only be true after the parentheses of a
    // function declaration have been found.
    Current.Type = TT_TrailingAnnotation;
  } else if ((Style.Language == FormatStyle::LK_Java ||
              Style.Language == FormatStyle::LK_JavaScript) &&
             Current.Previous) {
    if (Current.Previous->is(tok::at) &&
        Current.isNot(Keywords.kw_interface)) {
      // "@Name": leading if nothing, or only leading annotations, precede '@'.
      const FormatToken &AtToken = *Current.Previous;
      const FormatToken *Previous = AtToken.getPreviousNonComment();
      if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
        Current.Type = TT_LeadingJavaAnnotation;
      else
        Current.Type = TT_JavaAnnotation;
    } else if (Current.Previous->is(tok::period) &&
               Current.Previous->isOneOf(TT_JavaAnnotation,
                                         TT_LeadingJavaAnnotation)) {
      Current.Type = Current.Previous->Type;
    }
  }
}

/// \brief Take a guess at whether \p Tok starts a name of a function or
/// variable declaration.
1374 /// 1375 /// This is a heuristic based on whether \p Tok is an identifier following 1376 /// something that is likely a type. 1377 bool isStartOfName(const FormatToken &Tok) { 1378 if (Tok.isNot(tok::identifier) || !Tok.Previous) 1379 return false; 1380 1381 if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof, 1382 Keywords.kw_as)) 1383 return false; 1384 if (Style.Language == FormatStyle::LK_JavaScript && 1385 Tok.Previous->is(Keywords.kw_in)) 1386 return false; 1387 1388 // Skip "const" as it does not have an influence on whether this is a name. 1389 FormatToken *PreviousNotConst = Tok.getPreviousNonComment(); 1390 while (PreviousNotConst && PreviousNotConst->is(tok::kw_const)) 1391 PreviousNotConst = PreviousNotConst->getPreviousNonComment(); 1392 1393 if (!PreviousNotConst) 1394 return false; 1395 1396 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) && 1397 PreviousNotConst->Previous && 1398 PreviousNotConst->Previous->is(tok::hash); 1399 1400 if (PreviousNotConst->is(TT_TemplateCloser)) 1401 return PreviousNotConst && PreviousNotConst->MatchingParen && 1402 PreviousNotConst->MatchingParen->Previous && 1403 PreviousNotConst->MatchingParen->Previous->isNot(tok::period) && 1404 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template); 1405 1406 if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen && 1407 PreviousNotConst->MatchingParen->Previous && 1408 PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype)) 1409 return true; 1410 1411 return (!IsPPKeyword && 1412 PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) || 1413 PreviousNotConst->is(TT_PointerOrReference) || 1414 PreviousNotConst->isSimpleTypeSpecifier(); 1415 } 1416 1417 /// \brief Determine whether ')' is ending a cast. 1418 bool rParenEndsCast(const FormatToken &Tok) { 1419 // C-style casts are only used in C++ and Java. 
1420 if (!Style.isCpp() && Style.Language != FormatStyle::LK_Java) 1421 return false; 1422 1423 // Empty parens aren't casts and there are no casts at the end of the line. 1424 if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen) 1425 return false; 1426 1427 FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment(); 1428 if (LeftOfParens) { 1429 // If there is a closing parenthesis left of the current parentheses, 1430 // look past it as these might be chained casts. 1431 if (LeftOfParens->is(tok::r_paren)) { 1432 if (!LeftOfParens->MatchingParen || 1433 !LeftOfParens->MatchingParen->Previous) 1434 return false; 1435 LeftOfParens = LeftOfParens->MatchingParen->Previous; 1436 } 1437 1438 // If there is an identifier (or with a few exceptions a keyword) right 1439 // before the parentheses, this is unlikely to be a cast. 1440 if (LeftOfParens->Tok.getIdentifierInfo() && 1441 !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case, 1442 tok::kw_delete)) 1443 return false; 1444 1445 // Certain other tokens right before the parentheses are also signals that 1446 // this cannot be a cast. 1447 if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator, 1448 TT_TemplateCloser, tok::ellipsis)) 1449 return false; 1450 } 1451 1452 if (Tok.Next->is(tok::question)) 1453 return false; 1454 1455 // As Java has no function types, a "(" after the ")" likely means that this 1456 // is a cast. 1457 if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren)) 1458 return true; 1459 1460 // If a (non-string) literal follows, this is likely a cast. 1461 if (Tok.Next->isNot(tok::string_literal) && 1462 (Tok.Next->Tok.isLiteral() || 1463 Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) 1464 return true; 1465 1466 // Heuristically try to determine whether the parentheses contain a type. 
1467 bool ParensAreType = 1468 !Tok.Previous || 1469 Tok.Previous->isOneOf(TT_PointerOrReference, TT_TemplateCloser) || 1470 Tok.Previous->isSimpleTypeSpecifier(); 1471 bool ParensCouldEndDecl = 1472 Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater); 1473 if (ParensAreType && !ParensCouldEndDecl) 1474 return true; 1475 1476 // At this point, we heuristically assume that there are no casts at the 1477 // start of the line. We assume that we have found most cases where there 1478 // are by the logic above, e.g. "(void)x;". 1479 if (!LeftOfParens) 1480 return false; 1481 1482 // Certain token types inside the parentheses mean that this can't be a 1483 // cast. 1484 for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok; 1485 Token = Token->Next) 1486 if (Token->is(TT_BinaryOperator)) 1487 return false; 1488 1489 // If the following token is an identifier or 'this', this is a cast. All 1490 // cases where this can be something else are handled above. 1491 if (Tok.Next->isOneOf(tok::identifier, tok::kw_this)) 1492 return true; 1493 1494 if (!Tok.Next->Next) 1495 return false; 1496 1497 // If the next token after the parenthesis is a unary operator, assume 1498 // that this is cast, unless there are unexpected tokens inside the 1499 // parenthesis. 1500 bool NextIsUnary = 1501 Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star); 1502 if (!NextIsUnary || Tok.Next->is(tok::plus) || 1503 !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant)) 1504 return false; 1505 // Search for unexpected tokens. 1506 for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen; 1507 Prev = Prev->Previous) { 1508 if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon)) 1509 return false; 1510 } 1511 return true; 1512 } 1513 1514 /// \brief Return the type of the given token assuming it is * or &. 
1515 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression, 1516 bool InTemplateArgument) { 1517 if (Style.Language == FormatStyle::LK_JavaScript) 1518 return TT_BinaryOperator; 1519 1520 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 1521 if (!PrevToken) 1522 return TT_UnaryOperator; 1523 1524 const FormatToken *NextToken = Tok.getNextNonComment(); 1525 if (!NextToken || 1526 NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_const) || 1527 (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) 1528 return TT_PointerOrReference; 1529 1530 if (PrevToken->is(tok::coloncolon)) 1531 return TT_PointerOrReference; 1532 1533 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, 1534 tok::comma, tok::semi, tok::kw_return, tok::colon, 1535 tok::equal, tok::kw_delete, tok::kw_sizeof, 1536 tok::kw_throw) || 1537 PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr, 1538 TT_UnaryOperator, TT_CastRParen)) 1539 return TT_UnaryOperator; 1540 1541 if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare)) 1542 return TT_PointerOrReference; 1543 if (NextToken->is(tok::kw_operator) && !IsExpression) 1544 return TT_PointerOrReference; 1545 if (NextToken->isOneOf(tok::comma, tok::semi)) 1546 return TT_PointerOrReference; 1547 1548 if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen) { 1549 FormatToken *TokenBeforeMatchingParen = 1550 PrevToken->MatchingParen->getPreviousNonComment(); 1551 if (TokenBeforeMatchingParen && 1552 TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype)) 1553 return TT_PointerOrReference; 1554 } 1555 1556 if (PrevToken->Tok.isLiteral() || 1557 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true, 1558 tok::kw_false, tok::r_brace) || 1559 NextToken->Tok.isLiteral() || 1560 NextToken->isOneOf(tok::kw_true, tok::kw_false) || 1561 NextToken->isUnaryOperator() || 1562 // If we know we're in a template argument, there are no named 1563 // declarations. 
Thus, having an identifier on the right-hand side 1564 // indicates a binary operator. 1565 (InTemplateArgument && NextToken->Tok.isAnyIdentifier())) 1566 return TT_BinaryOperator; 1567 1568 // "&&(" is quite unlikely to be two successive unary "&". 1569 if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren)) 1570 return TT_BinaryOperator; 1571 1572 // This catches some cases where evaluation order is used as control flow: 1573 // aaa && aaa->f(); 1574 const FormatToken *NextNextToken = NextToken->getNextNonComment(); 1575 if (NextNextToken && NextNextToken->is(tok::arrow)) 1576 return TT_BinaryOperator; 1577 1578 // It is very unlikely that we are going to find a pointer or reference type 1579 // definition on the RHS of an assignment. 1580 if (IsExpression && !Contexts.back().CaretFound) 1581 return TT_BinaryOperator; 1582 1583 return TT_PointerOrReference; 1584 } 1585 1586 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) { 1587 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 1588 if (!PrevToken) 1589 return TT_UnaryOperator; 1590 1591 if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator)) 1592 // This must be a sequence of leading unary operators. 1593 return TT_UnaryOperator; 1594 1595 // Use heuristics to recognize unary operators. 1596 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square, 1597 tok::question, tok::colon, tok::kw_return, 1598 tok::kw_case, tok::at, tok::l_brace)) 1599 return TT_UnaryOperator; 1600 1601 // There can't be two consecutive binary operators. 1602 if (PrevToken->is(TT_BinaryOperator)) 1603 return TT_UnaryOperator; 1604 1605 // Fall back to marking the token as binary operator. 1606 return TT_BinaryOperator; 1607 } 1608 1609 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 
TokenType determineIncrementUsage(const FormatToken &Tok) {
  const FormatToken *PrevToken = Tok.getPreviousNonComment();
  // First non-comment token, or directly after a cast: prefix operator.
  if (!PrevToken || PrevToken->is(TT_CastRParen))
    return TT_UnaryOperator;
  // Directly after ')', ']' or an identifier: postfix operator.
  if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
    return TT_TrailingUnaryOperator;

  return TT_UnaryOperator;
}

// Stack of contexts; the last element is the innermost one.
SmallVector<Context, 8> Contexts;

const FormatStyle &Style;
AnnotatedLine &Line;
// Token currently being annotated; null once the line is exhausted.
FormatToken *CurrentToken;
// Set once an 'auto' / '__auto_type' token has been typed on this line.
bool AutoFound;
const AdditionalKeywords &Keywords;

// Set of "<" tokens that do not open a template parameter list. If parseAngle
// determines that a specific token can't be a template opener, it will make
// the same decision irrespective of the decisions for tokens leading up to it.
// Store this information to prevent this from causing exponential runtime.
llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
};

// Pseudo precedence levels above all binary operators, used by
// ExpressionParser for unary operators and for '.' / '->'.
static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;

/// \brief Parses binary expressions by inserting fake parentheses based on
/// operator precedence.
class ExpressionParser {
public:
  ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
                   AnnotatedLine &Line)
      : Style(Style), Keywords(Keywords), Current(Line.First) {}

  /// \brief Parse expressions with the given operator precedence.
  ///
  /// Recurses with \p Precedence + 1 to consume tighter-binding operators
  /// first, links operators of exactly \p Precedence via \c NextOperator /
  /// \c OperatorIndex and finally wraps the parsed range in fake parentheses.
  void parse(int Precedence = 0) {
    // Skip 'return' and ObjC selector colons as they are not part of a binary
    // expression.
    while (Current && (Current->is(tok::kw_return) ||
                       (Current->is(tok::colon) &&
                        Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))))
      next();

    // Nothing left, or recursion went past the tightest-binding level.
    if (!Current || Precedence > PrecedenceArrowAndPeriod)
      return;

    // Conditional expressions need to be parsed separately for proper nesting.
    if (Precedence == prec::Conditional) {
      parseConditionalExpr();
      return;
    }

    // Parse unary operators, which all have a higher precedence than binary
    // operators.
    if (Precedence == PrecedenceUnaryOperator) {
      parseUnaryOperator();
      return;
    }

    FormatToken *Start = Current;
    FormatToken *LatestOperator = nullptr;
    unsigned OperatorIndex = 0;

    while (Current) {
      // Consume operators with higher precedence.
      parse(Precedence + 1);

      int CurrentPrecedence = getCurrentPrecedence();

      // A selector name at this level starts a new range; close the one
      // parsed so far if it contained an operator.
      if (Current && Current->is(TT_SelectorName) &&
          Precedence == CurrentPrecedence) {
        if (LatestOperator)
          addFakeParenthesis(Start, prec::Level(Precedence));
        Start = Current;
      }

      // At the end of the line, at a closing bracket or when an operator with
      // lower precedence than the one being parsed is found, stop; fake
      // parentheses are inserted after the loop.
      if (!Current ||
          (Current->closesScope() &&
           (Current->MatchingParen || Current->is(TT_TemplateString))) ||
          (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
          (CurrentPrecedence == prec::Conditional &&
           Precedence == prec::Assignment && Current->is(tok::colon))) {
        break;
      }

      // Consume scopes: (), [], <> and {}
      if (Current->opensScope()) {
        // A fragment of a JavaScript template string can look like '}..${' and
        // thus close a scope and open a new one at the same time.
        while (Current && (!Current->closesScope() || Current->opensScope())) {
          next();
          parse();
        }
        next();
      } else {
        // Operator found.
        if (CurrentPrecedence == Precedence) {
          if (LatestOperator)
            LatestOperator->NextOperator = Current;
          LatestOperator = Current;
          Current->OperatorIndex = OperatorIndex;
          ++OperatorIndex;
        }
        next(/*SkipPastLeadingComments=*/Precedence > 0);
      }
    }

    if (LatestOperator && (Current || Precedence > 0)) {
      // LatestOperator->LastOperator = true;
      if (Precedence == PrecedenceArrowAndPeriod) {
        // Call expressions don't have a binary operator precedence.
        addFakeParenthesis(Start, prec::Unknown);
      } else {
        addFakeParenthesis(Start, prec::Level(Precedence));
      }
    }
  }

private:
  /// \brief Gets the precedence (+1) of the given token for binary operators
  /// and other tokens that we treat like binary operators.
  ///
  /// \returns -1 if there is no current token or it is not treated as an
  /// operator.
  int getCurrentPrecedence() {
    if (Current) {
      const FormatToken *NextNonComment = Current->getNextNonComment();
      if (Current->is(TT_ConditionalExpr))
        return prec::Conditional;
      if (NextNonComment && Current->is(TT_SelectorName) &&
          (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||
           ((Style.Language == FormatStyle::LK_Proto ||
             Style.Language == FormatStyle::LK_TextProto) &&
            NextNonComment->is(tok::less))))
        return prec::Assignment;
      if (Current->is(TT_JsComputedPropertyName))
        return prec::Assignment;
      if (Current->is(TT_LambdaArrow))
        return prec::Comma;
      if (Current->is(TT_JsFatArrow))
        return prec::Assignment;
      if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
          (Current->is(tok::comment) && NextNonComment &&
           NextNonComment->is(TT_SelectorName)))
        return 0;
      if (Current->is(TT_RangeBasedForLoopColon))
        return prec::Comma;
      if ((Style.Language == FormatStyle::LK_Java ||
           Style.Language == FormatStyle::LK_JavaScript) &&
          Current->is(Keywords.kw_instanceof))
        return prec::Relational;
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Current->isOneOf(Keywords.kw_in, Keywords.kw_as))
        return prec::Relational;
      if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
        return Current->getPrecedence();
      if (Current->isOneOf(tok::period, tok::arrow))
        return PrecedenceArrowAndPeriod;
      if ((Style.Language == FormatStyle::LK_Java ||
           Style.Language == FormatStyle::LK_JavaScript) &&
          Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
                           Keywords.kw_throws))
        return 0;
    }
    return -1;
  }

  /// \brief Wraps the tokens from \p Start up to the token before \c Current
  /// in a pair of fake parentheses of level \p Precedence.
  ///
  /// If \c Current is null, only the opening fake parenthesis is recorded.
  void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
    Start->FakeLParens.push_back(Precedence);
    if (Precedence > prec::Unknown)
      Start->StartsBinaryExpression = true;
    if (Current) {
      // Place the closing fake parenthesis before any comments that directly
      // precede Current.
      FormatToken *Previous = Current->Previous;
      while (Previous->is(tok::comment) && Previous->Previous)
        Previous = Previous->Previous;
      ++Previous->FakeRParens;
      if (Precedence > prec::Unknown)
        Previous->EndsBinaryExpression = true;
    }
  }

  /// \brief Parse unary operator expressions and surround them with fake
  /// parentheses if appropriate.
  void parseUnaryOperator() {
    llvm::SmallVector<FormatToken *, 2> Tokens;
    while (Current && Current->is(TT_UnaryOperator)) {
      Tokens.push_back(Current);
      next();
    }
    parse(PrecedenceArrowAndPeriod);
    // Innermost (last seen) operator first.
    for (FormatToken *Token : llvm::reverse(Tokens))
      // The actual precedence doesn't matter.
      addFakeParenthesis(Token, prec::Unknown);
  }

  /// \brief Parses "cond ? a : b" and wraps it in fake parentheses of level
  /// \c prec::Conditional.
  ///
  /// Returns without adding parentheses when no '?' follows the condition or
  /// no token typed \c TT_ConditionalExpr follows the first branch.
  void parseConditionalExpr() {
    while (Current && Current->isTrailingComment()) {
      next();
    }
    FormatToken *Start = Current;
    parse(prec::LogicalOr);
    if (!Current || !Current->is(tok::question))
      return;
    next();
    parse(prec::Assignment);
    if (!Current || Current->isNot(TT_ConditionalExpr))
      return;
    next();
    parse(prec::Assignment);
    addFakeParenthesis(Start, prec::Conditional);
  }

  /// \brief Advances \c Current by one token and then past trailing comments.
  ///
  /// Trailing comments that start on a new line are only skipped when
  /// \p SkipPastLeadingComments is true.
  void next(bool SkipPastLeadingComments = true) {
    if (Current)
      Current = Current->Next;
    while (Current &&
           (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
           Current->isTrailingComment())
      Current = Current->Next;
  }

  const FormatStyle &Style;
  const AdditionalKeywords &Keywords;
  // Token the parser is currently looking at; null at the end of the line.
  FormatToken *Current;
};

} // end anonymous namespace

void TokenAnnotator::setCommentLineLevels(
    SmallVectorImpl<AnnotatedLine *> &Lines) {
  const AnnotatedLine *NextNonCommentLine = nullptr;
  for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
                                                          E = Lines.rend();
       I != E; ++I) {
    bool CommentLine = true;
    for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) {
      if (!Tok->is(tok::comment)) {
        CommentLine = false;
        break;
      }
    }

    // If the comment is currently aligned with the line immediately following
    // it, that's probably intentional and we should keep it.
    if (NextNonCommentLine && CommentLine &&
        NextNonCommentLine->First->NewlinesBefore <= 1 &&
        NextNonCommentLine->First->OriginalColumn ==
            (*I)->First->OriginalColumn) {
      // Align comments for preprocessor lines with the # in column 0.
      // Otherwise, align with the next line.
      (*I)->Level = (NextNonCommentLine->Type == LT_PreprocessorDirective ||
                     NextNonCommentLine->Type == LT_ImportStatement)
                        ?
0 1864 : NextNonCommentLine->Level; 1865 } else { 1866 NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr; 1867 } 1868 1869 setCommentLineLevels((*I)->Children); 1870 } 1871 } 1872 1873 static unsigned maxNestingDepth(const AnnotatedLine &Line) { 1874 unsigned Result = 0; 1875 for (const auto *Tok = Line.First; Tok != nullptr; Tok = Tok->Next) 1876 Result = std::max(Result, Tok->NestingLevel); 1877 return Result; 1878 } 1879 1880 void TokenAnnotator::annotate(AnnotatedLine &Line) { 1881 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), 1882 E = Line.Children.end(); 1883 I != E; ++I) { 1884 annotate(**I); 1885 } 1886 AnnotatingParser Parser(Style, Line, Keywords); 1887 Line.Type = Parser.parseLine(); 1888 1889 // With very deep nesting, ExpressionParser uses lots of stack and the 1890 // formatting algorithm is very slow. We're not going to do a good job here 1891 // anyway - it's probably generated code being formatted by mistake. 1892 // Just skip the whole line. 1893 if (maxNestingDepth(Line) > 50) 1894 Line.Type = LT_Invalid; 1895 1896 if (Line.Type == LT_Invalid) 1897 return; 1898 1899 ExpressionParser ExprParser(Style, Keywords, Line); 1900 ExprParser.parse(); 1901 1902 if (Line.startsWith(TT_ObjCMethodSpecifier)) 1903 Line.Type = LT_ObjCMethodDecl; 1904 else if (Line.startsWith(TT_ObjCDecl)) 1905 Line.Type = LT_ObjCDecl; 1906 else if (Line.startsWith(TT_ObjCProperty)) 1907 Line.Type = LT_ObjCProperty; 1908 1909 Line.First->SpacesRequiredBefore = 1; 1910 Line.First->CanBreakBefore = Line.First->MustBreakBefore; 1911 } 1912 1913 // This function heuristically determines whether 'Current' starts the name of a 1914 // function declaration. 
1915 static bool isFunctionDeclarationName(const FormatToken &Current, 1916 const AnnotatedLine &Line) { 1917 auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * { 1918 for (; Next; Next = Next->Next) { 1919 if (Next->is(TT_OverloadedOperatorLParen)) 1920 return Next; 1921 if (Next->is(TT_OverloadedOperator)) 1922 continue; 1923 if (Next->isOneOf(tok::kw_new, tok::kw_delete)) { 1924 // For 'new[]' and 'delete[]'. 1925 if (Next->Next && Next->Next->is(tok::l_square) && Next->Next->Next && 1926 Next->Next->Next->is(tok::r_square)) 1927 Next = Next->Next->Next; 1928 continue; 1929 } 1930 1931 break; 1932 } 1933 return nullptr; 1934 }; 1935 1936 // Find parentheses of parameter list. 1937 const FormatToken *Next = Current.Next; 1938 if (Current.is(tok::kw_operator)) { 1939 if (Current.Previous && Current.Previous->is(tok::coloncolon)) 1940 return false; 1941 Next = skipOperatorName(Next); 1942 } else { 1943 if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0) 1944 return false; 1945 for (; Next; Next = Next->Next) { 1946 if (Next->is(TT_TemplateOpener)) { 1947 Next = Next->MatchingParen; 1948 } else if (Next->is(tok::coloncolon)) { 1949 Next = Next->Next; 1950 if (!Next) 1951 return false; 1952 if (Next->is(tok::kw_operator)) { 1953 Next = skipOperatorName(Next->Next); 1954 break; 1955 } 1956 if (!Next->is(tok::identifier)) 1957 return false; 1958 } else if (Next->is(tok::l_paren)) { 1959 break; 1960 } else { 1961 return false; 1962 } 1963 } 1964 } 1965 1966 // Check whether parameter list can belong to a function declaration. 1967 if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen) 1968 return false; 1969 // If the lines ends with "{", this is likely an function definition. 1970 if (Line.Last->is(tok::l_brace)) 1971 return true; 1972 if (Next->Next == Next->MatchingParen) 1973 return true; // Empty parentheses. 1974 // If there is an &/&& after the r_paren, this is likely a function. 
1975 if (Next->MatchingParen->Next && 1976 Next->MatchingParen->Next->is(TT_PointerOrReference)) 1977 return true; 1978 for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen; 1979 Tok = Tok->Next) { 1980 if (Tok->is(tok::l_paren) && Tok->MatchingParen) { 1981 Tok = Tok->MatchingParen; 1982 continue; 1983 } 1984 if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() || 1985 Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis)) 1986 return true; 1987 if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) || 1988 Tok->Tok.isLiteral()) 1989 return false; 1990 } 1991 return false; 1992 } 1993 1994 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const { 1995 assert(Line.MightBeFunctionDecl); 1996 1997 if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel || 1998 Style.AlwaysBreakAfterReturnType == 1999 FormatStyle::RTBS_TopLevelDefinitions) && 2000 Line.Level > 0) 2001 return false; 2002 2003 switch (Style.AlwaysBreakAfterReturnType) { 2004 case FormatStyle::RTBS_None: 2005 return false; 2006 case FormatStyle::RTBS_All: 2007 case FormatStyle::RTBS_TopLevel: 2008 return true; 2009 case FormatStyle::RTBS_AllDefinitions: 2010 case FormatStyle::RTBS_TopLevelDefinitions: 2011 return Line.mightBeFunctionDefinition(); 2012 } 2013 2014 return false; 2015 } 2016 2017 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { 2018 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), 2019 E = Line.Children.end(); 2020 I != E; ++I) { 2021 calculateFormattingInformation(**I); 2022 } 2023 2024 Line.First->TotalLength = 2025 Line.First->IsMultiline ? 
Style.ColumnLimit 2026 : Line.FirstStartColumn + Line.First->ColumnWidth; 2027 FormatToken *Current = Line.First->Next; 2028 bool InFunctionDecl = Line.MightBeFunctionDecl; 2029 while (Current) { 2030 if (isFunctionDeclarationName(*Current, Line)) 2031 Current->Type = TT_FunctionDeclarationName; 2032 if (Current->is(TT_LineComment)) { 2033 if (Current->Previous->BlockKind == BK_BracedInit && 2034 Current->Previous->opensScope()) 2035 Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1; 2036 else 2037 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; 2038 2039 // If we find a trailing comment, iterate backwards to determine whether 2040 // it seems to relate to a specific parameter. If so, break before that 2041 // parameter to avoid changing the comment's meaning. E.g. don't move 'b' 2042 // to the previous line in: 2043 // SomeFunction(a, 2044 // b, // comment 2045 // c); 2046 if (!Current->HasUnescapedNewline) { 2047 for (FormatToken *Parameter = Current->Previous; Parameter; 2048 Parameter = Parameter->Previous) { 2049 if (Parameter->isOneOf(tok::comment, tok::r_brace)) 2050 break; 2051 if (Parameter->Previous && Parameter->Previous->is(tok::comma)) { 2052 if (!Parameter->Previous->is(TT_CtorInitializerComma) && 2053 Parameter->HasUnescapedNewline) 2054 Parameter->MustBreakBefore = true; 2055 break; 2056 } 2057 } 2058 } 2059 } else if (Current->SpacesRequiredBefore == 0 && 2060 spaceRequiredBefore(Line, *Current)) { 2061 Current->SpacesRequiredBefore = 1; 2062 } 2063 2064 Current->MustBreakBefore = 2065 Current->MustBreakBefore || mustBreakBefore(Line, *Current); 2066 2067 if (!Current->MustBreakBefore && InFunctionDecl && 2068 Current->is(TT_FunctionDeclarationName)) 2069 Current->MustBreakBefore = mustBreakForReturnType(Line); 2070 2071 Current->CanBreakBefore = 2072 Current->MustBreakBefore || canBreakBefore(Line, *Current); 2073 unsigned ChildSize = 0; 2074 if (Current->Previous->Children.size() == 1) { 2075 FormatToken 
&LastOfChild = *Current->Previous->Children[0]->Last; 2076 ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit 2077 : LastOfChild.TotalLength + 1; 2078 } 2079 const FormatToken *Prev = Current->Previous; 2080 if (Current->MustBreakBefore || Prev->Children.size() > 1 || 2081 (Prev->Children.size() == 1 && 2082 Prev->Children[0]->First->MustBreakBefore) || 2083 Current->IsMultiline) 2084 Current->TotalLength = Prev->TotalLength + Style.ColumnLimit; 2085 else 2086 Current->TotalLength = Prev->TotalLength + Current->ColumnWidth + 2087 ChildSize + Current->SpacesRequiredBefore; 2088 2089 if (Current->is(TT_CtorInitializerColon)) 2090 InFunctionDecl = false; 2091 2092 // FIXME: Only calculate this if CanBreakBefore is true once static 2093 // initializers etc. are sorted out. 2094 // FIXME: Move magic numbers to a better place. 2095 Current->SplitPenalty = 20 * Current->BindingStrength + 2096 splitPenalty(Line, *Current, InFunctionDecl); 2097 2098 Current = Current->Next; 2099 } 2100 2101 calculateUnbreakableTailLengths(Line); 2102 unsigned IndentLevel = Line.Level; 2103 for (Current = Line.First; Current != nullptr; Current = Current->Next) { 2104 if (Current->Role) 2105 Current->Role->precomputeFormattingInfos(Current); 2106 if (Current->MatchingParen && 2107 Current->MatchingParen->opensBlockOrBlockTypeList(Style)) { 2108 assert(IndentLevel > 0); 2109 --IndentLevel; 2110 } 2111 Current->IndentLevel = IndentLevel; 2112 if (Current->opensBlockOrBlockTypeList(Style)) 2113 ++IndentLevel; 2114 } 2115 2116 DEBUG({ printDebugInfo(Line); }); 2117 } 2118 2119 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) { 2120 unsigned UnbreakableTailLength = 0; 2121 FormatToken *Current = Line.Last; 2122 while (Current) { 2123 Current->UnbreakableTailLength = UnbreakableTailLength; 2124 if (Current->CanBreakBefore || 2125 Current->isOneOf(tok::comment, tok::string_literal)) { 2126 UnbreakableTailLength = 0; 2127 } else { 2128 UnbreakableTailLength += 
/// \brief Returns the penalty for breaking the line between \c Tok.Previous
/// (called \c Left below) and \p Tok (called \c Right below).
///
/// The rules are checked from top to bottom and the first match wins, so
/// their order is significant. Smaller values mark cheaper places to break.
///
/// \param Line The annotated line that contains \p Tok.
/// \param Tok The token before which a break is considered. \c Tok.Previous
///        is dereferenced unconditionally, so \p Tok must not be the first
///        token of the line.
/// \param InFunctionDecl Whether \p Tok is treated as being part of a
///        function declaration.
unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
                                      const FormatToken &Tok,
                                      bool InFunctionDecl) {
  const FormatToken &Left = *Tok.Previous;
  const FormatToken &Right = Tok;

  if (Left.is(tok::semi))
    return 0;

  // Language-specific rules for Java and JavaScript take priority over the
  // generic ones below.
  if (Style.Language == FormatStyle::LK_Java) {
    if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
      return 1;
    if (Right.is(Keywords.kw_implements))
      return 2;
    if (Left.is(tok::comma) && Left.NestingLevel == 0)
      return 3;
  } else if (Style.Language == FormatStyle::LK_JavaScript) {
    if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
      return 100;
    if (Left.is(TT_JsTypeColon))
      return 35;
    if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
        (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
      return 100;
    // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".
    if (Left.opensScope() && Right.closesScope())
      return 200;
  }

  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
    return 1;
  if (Right.is(tok::l_square)) {
    if (Style.Language == FormatStyle::LK_Proto)
      return 1;
    if (Left.is(tok::r_square))
      return 200;
    // Slightly prefer formatting local lambda definitions like functions.
    if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
      return 35;
    if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
                       TT_ArrayInitializerLSquare,
                       TT_DesignatedInitializerLSquare, TT_AttributeSquare))
      return 500;
  }

  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
      Right.is(tok::kw_operator)) {
    if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
      return 3;
    if (Left.is(TT_StartOfName))
      return 110;
    if (InFunctionDecl && Right.NestingLevel == 0)
      return Style.PenaltyReturnTypeOnItsOwnLine;
    return 200;
  }
  if (Right.is(TT_PointerOrReference))
    return 190;
  if (Right.is(TT_LambdaArrow))
    return 110;
  if (Left.is(tok::equal) && Right.is(tok::l_brace))
    return 160;
  if (Left.is(TT_CastRParen))
    return 100;
  if (Left.is(tok::coloncolon) ||
      (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
    return 500;
  if (Left.isOneOf(tok::kw_class, tok::kw_struct))
    return 5000;
  if (Left.is(tok::comment))
    return 1000;

  if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
                   TT_CtorInitializerColon))
    return 2;

  if (Right.isMemberAccess()) {
    // Breaking before the "./->" of a chained call/member access is reasonably
    // cheap, as formatting those with one call per line is generally
    // desirable. In particular, it should be cheaper to break before the call
    // than it is to break inside a call's parameters, which could lead to weird
    // "hanging" indents. The exception is the very last "./->" to support this
    // frequent pattern:
    //
    //   aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
    //       dddddddd);
    //
    // which might otherwise be blown up onto many lines. Here, clang-format
    // won't produce "hanging" indents anyway as there is no other trailing
    // call.
    //
    // Also apply a higher penalty if the preceding part is not a call, as a
    // break there might lead to a wrapping like:
    //
    //   aaaaaaa
    //       .aaaaaaaaa.bbbbbbbb(cccccccc);
    return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
               ? 150
               : 35;
  }

  if (Right.is(TT_TrailingAnnotation) &&
      (!Right.Next || Right.Next->isNot(tok::l_paren))) {
    // Moving trailing annotations to the next line is fine for ObjC method
    // declarations.
    if (Line.startsWith(TT_ObjCMethodSpecifier))
      return 10;
    // Generally, breaking before a trailing annotation is bad unless it is
    // function-like. It seems to be especially preferable to keep standard
    // annotations (i.e. "const", "final" and "override") on the same line.
    // Use a slightly higher penalty after ")" so that annotations like
    // "const override" are kept together.
    bool is_short_annotation = Right.TokenText.size() < 10;
    return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
  }

  // In for-loops, prefer breaking at ',' and ';'.
  if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
    return 4;

  // In Objective-C method expressions, prefer breaking before "param:" over
  // breaking after it.
  if (Right.is(TT_SelectorName))
    return 0;
  if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
    return Line.MightBeFunctionDecl ? 50 : 500;

  if (Left.is(tok::l_paren) && InFunctionDecl &&
      Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)
    return 100;
  if (Left.is(tok::l_paren) && Left.Previous &&
      (Left.Previous->isOneOf(tok::kw_if, tok::kw_for) ||
       Left.Previous->endsSequence(tok::kw_constexpr, tok::kw_if)))
    return 1000;
  if (Left.is(tok::equal) && InFunctionDecl)
    return 110;
  if (Right.is(tok::r_brace))
    return 1;
  if (Left.is(TT_TemplateOpener))
    return 100;
  if (Left.opensScope()) {
    if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
      return 0;
    return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
                                   : 19;
  }
  if (Left.is(TT_JavaAnnotation))
    return 50;

  // Rules for string-label operands (see isLabelString()) around '+', ','
  // and '<<'.
  if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
      Left.Previous->isLabelString() &&
      (Left.NextOperator || Left.OperatorIndex != 0))
    return 50;
  if (Right.is(tok::plus) && Left.isLabelString() &&
      (Right.NextOperator || Right.OperatorIndex != 0))
    return 25;
  if (Left.is(tok::comma))
    return 1;
  if (Right.is(tok::lessless) && Left.isLabelString() &&
      (Right.NextOperator || Right.OperatorIndex != 1))
    return 25;
  if (Right.is(tok::lessless)) {
    // Breaking at a << is really cheap.
    if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0)
      // Slightly prefer to break before the first one in log-like statements.
      return 2;
    return 1;
  }
  if (Left.is(TT_ConditionalExpr))
    return prec::Conditional;

  // Fall back to the operator precedence of Left or, if Left has none, of
  // Right; the numeric value of the precedence level is used as the penalty,
  // except for assignments, which have a configurable penalty.
  prec::Level Level = Left.getPrecedence();
  if (Level == prec::Unknown)
    Level = Right.getPrecedence();
  if (Level == prec::Assignment)
    return Style.PenaltyBreakAssignment;
  if (Level != prec::Unknown)
    return Level;

  return 3;
}
Style.SpacesInCStyleCastParentheses 2335 : Style.SpacesInParentheses; 2336 if (Right.isOneOf(tok::semi, tok::comma)) 2337 return false; 2338 if (Right.is(tok::less) && Line.Type == LT_ObjCDecl && 2339 Style.ObjCSpaceBeforeProtocolList) 2340 return true; 2341 if (Right.is(tok::less) && Left.is(tok::kw_template)) 2342 return Style.SpaceAfterTemplateKeyword; 2343 if (Left.isOneOf(tok::exclaim, tok::tilde)) 2344 return false; 2345 if (Left.is(tok::at) && 2346 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant, 2347 tok::numeric_constant, tok::l_paren, tok::l_brace, 2348 tok::kw_true, tok::kw_false)) 2349 return false; 2350 if (Left.is(tok::colon)) 2351 return !Left.is(TT_ObjCMethodExpr); 2352 if (Left.is(tok::coloncolon)) 2353 return false; 2354 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) { 2355 if (Style.Language == FormatStyle::LK_TextProto || 2356 (Style.Language == FormatStyle::LK_Proto && 2357 (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) { 2358 // Format empty list as `<>`. 
2359 if (Left.is(tok::less) && Right.is(tok::greater)) 2360 return false; 2361 return !Style.Cpp11BracedListStyle; 2362 } 2363 return false; 2364 } 2365 if (Right.is(tok::ellipsis)) 2366 return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous && 2367 Left.Previous->is(tok::kw_case)); 2368 if (Left.is(tok::l_square) && Right.is(tok::amp)) 2369 return false; 2370 if (Right.is(TT_PointerOrReference)) { 2371 if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) { 2372 if (!Left.MatchingParen) 2373 return true; 2374 FormatToken *TokenBeforeMatchingParen = 2375 Left.MatchingParen->getPreviousNonComment(); 2376 if (!TokenBeforeMatchingParen || 2377 !TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype)) 2378 return true; 2379 } 2380 return (Left.Tok.isLiteral() || 2381 (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) && 2382 (Style.PointerAlignment != FormatStyle::PAS_Left || 2383 (Line.IsMultiVariableDeclStmt && 2384 (Left.NestingLevel == 0 || 2385 (Left.NestingLevel == 1 && Line.First->is(tok::kw_for))))))); 2386 } 2387 if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) && 2388 (!Left.is(TT_PointerOrReference) || 2389 (Style.PointerAlignment != FormatStyle::PAS_Right && 2390 !Line.IsMultiVariableDeclStmt))) 2391 return true; 2392 if (Left.is(TT_PointerOrReference)) 2393 return Right.Tok.isLiteral() || Right.is(TT_BlockComment) || 2394 (Right.isOneOf(Keywords.kw_override, Keywords.kw_final) && 2395 !Right.is(TT_StartOfName)) || 2396 (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) || 2397 (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare, 2398 tok::l_paren) && 2399 (Style.PointerAlignment != FormatStyle::PAS_Right && 2400 !Line.IsMultiVariableDeclStmt) && 2401 Left.Previous && 2402 !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon)); 2403 if (Right.is(tok::star) && Left.is(tok::l_paren)) 2404 return false; 2405 const auto SpaceRequiredForArrayInitializerLSquare = 2406 [](const FormatToken 
&LSquareTok, const FormatStyle &Style) { 2407 return Style.SpacesInContainerLiterals || 2408 ((Style.Language == FormatStyle::LK_Proto || 2409 Style.Language == FormatStyle::LK_TextProto) && 2410 !Style.Cpp11BracedListStyle && 2411 LSquareTok.endsSequence(tok::l_square, tok::colon, 2412 TT_SelectorName)); 2413 }; 2414 if (Left.is(tok::l_square)) 2415 return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) && 2416 SpaceRequiredForArrayInitializerLSquare(Left, Style)) || 2417 (Left.isOneOf(TT_ArraySubscriptLSquare, 2418 TT_StructuredBindingLSquare) && 2419 Style.SpacesInSquareBrackets && Right.isNot(tok::r_square)); 2420 if (Right.is(tok::r_square)) 2421 return Right.MatchingParen && 2422 ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) && 2423 SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen, 2424 Style)) || 2425 (Style.SpacesInSquareBrackets && 2426 Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare, 2427 TT_StructuredBindingLSquare)) || 2428 Right.MatchingParen->is(TT_AttributeParen)); 2429 if (Right.is(tok::l_square) && 2430 !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare, 2431 TT_DesignatedInitializerLSquare, 2432 TT_StructuredBindingLSquare, TT_AttributeSquare) && 2433 !Left.isOneOf(tok::numeric_constant, TT_DictLiteral)) 2434 return false; 2435 if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) 2436 return !Left.Children.empty(); // No spaces in "{}". 
2437 if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) || 2438 (Right.is(tok::r_brace) && Right.MatchingParen && 2439 Right.MatchingParen->BlockKind != BK_Block)) 2440 return !Style.Cpp11BracedListStyle; 2441 if (Left.is(TT_BlockComment)) 2442 return !Left.TokenText.endswith("=*/"); 2443 if (Right.is(tok::l_paren)) { 2444 if ((Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) || 2445 (Left.is(tok::r_square) && Left.is(TT_AttributeSquare))) 2446 return true; 2447 return Line.Type == LT_ObjCDecl || Left.is(tok::semi) || 2448 (Style.SpaceBeforeParens != FormatStyle::SBPO_Never && 2449 (Left.isOneOf(tok::kw_if, tok::pp_elif, tok::kw_for, tok::kw_while, 2450 tok::kw_switch, tok::kw_case, TT_ForEachMacro, 2451 TT_ObjCForIn) || 2452 Left.endsSequence(tok::kw_constexpr, tok::kw_if) || 2453 (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch, 2454 tok::kw_new, tok::kw_delete) && 2455 (!Left.Previous || Left.Previous->isNot(tok::period))))) || 2456 (Style.SpaceBeforeParens == FormatStyle::SBPO_Always && 2457 (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() || 2458 Left.is(tok::r_paren)) && 2459 Line.Type != LT_PreprocessorDirective); 2460 } 2461 if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword) 2462 return false; 2463 if (Right.is(TT_UnaryOperator)) 2464 return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) && 2465 (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr)); 2466 if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square, 2467 tok::r_paren) || 2468 Left.isSimpleTypeSpecifier()) && 2469 Right.is(tok::l_brace) && Right.getNextNonComment() && 2470 Right.BlockKind != BK_Block) 2471 return false; 2472 if (Left.is(tok::period) || Right.is(tok::period)) 2473 return false; 2474 if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L") 2475 return false; 2476 if (Left.is(TT_TemplateCloser) && Left.MatchingParen && 2477 Left.MatchingParen->Previous && 2478 
Left.MatchingParen->Previous->is(tok::period)) 2479 // A.<B<C<...>>>DoSomething(); 2480 return false; 2481 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square)) 2482 return false; 2483 if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at)) 2484 // Objective-C dictionary literal -> no space after opening brace. 2485 return false; 2486 if (Right.is(tok::r_brace) && Right.MatchingParen && 2487 Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at)) 2488 // Objective-C dictionary literal -> no space before closing brace. 2489 return false; 2490 return true; 2491 } 2492 2493 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, 2494 const FormatToken &Right) { 2495 const FormatToken &Left = *Right.Previous; 2496 if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo()) 2497 return true; // Never ever merge two identifiers. 2498 if (Style.isCpp()) { 2499 if (Left.is(tok::kw_operator)) 2500 return Right.is(tok::coloncolon); 2501 } else if (Style.Language == FormatStyle::LK_Proto || 2502 Style.Language == FormatStyle::LK_TextProto) { 2503 if (Right.is(tok::period) && 2504 Left.isOneOf(Keywords.kw_optional, Keywords.kw_required, 2505 Keywords.kw_repeated, Keywords.kw_extend)) 2506 return true; 2507 if (Right.is(tok::l_paren) && 2508 Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) 2509 return true; 2510 if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName)) 2511 return true; 2512 // Slashes occur in text protocol extension syntax: [type/type] { ... }. 2513 if (Left.is(tok::slash) || Right.is(tok::slash)) 2514 return false; 2515 if (Left.MatchingParen && Left.MatchingParen->is(TT_ProtoExtensionLSquare) && 2516 Right.isOneOf(tok::l_brace, tok::less)) 2517 return !Style.Cpp11BracedListStyle; 2518 // A percent is probably part of a formatting specification, such as %lld. 
2519 if (Left.is(tok::percent)) 2520 return false; 2521 } else if (Style.Language == FormatStyle::LK_JavaScript) { 2522 if (Left.is(TT_JsFatArrow)) 2523 return true; 2524 // for await ( ... 2525 if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous && 2526 Left.Previous->is(tok::kw_for)) 2527 return true; 2528 if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) && 2529 Right.MatchingParen) { 2530 const FormatToken *Next = Right.MatchingParen->getNextNonComment(); 2531 // An async arrow function, for example: `x = async () => foo();`, 2532 // as opposed to calling a function called async: `x = async();` 2533 if (Next && Next->is(TT_JsFatArrow)) 2534 return true; 2535 } 2536 if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || 2537 (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) 2538 return false; 2539 // In tagged template literals ("html`bar baz`"), there is no space between 2540 // the tag identifier and the template string. getIdentifierInfo makes sure 2541 // that the identifier is not a pseudo keyword like `yield`, either. 2542 if (Left.is(tok::identifier) && Keywords.IsJavaScriptIdentifier(Left) && 2543 Right.is(TT_TemplateString)) 2544 return false; 2545 if (Right.is(tok::star) && 2546 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) 2547 return false; 2548 if (Right.isOneOf(tok::l_brace, tok::l_square) && 2549 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield, 2550 Keywords.kw_extends, Keywords.kw_implements)) 2551 return true; 2552 if (Right.is(tok::l_paren)) { 2553 // JS methods can use some keywords as names (e.g. `delete()`). 2554 if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo()) 2555 return false; 2556 // Valid JS method names can include keywords, e.g. `foo.delete()` or 2557 // `bar.instanceof()`. Recognize call positions by preceding period. 
2558 if (Left.Previous && Left.Previous->is(tok::period) && 2559 Left.Tok.getIdentifierInfo()) 2560 return false; 2561 // Additional unary JavaScript operators that need a space after. 2562 if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof, 2563 tok::kw_void)) 2564 return true; 2565 } 2566 if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in, 2567 tok::kw_const) || 2568 // "of" is only a keyword if it appears after another identifier 2569 // (e.g. as "const x of y" in a for loop), or after a destructuring 2570 // operation (const [x, y] of z, const {a, b} of c). 2571 (Left.is(Keywords.kw_of) && Left.Previous && 2572 (Left.Previous->Tok.is(tok::identifier) || 2573 Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) && 2574 (!Left.Previous || !Left.Previous->is(tok::period))) 2575 return true; 2576 if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous && 2577 Left.Previous->is(tok::period) && Right.is(tok::l_paren)) 2578 return false; 2579 if (Left.is(Keywords.kw_as) && 2580 Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren)) 2581 return true; 2582 if (Left.is(tok::kw_default) && Left.Previous && 2583 Left.Previous->is(tok::kw_export)) 2584 return true; 2585 if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace)) 2586 return true; 2587 if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion)) 2588 return false; 2589 if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator)) 2590 return false; 2591 if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) && 2592 Line.First->isOneOf(Keywords.kw_import, tok::kw_export)) 2593 return false; 2594 if (Left.is(tok::ellipsis)) 2595 return false; 2596 if (Left.is(TT_TemplateCloser) && 2597 !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square, 2598 Keywords.kw_implements, Keywords.kw_extends)) 2599 // Type assertions ('<type>expr') are not followed by whitespace. 
Other 2600 // locations that should have whitespace following are identified by the 2601 // above set of follower tokens. 2602 return false; 2603 if (Right.is(TT_JsNonNullAssertion)) 2604 return false; 2605 if (Left.is(TT_JsNonNullAssertion) && 2606 Right.isOneOf(Keywords.kw_as, Keywords.kw_in)) 2607 return true; // "x! as string", "x! in y" 2608 } else if (Style.Language == FormatStyle::LK_Java) { 2609 if (Left.is(tok::r_square) && Right.is(tok::l_brace)) 2610 return true; 2611 if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) 2612 return Style.SpaceBeforeParens != FormatStyle::SBPO_Never; 2613 if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private, 2614 tok::kw_protected) || 2615 Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract, 2616 Keywords.kw_native)) && 2617 Right.is(TT_TemplateOpener)) 2618 return true; 2619 } 2620 if (Left.is(TT_ImplicitStringLiteral)) 2621 return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd(); 2622 if (Line.Type == LT_ObjCMethodDecl) { 2623 if (Left.is(TT_ObjCMethodSpecifier)) 2624 return true; 2625 if (Left.is(tok::r_paren) && Right.isOneOf(tok::identifier, tok::kw_new)) 2626 // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a 2627 // keyword in Objective-C, and '+ (instancetype)new;' is a standard class 2628 // method declaration. 
2629 return false; 2630 } 2631 if (Line.Type == LT_ObjCProperty && 2632 (Right.is(tok::equal) || Left.is(tok::equal))) 2633 return false; 2634 2635 if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) || 2636 Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) 2637 return true; 2638 if (Right.is(TT_OverloadedOperatorLParen)) 2639 return Style.SpaceBeforeParens == FormatStyle::SBPO_Always; 2640 if (Left.is(tok::comma)) 2641 return true; 2642 if (Right.is(tok::comma)) 2643 return false; 2644 if (Right.is(TT_ObjCBlockLParen)) 2645 return true; 2646 if (Right.is(TT_CtorInitializerColon)) 2647 return Style.SpaceBeforeCtorInitializerColon; 2648 if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon) 2649 return false; 2650 if (Right.is(TT_RangeBasedForLoopColon) && 2651 !Style.SpaceBeforeRangeBasedForLoopColon) 2652 return false; 2653 if (Right.is(tok::colon)) { 2654 if (Line.First->isOneOf(tok::kw_case, tok::kw_default) || 2655 !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi)) 2656 return false; 2657 if (Right.is(TT_ObjCMethodExpr)) 2658 return false; 2659 if (Left.is(tok::question)) 2660 return false; 2661 if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon)) 2662 return false; 2663 if (Right.is(TT_DictLiteral)) 2664 return Style.SpacesInContainerLiterals; 2665 if (Right.is(TT_AttributeColon)) 2666 return false; 2667 return true; 2668 } 2669 if (Left.is(TT_UnaryOperator)) 2670 return Right.is(TT_BinaryOperator); 2671 2672 // If the next token is a binary operator or a selector name, we have 2673 // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly. 
2674 if (Left.is(TT_CastRParen)) 2675 return Style.SpaceAfterCStyleCast || 2676 Right.isOneOf(TT_BinaryOperator, TT_SelectorName); 2677 2678 if (Left.is(tok::greater) && Right.is(tok::greater)) { 2679 if (Style.Language == FormatStyle::LK_TextProto || 2680 (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral))) 2681 return !Style.Cpp11BracedListStyle; 2682 return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) && 2683 (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles); 2684 } 2685 if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) || 2686 Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) || 2687 (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod))) 2688 return false; 2689 if (!Style.SpaceBeforeAssignmentOperators && 2690 Right.getPrecedence() == prec::Assignment) 2691 return false; 2692 if (Right.is(tok::coloncolon) && Left.is(tok::identifier)) 2693 // Generally don't remove existing spaces between an identifier and "::". 2694 // The identifier might actually be a macro name such as ALWAYS_INLINE. If 2695 // this turns out to be too lenient, add analysis of the identifier itself. 2696 return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd(); 2697 if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment)) 2698 return (Left.is(TT_TemplateOpener) && 2699 Style.Standard == FormatStyle::LS_Cpp03) || 2700 !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, 2701 tok::kw___super, TT_TemplateCloser, 2702 TT_TemplateOpener)) || 2703 (Left.is(tok ::l_paren) && Style.SpacesInParentheses); 2704 if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser))) 2705 return Style.SpacesInAngles; 2706 // Space before TT_StructuredBindingLSquare. 
2707 if (Right.is(TT_StructuredBindingLSquare)) 2708 return !Left.isOneOf(tok::amp, tok::ampamp) || 2709 Style.PointerAlignment != FormatStyle::PAS_Right; 2710 // Space before & or && following a TT_StructuredBindingLSquare. 2711 if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) && 2712 Right.isOneOf(tok::amp, tok::ampamp)) 2713 return Style.PointerAlignment != FormatStyle::PAS_Left; 2714 if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) || 2715 (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && 2716 !Right.is(tok::r_paren))) 2717 return true; 2718 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) && 2719 Right.isNot(TT_FunctionTypeLParen)) 2720 return Style.SpaceBeforeParens == FormatStyle::SBPO_Always; 2721 if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) && 2722 Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen)) 2723 return false; 2724 if (Right.is(tok::less) && Left.isNot(tok::l_paren) && 2725 Line.startsWith(tok::hash)) 2726 return true; 2727 if (Right.is(TT_TrailingUnaryOperator)) 2728 return false; 2729 if (Left.is(TT_RegexLiteral)) 2730 return false; 2731 return spaceRequiredBetween(Line, Left, Right); 2732 } 2733 2734 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style. 2735 static bool isAllmanBrace(const FormatToken &Tok) { 2736 return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block && 2737 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral); 2738 } 2739 2740 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, 2741 const FormatToken &Right) { 2742 const FormatToken &Left = *Right.Previous; 2743 if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0) 2744 return true; 2745 2746 if (Style.Language == FormatStyle::LK_JavaScript) { 2747 // FIXME: This might apply to other languages and token kinds. 
2748 if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous && 2749 Left.Previous->is(tok::string_literal)) 2750 return true; 2751 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 && 2752 Left.Previous && Left.Previous->is(tok::equal) && 2753 Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export, 2754 tok::kw_const) && 2755 // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match 2756 // above. 2757 !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let)) 2758 // Object literals on the top level of a file are treated as "enum-style". 2759 // Each key/value pair is put on a separate line, instead of bin-packing. 2760 return true; 2761 if (Left.is(tok::l_brace) && Line.Level == 0 && 2762 (Line.startsWith(tok::kw_enum) || 2763 Line.startsWith(tok::kw_const, tok::kw_enum) || 2764 Line.startsWith(tok::kw_export, tok::kw_enum) || 2765 Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum))) 2766 // JavaScript top-level enum key/value pairs are put on separate lines 2767 // instead of bin-packing. 2768 return true; 2769 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && 2770 !Left.Children.empty()) 2771 // Support AllowShortFunctionsOnASingleLine for JavaScript. 
2772 return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None || 2773 Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty || 2774 (Left.NestingLevel == 0 && Line.Level == 0 && 2775 Style.AllowShortFunctionsOnASingleLine & 2776 FormatStyle::SFS_InlineOnly); 2777 } else if (Style.Language == FormatStyle::LK_Java) { 2778 if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next && 2779 Right.Next->is(tok::string_literal)) 2780 return true; 2781 } else if (Style.Language == FormatStyle::LK_Cpp || 2782 Style.Language == FormatStyle::LK_ObjC || 2783 Style.Language == FormatStyle::LK_Proto || 2784 Style.Language == FormatStyle::LK_TextProto) { 2785 if (Left.isStringLiteral() && Right.isStringLiteral()) 2786 return true; 2787 } 2788 2789 // If the last token before a '}', ']', or ')' is a comma or a trailing 2790 // comment, the intention is to insert a line break after it in order to make 2791 // shuffling around entries easier. Import statements, especially in 2792 // JavaScript, can be an exception to this rule. 
2793 if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) { 2794 const FormatToken *BeforeClosingBrace = nullptr; 2795 if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || 2796 (Style.Language == FormatStyle::LK_JavaScript && 2797 Left.is(tok::l_paren))) && 2798 Left.BlockKind != BK_Block && Left.MatchingParen) 2799 BeforeClosingBrace = Left.MatchingParen->Previous; 2800 else if (Right.MatchingParen && 2801 (Right.MatchingParen->isOneOf(tok::l_brace, 2802 TT_ArrayInitializerLSquare) || 2803 (Style.Language == FormatStyle::LK_JavaScript && 2804 Right.MatchingParen->is(tok::l_paren)))) 2805 BeforeClosingBrace = &Left; 2806 if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) || 2807 BeforeClosingBrace->isTrailingComment())) 2808 return true; 2809 } 2810 2811 if (Right.is(tok::comment)) 2812 return Left.BlockKind != BK_BracedInit && 2813 Left.isNot(TT_CtorInitializerColon) && 2814 (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline); 2815 if (Left.isTrailingComment()) 2816 return true; 2817 if (Right.Previous->IsUnterminatedLiteral) 2818 return true; 2819 if (Right.is(tok::lessless) && Right.Next && 2820 Right.Previous->is(tok::string_literal) && 2821 Right.Next->is(tok::string_literal)) 2822 return true; 2823 if (Right.Previous->ClosesTemplateDeclaration && 2824 Right.Previous->MatchingParen && 2825 Right.Previous->MatchingParen->NestingLevel == 0 && 2826 Style.AlwaysBreakTemplateDeclarations) 2827 return true; 2828 if (Right.is(TT_CtorInitializerComma) && 2829 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma && 2830 !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) 2831 return true; 2832 if (Right.is(TT_CtorInitializerColon) && 2833 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma && 2834 !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) 2835 return true; 2836 // Break only if we have multiple inheritance. 
2837 if (Style.BreakBeforeInheritanceComma && Right.is(TT_InheritanceComma)) 2838 return true; 2839 if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\"")) 2840 // Multiline raw string literals are special wrt. line breaks. The author 2841 // has made a deliberate choice and might have aligned the contents of the 2842 // string literal accordingly. Thus, we try keep existing line breaks. 2843 return Right.IsMultiline && Right.NewlinesBefore > 0; 2844 if ((Right.Previous->is(tok::l_brace) || 2845 (Right.Previous->is(tok::less) && Right.Previous->Previous && 2846 Right.Previous->Previous->is(tok::equal))) && 2847 Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) { 2848 // Don't put enums or option definitions onto single lines in protocol 2849 // buffers. 2850 return true; 2851 } 2852 if (Right.is(TT_InlineASMBrace)) 2853 return Right.HasUnescapedNewline; 2854 if (isAllmanBrace(Left) || isAllmanBrace(Right)) 2855 return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) || 2856 (Line.startsWith(tok::kw_typedef, tok::kw_enum) && 2857 Style.BraceWrapping.AfterEnum) || 2858 (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) || 2859 (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct); 2860 if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine) 2861 return true; 2862 2863 if ((Style.Language == FormatStyle::LK_Java || 2864 Style.Language == FormatStyle::LK_JavaScript) && 2865 Left.is(TT_LeadingJavaAnnotation) && 2866 Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) && 2867 (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations)) 2868 return true; 2869 2870 if (Right.is(TT_ProtoExtensionLSquare)) 2871 return true; 2872 2873 return false; 2874 } 2875 2876 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, 2877 const FormatToken &Right) { 2878 const FormatToken &Left = *Right.Previous; 2879 2880 // Language-specific stuff. 
2881 if (Style.Language == FormatStyle::LK_Java) { 2882 if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends, 2883 Keywords.kw_implements)) 2884 return false; 2885 if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends, 2886 Keywords.kw_implements)) 2887 return true; 2888 } else if (Style.Language == FormatStyle::LK_JavaScript) { 2889 const FormatToken *NonComment = Right.getPreviousNonComment(); 2890 if (NonComment && 2891 NonComment->isOneOf( 2892 tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break, 2893 tok::kw_throw, Keywords.kw_interface, Keywords.kw_type, 2894 tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected, 2895 Keywords.kw_readonly, Keywords.kw_abstract, Keywords.kw_get, 2896 Keywords.kw_set, Keywords.kw_async, Keywords.kw_await)) 2897 return false; // Otherwise automatic semicolon insertion would trigger. 2898 if (Right.NestingLevel == 0 && 2899 (Left.Tok.getIdentifierInfo() || 2900 Left.isOneOf(tok::r_square, tok::r_paren)) && 2901 Right.isOneOf(tok::l_square, tok::l_paren)) 2902 return false; // Otherwise automatic semicolon insertion would trigger. 
2903 if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace)) 2904 return false; 2905 if (Left.is(TT_JsTypeColon)) 2906 return true; 2907 if (Right.NestingLevel == 0 && Right.is(Keywords.kw_is)) 2908 return false; 2909 if (Left.is(Keywords.kw_in)) 2910 return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None; 2911 if (Right.is(Keywords.kw_in)) 2912 return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None; 2913 if (Right.is(Keywords.kw_as)) 2914 return false; // must not break before as in 'x as type' casts 2915 if (Left.is(Keywords.kw_as)) 2916 return true; 2917 if (Left.is(TT_JsNonNullAssertion)) 2918 return true; 2919 if (Left.is(Keywords.kw_declare) && 2920 Right.isOneOf(Keywords.kw_module, tok::kw_namespace, 2921 Keywords.kw_function, tok::kw_class, tok::kw_enum, 2922 Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var, 2923 Keywords.kw_let, tok::kw_const)) 2924 // See grammar for 'declare' statements at: 2925 // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10 2926 return false; 2927 if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) && 2928 Right.isOneOf(tok::identifier, tok::string_literal)) 2929 return false; // must not break in "module foo { ...}" 2930 if (Right.is(TT_TemplateString) && Right.closesScope()) 2931 return false; 2932 if (Left.is(TT_TemplateString) && Left.opensScope()) 2933 return true; 2934 } 2935 2936 if (Left.is(tok::at)) 2937 return false; 2938 if (Left.Tok.getObjCKeywordID() == tok::objc_interface) 2939 return false; 2940 if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation)) 2941 return !Right.is(tok::l_paren); 2942 if (Right.is(TT_PointerOrReference)) 2943 return Line.IsMultiVariableDeclStmt || 2944 (Style.PointerAlignment == FormatStyle::PAS_Right && 2945 (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName))); 2946 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) || 2947 Right.is(tok::kw_operator)) 2948 return true; 2949 if (Left.is(TT_PointerOrReference)) 2950 
return false; 2951 if (Right.isTrailingComment()) 2952 // We rely on MustBreakBefore being set correctly here as we should not 2953 // change the "binding" behavior of a comment. 2954 // The first comment in a braced lists is always interpreted as belonging to 2955 // the first list element. Otherwise, it should be placed outside of the 2956 // list. 2957 return Left.BlockKind == BK_BracedInit || 2958 (Left.is(TT_CtorInitializerColon) && 2959 Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon); 2960 if (Left.is(tok::question) && Right.is(tok::colon)) 2961 return false; 2962 if (Right.is(TT_ConditionalExpr) || Right.is(tok::question)) 2963 return Style.BreakBeforeTernaryOperators; 2964 if (Left.is(TT_ConditionalExpr) || Left.is(tok::question)) 2965 return !Style.BreakBeforeTernaryOperators; 2966 if (Right.is(TT_InheritanceColon)) 2967 return true; 2968 if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) && 2969 Left.isNot(TT_SelectorName)) 2970 return true; 2971 2972 if (Right.is(tok::colon) && 2973 !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) 2974 return false; 2975 if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) { 2976 if ((Style.Language == FormatStyle::LK_Proto || 2977 Style.Language == FormatStyle::LK_TextProto) && 2978 !Style.AlwaysBreakBeforeMultilineStrings && Right.isStringLiteral()) 2979 return false; 2980 return true; 2981 } 2982 if (Right.is(tok::r_square) && Right.MatchingParen && 2983 Right.MatchingParen->is(TT_ProtoExtensionLSquare)) 2984 return false; 2985 if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next && 2986 Right.Next->is(TT_ObjCMethodExpr))) 2987 return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls. 
2988 if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty) 2989 return true; 2990 if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen)) 2991 return true; 2992 if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen, 2993 TT_OverloadedOperator)) 2994 return false; 2995 if (Left.is(TT_RangeBasedForLoopColon)) 2996 return true; 2997 if (Right.is(TT_RangeBasedForLoopColon)) 2998 return false; 2999 if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener)) 3000 return true; 3001 if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) || 3002 Left.is(tok::kw_operator)) 3003 return false; 3004 if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) && 3005 Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0) 3006 return false; 3007 if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen)) 3008 return false; 3009 if (Left.is(tok::l_paren) && Left.Previous && 3010 (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) 3011 return false; 3012 if (Right.is(TT_ImplicitStringLiteral)) 3013 return false; 3014 3015 if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser)) 3016 return false; 3017 if (Right.is(tok::r_square) && Right.MatchingParen && 3018 Right.MatchingParen->is(TT_LambdaLSquare)) 3019 return false; 3020 3021 // We only break before r_brace if there was a corresponding break before 3022 // the l_brace, which is tracked by BreakBeforeClosingBrace. 3023 if (Right.is(tok::r_brace)) 3024 return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block; 3025 3026 // Allow breaking after a trailing annotation, e.g. after a method 3027 // declaration. 
3028 if (Left.is(TT_TrailingAnnotation)) 3029 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren, 3030 tok::less, tok::coloncolon); 3031 3032 if (Right.is(tok::kw___attribute) || 3033 (Right.is(tok::l_square) && Right.is(TT_AttributeSquare))) 3034 return true; 3035 3036 if (Left.is(tok::identifier) && Right.is(tok::string_literal)) 3037 return true; 3038 3039 if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral)) 3040 return true; 3041 3042 if (Left.is(TT_CtorInitializerColon)) 3043 return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon; 3044 if (Right.is(TT_CtorInitializerColon)) 3045 return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon; 3046 if (Left.is(TT_CtorInitializerComma) && 3047 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) 3048 return false; 3049 if (Right.is(TT_CtorInitializerComma) && 3050 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) 3051 return true; 3052 if (Left.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma) 3053 return false; 3054 if (Right.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma) 3055 return true; 3056 if ((Left.is(tok::greater) && Right.is(tok::greater)) || 3057 (Left.is(tok::less) && Right.is(tok::less))) 3058 return false; 3059 if (Right.is(TT_BinaryOperator) && 3060 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None && 3061 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All || 3062 Right.getPrecedence() != prec::Assignment)) 3063 return true; 3064 if (Left.is(TT_ArrayInitializerLSquare)) 3065 return true; 3066 if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const)) 3067 return true; 3068 if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) && 3069 !Left.isOneOf(tok::arrowstar, tok::lessless) && 3070 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All && 3071 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None || 3072 Left.getPrecedence() == 
prec::Assignment)) 3073 return true; 3074 if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) || 3075 (Left.is(tok::r_square) && Right.is(TT_AttributeSquare))) 3076 return false; 3077 return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, 3078 tok::kw_class, tok::kw_struct, tok::comment) || 3079 Right.isMemberAccess() || 3080 Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless, 3081 tok::colon, tok::l_square, tok::at) || 3082 (Left.is(tok::r_paren) && 3083 Right.isOneOf(tok::identifier, tok::kw_const)) || 3084 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || 3085 (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser)); 3086 } 3087 3088 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { 3089 llvm::errs() << "AnnotatedTokens(L=" << Line.Level << "):\n"; 3090 const FormatToken *Tok = Line.First; 3091 while (Tok) { 3092 llvm::errs() << " M=" << Tok->MustBreakBefore 3093 << " C=" << Tok->CanBreakBefore 3094 << " T=" << getTokenTypeName(Tok->Type) 3095 << " S=" << Tok->SpacesRequiredBefore 3096 << " B=" << Tok->BlockParameterCount 3097 << " BK=" << Tok->BlockKind << " P=" << Tok->SplitPenalty 3098 << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength 3099 << " PPK=" << Tok->PackingKind << " FakeLParens="; 3100 for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) 3101 llvm::errs() << Tok->FakeLParens[i] << "/"; 3102 llvm::errs() << " FakeRParens=" << Tok->FakeRParens; 3103 llvm::errs() << " Text='" << Tok->TokenText << "'\n"; 3104 if (!Tok->Next) 3105 assert(Tok == Line.Last); 3106 Tok = Tok->Next; 3107 } 3108 llvm::errs() << "----\n"; 3109 } 3110 3111 } // namespace format 3112 } // namespace clang 3113