1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "TokenAnnotator.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/Support/Debug.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 #include <algorithm> 23 #include <utility> 24 25 #define DEBUG_TYPE "format-parser" 26 27 namespace clang { 28 namespace format { 29 30 class FormatTokenSource { 31 public: 32 virtual ~FormatTokenSource() {} 33 34 // Returns the next token in the token stream. 35 virtual FormatToken *getNextToken() = 0; 36 37 // Returns the token preceding the token returned by the last call to 38 // getNextToken() in the token stream, or nullptr if no such token exists. 39 virtual FormatToken *getPreviousToken() = 0; 40 41 // Returns the token that would be returned by the next call to 42 // getNextToken(). 43 virtual FormatToken *peekNextToken() = 0; 44 45 // Returns the token that would be returned after the next N calls to 46 // getNextToken(). N needs to be greater than zero, and small enough that 47 // there are still tokens. Check for tok::eof with N-1 before calling it with 48 // N. 49 virtual FormatToken *peekNextToken(int N) = 0; 50 51 // Returns whether we are at the end of the file. 52 // This can be different from whether getNextToken() returned an eof token 53 // when the FormatTokenSource is a view on a part of the token stream. 
54 virtual bool isEOF() = 0; 55 56 // Gets the current position in the token stream, to be used by setPosition(). 57 virtual unsigned getPosition() = 0; 58 59 // Resets the token stream to the state it was in when getPosition() returned 60 // Position, and return the token at that position in the stream. 61 virtual FormatToken *setPosition(unsigned Position) = 0; 62 }; 63 64 namespace { 65 66 class ScopedDeclarationState { 67 public: 68 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, 69 bool MustBeDeclaration) 70 : Line(Line), Stack(Stack) { 71 Line.MustBeDeclaration = MustBeDeclaration; 72 Stack.push_back(MustBeDeclaration); 73 } 74 ~ScopedDeclarationState() { 75 Stack.pop_back(); 76 if (!Stack.empty()) 77 Line.MustBeDeclaration = Stack.back(); 78 else 79 Line.MustBeDeclaration = true; 80 } 81 82 private: 83 UnwrappedLine &Line; 84 llvm::BitVector &Stack; 85 }; 86 87 static bool isLineComment(const FormatToken &FormatTok) { 88 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 89 } 90 91 // Checks if \p FormatTok is a line comment that continues the line comment 92 // \p Previous. The original column of \p MinColumnToken is used to determine 93 // whether \p FormatTok is indented enough to the right to continue \p Previous. 94 static bool continuesLineComment(const FormatToken &FormatTok, 95 const FormatToken *Previous, 96 const FormatToken *MinColumnToken) { 97 if (!Previous || !MinColumnToken) 98 return false; 99 unsigned MinContinueColumn = 100 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 
0 : 1); 101 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 102 isLineComment(*Previous) && 103 FormatTok.OriginalColumn >= MinContinueColumn; 104 } 105 106 class ScopedMacroState : public FormatTokenSource { 107 public: 108 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 109 FormatToken *&ResetToken) 110 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 111 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 112 Token(nullptr), PreviousToken(nullptr) { 113 FakeEOF.Tok.startToken(); 114 FakeEOF.Tok.setKind(tok::eof); 115 TokenSource = this; 116 Line.Level = 0; 117 Line.InPPDirective = true; 118 } 119 120 ~ScopedMacroState() override { 121 TokenSource = PreviousTokenSource; 122 ResetToken = Token; 123 Line.InPPDirective = false; 124 Line.Level = PreviousLineLevel; 125 } 126 127 FormatToken *getNextToken() override { 128 // The \c UnwrappedLineParser guards against this by never calling 129 // \c getNextToken() after it has encountered the first eof token. 
130 assert(!eof()); 131 PreviousToken = Token; 132 Token = PreviousTokenSource->getNextToken(); 133 if (eof()) 134 return &FakeEOF; 135 return Token; 136 } 137 138 FormatToken *getPreviousToken() override { 139 return PreviousTokenSource->getPreviousToken(); 140 } 141 142 FormatToken *peekNextToken() override { 143 if (eof()) 144 return &FakeEOF; 145 return PreviousTokenSource->peekNextToken(); 146 } 147 148 FormatToken *peekNextToken(int N) override { 149 assert(N > 0); 150 if (eof()) 151 return &FakeEOF; 152 return PreviousTokenSource->peekNextToken(N); 153 } 154 155 bool isEOF() override { return PreviousTokenSource->isEOF(); } 156 157 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 158 159 FormatToken *setPosition(unsigned Position) override { 160 PreviousToken = nullptr; 161 Token = PreviousTokenSource->setPosition(Position); 162 return Token; 163 } 164 165 private: 166 bool eof() { 167 return Token && Token->HasUnescapedNewline && 168 !continuesLineComment(*Token, PreviousToken, 169 /*MinColumnToken=*/PreviousToken); 170 } 171 172 FormatToken FakeEOF; 173 UnwrappedLine &Line; 174 FormatTokenSource *&TokenSource; 175 FormatToken *&ResetToken; 176 unsigned PreviousLineLevel; 177 FormatTokenSource *PreviousTokenSource; 178 179 FormatToken *Token; 180 FormatToken *PreviousToken; 181 }; 182 183 } // end anonymous namespace 184 185 class ScopedLineState { 186 public: 187 ScopedLineState(UnwrappedLineParser &Parser, 188 bool SwitchToPreprocessorLines = false) 189 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 190 if (SwitchToPreprocessorLines) 191 Parser.CurrentLines = &Parser.PreprocessorDirectives; 192 else if (!Parser.Line->Tokens.empty()) 193 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 194 PreBlockLine = std::move(Parser.Line); 195 Parser.Line = std::make_unique<UnwrappedLine>(); 196 Parser.Line->Level = PreBlockLine->Level; 197 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 198 } 199 200 
~ScopedLineState() { 201 if (!Parser.Line->Tokens.empty()) 202 Parser.addUnwrappedLine(); 203 assert(Parser.Line->Tokens.empty()); 204 Parser.Line = std::move(PreBlockLine); 205 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 206 Parser.MustBreakBeforeNextToken = true; 207 Parser.CurrentLines = OriginalLines; 208 } 209 210 private: 211 UnwrappedLineParser &Parser; 212 213 std::unique_ptr<UnwrappedLine> PreBlockLine; 214 SmallVectorImpl<UnwrappedLine> *OriginalLines; 215 }; 216 217 class CompoundStatementIndenter { 218 public: 219 CompoundStatementIndenter(UnwrappedLineParser *Parser, 220 const FormatStyle &Style, unsigned &LineLevel) 221 : CompoundStatementIndenter(Parser, LineLevel, 222 Style.BraceWrapping.AfterControlStatement, 223 Style.BraceWrapping.IndentBraces) {} 224 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 225 bool WrapBrace, bool IndentBrace) 226 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 227 if (WrapBrace) 228 Parser->addUnwrappedLine(); 229 if (IndentBrace) 230 ++LineLevel; 231 } 232 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 233 234 private: 235 unsigned &LineLevel; 236 unsigned OldLineLevel; 237 }; 238 239 namespace { 240 241 class IndexedTokenSource : public FormatTokenSource { 242 public: 243 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 244 : Tokens(Tokens), Position(-1) {} 245 246 FormatToken *getNextToken() override { 247 if (Position >= 0 && Tokens[Position]->is(tok::eof)) { 248 LLVM_DEBUG({ 249 llvm::dbgs() << "Next "; 250 dbgToken(Position); 251 }); 252 return Tokens[Position]; 253 } 254 ++Position; 255 LLVM_DEBUG({ 256 llvm::dbgs() << "Next "; 257 dbgToken(Position); 258 }); 259 return Tokens[Position]; 260 } 261 262 FormatToken *getPreviousToken() override { 263 return Position > 0 ? 
Tokens[Position - 1] : nullptr; 264 } 265 266 FormatToken *peekNextToken() override { 267 int Next = Position + 1; 268 LLVM_DEBUG({ 269 llvm::dbgs() << "Peeking "; 270 dbgToken(Next); 271 }); 272 return Tokens[Next]; 273 } 274 275 FormatToken *peekNextToken(int N) override { 276 assert(N > 0); 277 int Next = Position + N; 278 LLVM_DEBUG({ 279 llvm::dbgs() << "Peeking (+" << (N - 1) << ") "; 280 dbgToken(Next); 281 }); 282 return Tokens[Next]; 283 } 284 285 bool isEOF() override { return Tokens[Position]->is(tok::eof); } 286 287 unsigned getPosition() override { 288 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 289 assert(Position >= 0); 290 return Position; 291 } 292 293 FormatToken *setPosition(unsigned P) override { 294 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 295 Position = P; 296 return Tokens[Position]; 297 } 298 299 void reset() { Position = -1; } 300 301 private: 302 void dbgToken(int Position, llvm::StringRef Indent = "") { 303 FormatToken *Tok = Tokens[Position]; 304 llvm::dbgs() << Indent << "[" << Position 305 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 306 << ", Macro: " << !!Tok->MacroCtx << "\n"; 307 } 308 309 ArrayRef<FormatToken *> Tokens; 310 int Position; 311 }; 312 313 } // end anonymous namespace 314 315 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 316 const AdditionalKeywords &Keywords, 317 unsigned FirstStartColumn, 318 ArrayRef<FormatToken *> Tokens, 319 UnwrappedLineConsumer &Callback) 320 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 321 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 322 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 323 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 324 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 325 ? 
IG_Rejected 326 : IG_Inited), 327 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 328 329 void UnwrappedLineParser::reset() { 330 PPBranchLevel = -1; 331 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 332 ? IG_Rejected 333 : IG_Inited; 334 IncludeGuardToken = nullptr; 335 Line.reset(new UnwrappedLine); 336 CommentsBeforeNextToken.clear(); 337 FormatTok = nullptr; 338 MustBreakBeforeNextToken = false; 339 PreprocessorDirectives.clear(); 340 CurrentLines = &Lines; 341 DeclarationScopeStack.clear(); 342 NestedTooDeep.clear(); 343 PPStack.clear(); 344 Line->FirstStartColumn = FirstStartColumn; 345 } 346 347 void UnwrappedLineParser::parse() { 348 IndexedTokenSource TokenSource(AllTokens); 349 Line->FirstStartColumn = FirstStartColumn; 350 do { 351 LLVM_DEBUG(llvm::dbgs() << "----\n"); 352 reset(); 353 Tokens = &TokenSource; 354 TokenSource.reset(); 355 356 readToken(); 357 parseFile(); 358 359 // If we found an include guard then all preprocessor directives (other than 360 // the guard) are over-indented by one. 361 if (IncludeGuard == IG_Found) { 362 for (auto &Line : Lines) 363 if (Line.InPPDirective && Line.Level > 0) 364 --Line.Level; 365 } 366 367 // Create line with eof token. 
368 pushToken(FormatTok); 369 addUnwrappedLine(); 370 371 for (const UnwrappedLine &Line : Lines) 372 Callback.consumeUnwrappedLine(Line); 373 374 Callback.finishRun(); 375 Lines.clear(); 376 while (!PPLevelBranchIndex.empty() && 377 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 378 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 379 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 380 } 381 if (!PPLevelBranchIndex.empty()) { 382 ++PPLevelBranchIndex.back(); 383 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 384 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 385 } 386 } while (!PPLevelBranchIndex.empty()); 387 } 388 389 void UnwrappedLineParser::parseFile() { 390 // The top-level context in a file always has declarations, except for pre- 391 // processor directives and JavaScript files. 392 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 393 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 394 MustBeDeclaration); 395 if (Style.Language == FormatStyle::LK_TextProto) 396 parseBracedList(); 397 else 398 parseLevel(/*OpeningBrace=*/nullptr, /*CanContainBracedList=*/true); 399 // Make sure to format the remaining tokens. 400 // 401 // LK_TextProto is special since its top-level is parsed as the body of a 402 // braced list, which does not necessarily have natural line separators such 403 // as a semicolon. Comments after the last entry that have been determined to 404 // not belong to that line, as in: 405 // key: value 406 // // endfile comment 407 // do not have a chance to be put on a line of their own until this point. 408 // Here we add this newline before end-of-file comments. 
409 if (Style.Language == FormatStyle::LK_TextProto && 410 !CommentsBeforeNextToken.empty()) { 411 addUnwrappedLine(); 412 } 413 flushComments(true); 414 addUnwrappedLine(); 415 } 416 417 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 418 do { 419 switch (FormatTok->Tok.getKind()) { 420 case tok::l_brace: 421 return; 422 default: 423 if (FormatTok->is(Keywords.kw_where)) { 424 addUnwrappedLine(); 425 nextToken(); 426 parseCSharpGenericTypeConstraint(); 427 break; 428 } 429 nextToken(); 430 break; 431 } 432 } while (!eof()); 433 } 434 435 void UnwrappedLineParser::parseCSharpAttribute() { 436 int UnpairedSquareBrackets = 1; 437 do { 438 switch (FormatTok->Tok.getKind()) { 439 case tok::r_square: 440 nextToken(); 441 --UnpairedSquareBrackets; 442 if (UnpairedSquareBrackets == 0) { 443 addUnwrappedLine(); 444 return; 445 } 446 break; 447 case tok::l_square: 448 ++UnpairedSquareBrackets; 449 nextToken(); 450 break; 451 default: 452 nextToken(); 453 break; 454 } 455 } while (!eof()); 456 } 457 458 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 459 if (!Lines.empty() && Lines.back().InPPDirective) 460 return true; 461 462 const FormatToken *Previous = Tokens->getPreviousToken(); 463 return Previous && Previous->is(tok::comment) && 464 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 465 } 466 467 /// \brief Parses a level, that is ???. 468 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level 469 /// \param CanContainBracedList If the content can contain (at any level) a 470 /// braced list. 471 /// \param NextLBracesType The type for left brace found in this level. 472 /// \returns true if a simple block of if/else/for/while, or false otherwise. 473 /// (A simple block has a single statement.) 
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     bool CanContainBracedList,
                                     IfStmtKind *IfKind,
                                     TokenType NextLBracesType) {
  // Braces nested inside a compound requirement are typed as braced lists;
  // otherwise nested levels get no forced brace type.
  auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
                                  ? TT_BracedListLBrace
                                  : TT_Unknown;
  // When brace removal is disabled this is forced to true, which makes the
  // "simple block" check in the r_brace case below always fail.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;
  do {
    if (FormatTok->getType() == TT_AttributeMacro) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Parses one structural element and counts it as a statement. HasLabel is
    // only offered as an out-parameter until it has been set once.
    auto ParseDefault = [this, OpeningBrace, IfKind, NextLevelLBracesType,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(IfKind, !OpeningBrace, NextLevelLBracesType,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown) {
        FormatTok->setFinalizedType(NextLBracesType);
      } else if (FormatTok->Previous &&
                 FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
          tryToParseBracedList()) {
        continue;
      }
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/true, /*KeepBraces=*/true,
                 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
                 NextLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // This level is done; the closing brace is left for the caller. The
        // return value only reports whether the block is "simple".
        if (!Style.RemoveBracesLLVM ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
          return false;
        }
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective()) {
          return false;
        }
        // A comment on the same line as the closing brace also disqualifies
        // the block.
        const FormatToken *Next = Tokens->peekNextToken();
        return Next->isNot(tok::comment) || Next->NewlinesBefore > 0;
      }
      // No opening brace on this level: consume the stray '}' and continue.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past comments to see whether a ':' follows, then rewind
      // the token source to where we started.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // Only the first label seen on this level may bump the indent level.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels ||
           (Line->InPPDirective && Line->Level == 1))) {
        ++Line->Level;
      }
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      LLVM_FALLTHROUGH;
    default:
      ParseDefault();
      break;
    }
  } while (!eof());
  return false;
}

// Scans ahead from the current '{' and assigns a block kind (BK_Block or
// BK_BracedInit) to it and to the braces nested in it. The token source
// position is saved on entry and restored before returning.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    do {
      NextTok = Tokens->getNextToken();
    } while (NextTok->is(tok::comment));

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less)) {
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        } else if (PrevTok->is(tok::r_paren)) {
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        }
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      if (LBraceStack.back()->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Skip NextTok over preprocessor lines, otherwise we may not
          // properly diagnose the block as a braced initializer
          // if the comma separator appears after the pp directive.
          while (NextTok->is(tok::hash)) {
            ScopedMacroState MacroState(*Line, Tokens, NextTok);
            do {
              NextTok = Tokens->getNextToken();
            } while (NextTok->isNot(tok::eof));
          }

          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isCpp() && NextTok->is(tok::l_paren));

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis);

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          // NOTE: this branch overwrites (rather than ORs into) the verdict
          // accumulated above.
          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Record the verdict on both the closing and the matching opening
        // brace.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back()->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back()->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    // Any of the following tokens marks the innermost still-undecided brace
    // as a block.
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
        LBraceStack.back()->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (FormatToken *LBrace : LBraceStack)
    if (LBrace->is(BK_Unknown))
      LBrace->setBlockKind(BK_Block);

  FormatTok = Tokens->setPosition(StoredPosition);
}

// Mixes the std::hash of \p v into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  std::hash<T> hasher;
  seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

// Returns a hash over the kind and line of every entry currently on PPStack.
size_t UnwrappedLineParser::computePPHash() const {
  size_t h = 0;
  for (const auto &i : PPStack) {
    hash_combine(h, size_t(i.Kind));
    hash_combine(h, i.Line);
  }
  return h;
}

// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
// is not null, subtracts its length (plus the preceding space) when computing
// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
// running the token annotator on it so that we can restore them afterward.
769 bool UnwrappedLineParser::mightFitOnOneLine( 770 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const { 771 const auto ColumnLimit = Style.ColumnLimit; 772 if (ColumnLimit == 0) 773 return true; 774 775 auto &Tokens = ParsedLine.Tokens; 776 assert(!Tokens.empty()); 777 778 const auto *LastToken = Tokens.back().Tok; 779 assert(LastToken); 780 781 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size()); 782 783 int Index = 0; 784 for (const auto &Token : Tokens) { 785 assert(Token.Tok); 786 auto &SavedToken = SavedTokens[Index++]; 787 SavedToken.Tok = new FormatToken; 788 SavedToken.Tok->copyFrom(*Token.Tok); 789 SavedToken.Children = std::move(Token.Children); 790 } 791 792 AnnotatedLine Line(ParsedLine); 793 assert(Line.Last == LastToken); 794 795 TokenAnnotator Annotator(Style, Keywords); 796 Annotator.annotate(Line); 797 Annotator.calculateFormattingInformation(Line); 798 799 auto Length = LastToken->TotalLength; 800 if (OpeningBrace) { 801 assert(OpeningBrace != Tokens.front().Tok); 802 Length -= OpeningBrace->TokenText.size() + 1; 803 } 804 805 Index = 0; 806 for (auto &Token : Tokens) { 807 const auto &SavedToken = SavedTokens[Index++]; 808 Token.Tok->copyFrom(*SavedToken.Tok); 809 Token.Children = std::move(SavedToken.Children); 810 delete SavedToken.Tok; 811 } 812 813 return Line.Level * Style.IndentWidth + Length <= ColumnLimit; 814 } 815 816 UnwrappedLineParser::IfStmtKind UnwrappedLineParser::parseBlock( 817 bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces, 818 bool UnindentWhitesmithsBraces, bool CanContainBracedList, 819 TokenType NextLBracesType) { 820 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 821 "'{' or macro block token expected"); 822 FormatToken *Tok = FormatTok; 823 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment); 824 auto Index = CurrentLines->size(); 825 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 826 FormatTok->setBlockKind(BK_Block); 827 
828 // For Whitesmiths mode, jump to the next level prior to skipping over the 829 // braces. 830 if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 831 ++Line->Level; 832 833 size_t PPStartHash = computePPHash(); 834 835 unsigned InitialLevel = Line->Level; 836 nextToken(/*LevelDifference=*/AddLevels); 837 838 if (MacroBlock && FormatTok->is(tok::l_paren)) 839 parseParens(); 840 841 size_t NbPreprocessorDirectives = 842 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 843 addUnwrappedLine(); 844 size_t OpeningLineIndex = 845 CurrentLines->empty() 846 ? (UnwrappedLine::kInvalidIndex) 847 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 848 849 // Whitesmiths is weird here. The brace needs to be indented for the namespace 850 // block, but the block itself may not be indented depending on the style 851 // settings. This allows the format to back up one level in those cases. 852 if (UnindentWhitesmithsBraces) 853 --Line->Level; 854 855 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 856 MustBeDeclaration); 857 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 858 Line->Level += AddLevels; 859 860 IfStmtKind IfKind = IfStmtKind::NotIf; 861 const bool SimpleBlock = 862 parseLevel(Tok, CanContainBracedList, &IfKind, NextLBracesType); 863 864 if (eof()) 865 return IfKind; 866 867 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 868 : !FormatTok->is(tok::r_brace)) { 869 Line->Level = InitialLevel; 870 FormatTok->setBlockKind(BK_Block); 871 return IfKind; 872 } 873 874 if (SimpleBlock && !KeepBraces) { 875 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)); 876 assert(FormatTok->is(tok::r_brace)); 877 const FormatToken *Previous = Tokens->getPreviousToken(); 878 assert(Previous); 879 if (Previous->isNot(tok::r_brace) || Previous->Optional) { 880 assert(!CurrentLines->empty()); 881 const FormatToken *OpeningBrace = Tok; 882 if (!Tok->Previous) { // Wrapped l_brace. 
883 if (FollowedByComment) { 884 KeepBraces = true; 885 } else { 886 assert(Index > 0); 887 --Index; // The line above the wrapped l_brace. 888 OpeningBrace = nullptr; 889 } 890 } 891 if (!KeepBraces && mightFitOnOneLine(CurrentLines->back()) && 892 (Tok->is(TT_ElseLBrace) || 893 mightFitOnOneLine((*CurrentLines)[Index], OpeningBrace))) { 894 Tok->MatchingParen = FormatTok; 895 FormatTok->MatchingParen = Tok; 896 } 897 } 898 } 899 900 size_t PPEndHash = computePPHash(); 901 902 // Munch the closing brace. 903 nextToken(/*LevelDifference=*/-AddLevels); 904 905 if (MacroBlock && FormatTok->is(tok::l_paren)) 906 parseParens(); 907 908 if (FormatTok->is(tok::kw_noexcept)) { 909 // A noexcept in a requires expression. 910 nextToken(); 911 } 912 913 if (FormatTok->is(tok::arrow)) { 914 // Following the } or noexcept we can find a trailing return type arrow 915 // as part of an implicit conversion constraint. 916 nextToken(); 917 parseStructuralElement(); 918 } 919 920 if (MunchSemi && FormatTok->is(tok::semi)) 921 nextToken(); 922 923 Line->Level = InitialLevel; 924 925 if (PPStartHash == PPEndHash) { 926 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 927 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 928 // Update the opening line to add the forward reference as well 929 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 930 CurrentLines->size() - 1; 931 } 932 } 933 934 return IfKind; 935 } 936 937 static bool isGoogScope(const UnwrappedLine &Line) { 938 // FIXME: Closure-library specific stuff should not be hard-coded but be 939 // configurable. 
940 if (Line.Tokens.size() < 4) 941 return false; 942 auto I = Line.Tokens.begin(); 943 if (I->Tok->TokenText != "goog") 944 return false; 945 ++I; 946 if (I->Tok->isNot(tok::period)) 947 return false; 948 ++I; 949 if (I->Tok->TokenText != "scope") 950 return false; 951 ++I; 952 return I->Tok->is(tok::l_paren); 953 } 954 955 static bool isIIFE(const UnwrappedLine &Line, 956 const AdditionalKeywords &Keywords) { 957 // Look for the start of an immediately invoked anonymous function. 958 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 959 // This is commonly done in JavaScript to create a new, anonymous scope. 960 // Example: (function() { ... })() 961 if (Line.Tokens.size() < 3) 962 return false; 963 auto I = Line.Tokens.begin(); 964 if (I->Tok->isNot(tok::l_paren)) 965 return false; 966 ++I; 967 if (I->Tok->isNot(Keywords.kw_function)) 968 return false; 969 ++I; 970 return I->Tok->is(tok::l_paren); 971 } 972 973 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 974 const FormatToken &InitialToken) { 975 tok::TokenKind Kind = InitialToken.Tok.getKind(); 976 if (InitialToken.is(TT_NamespaceMacro)) 977 Kind = tok::kw_namespace; 978 979 switch (Kind) { 980 case tok::kw_namespace: 981 return Style.BraceWrapping.AfterNamespace; 982 case tok::kw_class: 983 return Style.BraceWrapping.AfterClass; 984 case tok::kw_union: 985 return Style.BraceWrapping.AfterUnion; 986 case tok::kw_struct: 987 return Style.BraceWrapping.AfterStruct; 988 case tok::kw_enum: 989 return Style.BraceWrapping.AfterEnum; 990 default: 991 return false; 992 } 993 } 994 995 void UnwrappedLineParser::parseChildBlock( 996 bool CanContainBracedList, clang::format::TokenType NextLBracesType) { 997 assert(FormatTok->is(tok::l_brace)); 998 FormatTok->setBlockKind(BK_Block); 999 const FormatToken *OpeningBrace = FormatTok; 1000 nextToken(); 1001 { 1002 bool SkipIndent = (Style.isJavaScript() && 1003 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 1004 ScopedLineState 
LineState(*this); 1005 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 1006 /*MustBeDeclaration=*/false); 1007 Line->Level += SkipIndent ? 0 : 1; 1008 parseLevel(OpeningBrace, CanContainBracedList, /*IfKind=*/nullptr, 1009 NextLBracesType); 1010 flushComments(isOnNewLine(*FormatTok)); 1011 Line->Level -= SkipIndent ? 0 : 1; 1012 } 1013 nextToken(); 1014 } 1015 1016 void UnwrappedLineParser::parsePPDirective() { 1017 assert(FormatTok->is(tok::hash) && "'#' expected"); 1018 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 1019 1020 nextToken(); 1021 1022 if (!FormatTok->Tok.getIdentifierInfo()) { 1023 parsePPUnknown(); 1024 return; 1025 } 1026 1027 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 1028 case tok::pp_define: 1029 parsePPDefine(); 1030 return; 1031 case tok::pp_if: 1032 parsePPIf(/*IfDef=*/false); 1033 break; 1034 case tok::pp_ifdef: 1035 case tok::pp_ifndef: 1036 parsePPIf(/*IfDef=*/true); 1037 break; 1038 case tok::pp_else: 1039 parsePPElse(); 1040 break; 1041 case tok::pp_elifdef: 1042 case tok::pp_elifndef: 1043 case tok::pp_elif: 1044 parsePPElIf(); 1045 break; 1046 case tok::pp_endif: 1047 parsePPEndIf(); 1048 break; 1049 default: 1050 parsePPUnknown(); 1051 break; 1052 } 1053 } 1054 1055 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 1056 size_t Line = CurrentLines->size(); 1057 if (CurrentLines == &PreprocessorDirectives) 1058 Line += Lines.size(); 1059 1060 if (Unreachable || 1061 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) { 1062 PPStack.push_back({PP_Unreachable, Line}); 1063 } else { 1064 PPStack.push_back({PP_Conditional, Line}); 1065 } 1066 } 1067 1068 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 1069 ++PPBranchLevel; 1070 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 1071 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 1072 PPLevelBranchIndex.push_back(0); 1073 
PPLevelBranchCount.push_back(0); 1074 } 1075 PPChainBranchIndex.push(0); 1076 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 1077 conditionalCompilationCondition(Unreachable || Skip); 1078 } 1079 1080 void UnwrappedLineParser::conditionalCompilationAlternative() { 1081 if (!PPStack.empty()) 1082 PPStack.pop_back(); 1083 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1084 if (!PPChainBranchIndex.empty()) 1085 ++PPChainBranchIndex.top(); 1086 conditionalCompilationCondition( 1087 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 1088 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 1089 } 1090 1091 void UnwrappedLineParser::conditionalCompilationEnd() { 1092 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1093 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 1094 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) 1095 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 1096 } 1097 // Guard against #endif's without #if. 1098 if (PPBranchLevel > -1) 1099 --PPBranchLevel; 1100 if (!PPChainBranchIndex.empty()) 1101 PPChainBranchIndex.pop(); 1102 if (!PPStack.empty()) 1103 PPStack.pop_back(); 1104 } 1105 1106 void UnwrappedLineParser::parsePPIf(bool IfDef) { 1107 bool IfNDef = FormatTok->is(tok::pp_ifndef); 1108 nextToken(); 1109 bool Unreachable = false; 1110 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 1111 Unreachable = true; 1112 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 1113 Unreachable = true; 1114 conditionalCompilationStart(Unreachable); 1115 FormatToken *IfCondition = FormatTok; 1116 // If there's a #ifndef on the first line, and the only lines before it are 1117 // comments, it could be an include guard. 
1118 bool MaybeIncludeGuard = IfNDef; 1119 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1120 for (auto &Line : Lines) { 1121 if (!Line.Tokens.front().Tok->is(tok::comment)) { 1122 MaybeIncludeGuard = false; 1123 IncludeGuard = IG_Rejected; 1124 break; 1125 } 1126 } 1127 } 1128 --PPBranchLevel; 1129 parsePPUnknown(); 1130 ++PPBranchLevel; 1131 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1132 IncludeGuard = IG_IfNdefed; 1133 IncludeGuardToken = IfCondition; 1134 } 1135 } 1136 1137 void UnwrappedLineParser::parsePPElse() { 1138 // If a potential include guard has an #else, it's not an include guard. 1139 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1140 IncludeGuard = IG_Rejected; 1141 conditionalCompilationAlternative(); 1142 if (PPBranchLevel > -1) 1143 --PPBranchLevel; 1144 parsePPUnknown(); 1145 ++PPBranchLevel; 1146 } 1147 1148 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 1149 1150 void UnwrappedLineParser::parsePPEndIf() { 1151 conditionalCompilationEnd(); 1152 parsePPUnknown(); 1153 // If the #endif of a potential include guard is the last thing in the file, 1154 // then we found an include guard. 
1155 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1156 Style.IndentPPDirectives != FormatStyle::PPDIS_None) { 1157 IncludeGuard = IG_Found; 1158 } 1159 } 1160 1161 void UnwrappedLineParser::parsePPDefine() { 1162 nextToken(); 1163 1164 if (!FormatTok->Tok.getIdentifierInfo()) { 1165 IncludeGuard = IG_Rejected; 1166 IncludeGuardToken = nullptr; 1167 parsePPUnknown(); 1168 return; 1169 } 1170 1171 if (IncludeGuard == IG_IfNdefed && 1172 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1173 IncludeGuard = IG_Defined; 1174 IncludeGuardToken = nullptr; 1175 for (auto &Line : Lines) { 1176 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1177 IncludeGuard = IG_Rejected; 1178 break; 1179 } 1180 } 1181 } 1182 1183 // In the context of a define, even keywords should be treated as normal 1184 // identifiers. Setting the kind to identifier is not enough, because we need 1185 // to treat additional keywords like __except as well, which are already 1186 // identifiers. Setting the identifier info to null interferes with include 1187 // guard processing above, and changes preprocessing nesting. 1188 FormatTok->Tok.setKind(tok::identifier); 1189 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); 1190 nextToken(); 1191 if (FormatTok->Tok.getKind() == tok::l_paren && 1192 !FormatTok->hasWhitespaceBefore()) { 1193 parseParens(); 1194 } 1195 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1196 Line->Level += PPBranchLevel + 1; 1197 addUnwrappedLine(); 1198 ++Line->Level; 1199 1200 // Errors during a preprocessor directive can only affect the layout of the 1201 // preprocessor directive, and thus we ignore them. An alternative approach 1202 // would be to use the same approach we use on the file level (no 1203 // re-indentation if there was a structural error) within the macro 1204 // definition. 
1205 parseFile(); 1206 } 1207 1208 void UnwrappedLineParser::parsePPUnknown() { 1209 do { 1210 nextToken(); 1211 } while (!eof()); 1212 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1213 Line->Level += PPBranchLevel + 1; 1214 addUnwrappedLine(); 1215 } 1216 1217 // Here we exclude certain tokens that are not usually the first token in an 1218 // unwrapped line. This is used in attempt to distinguish macro calls without 1219 // trailing semicolons from other constructs split to several lines. 1220 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1221 // Semicolon can be a null-statement, l_square can be a start of a macro or 1222 // a C++11 attribute, but this doesn't seem to be common. 1223 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 1224 Tok.isNot(TT_AttributeSquare) && 1225 // Tokens that can only be used as binary operators and a part of 1226 // overloaded operator names. 1227 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 1228 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 1229 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 1230 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 1231 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 1232 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 1233 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 1234 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 1235 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 1236 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 1237 Tok.isNot(tok::lesslessequal) && 1238 // Colon is used in labels, base class lists, initializer lists, 1239 // range-based for loops, ternary operator, but should never be the 1240 // first token in an unwrapped line. 1241 Tok.isNot(tok::colon) && 1242 // 'noexcept' is a trailing annotation. 
1243 Tok.isNot(tok::kw_noexcept); 1244 } 1245 1246 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1247 const FormatToken *FormatTok) { 1248 // FIXME: This returns true for C/C++ keywords like 'struct'. 1249 return FormatTok->is(tok::identifier) && 1250 (FormatTok->Tok.getIdentifierInfo() == nullptr || 1251 !FormatTok->isOneOf( 1252 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1253 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1254 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1255 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1256 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1257 Keywords.kw_instanceof, Keywords.kw_interface, 1258 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1259 } 1260 1261 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1262 const FormatToken *FormatTok) { 1263 return FormatTok->Tok.isLiteral() || 1264 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1265 mustBeJSIdent(Keywords, FormatTok); 1266 } 1267 1268 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1269 // when encountered after a value (see mustBeJSIdentOrValue). 1270 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1271 const FormatToken *FormatTok) { 1272 return FormatTok->isOneOf( 1273 tok::kw_return, Keywords.kw_yield, 1274 // conditionals 1275 tok::kw_if, tok::kw_else, 1276 // loops 1277 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1278 // switch/case 1279 tok::kw_switch, tok::kw_case, 1280 // exceptions 1281 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1282 // declaration 1283 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1284 Keywords.kw_async, Keywords.kw_function, 1285 // import/export 1286 Keywords.kw_import, tok::kw_export); 1287 } 1288 1289 // Checks whether a token is a type in K&R C (aka C78). 
1290 static bool isC78Type(const FormatToken &Tok) { 1291 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1292 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1293 tok::identifier); 1294 } 1295 1296 // This function checks whether a token starts the first parameter declaration 1297 // in a K&R C (aka C78) function definition, e.g.: 1298 // int f(a, b) 1299 // short a, b; 1300 // { 1301 // return a + b; 1302 // } 1303 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1304 const FormatToken *FuncName) { 1305 assert(Tok); 1306 assert(Next); 1307 assert(FuncName); 1308 1309 if (FuncName->isNot(tok::identifier)) 1310 return false; 1311 1312 const FormatToken *Prev = FuncName->Previous; 1313 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1314 return false; 1315 1316 if (!isC78Type(*Tok) && 1317 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) { 1318 return false; 1319 } 1320 1321 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1322 return false; 1323 1324 Tok = Tok->Previous; 1325 if (!Tok || Tok->isNot(tok::r_paren)) 1326 return false; 1327 1328 Tok = Tok->Previous; 1329 if (!Tok || Tok->isNot(tok::identifier)) 1330 return false; 1331 1332 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1333 } 1334 1335 void UnwrappedLineParser::parseModuleImport() { 1336 nextToken(); 1337 while (!eof()) { 1338 if (FormatTok->is(tok::colon)) { 1339 FormatTok->setFinalizedType(TT_ModulePartitionColon); 1340 } 1341 // Handle import <foo/bar.h> as we would an include statement. 1342 else if (FormatTok->is(tok::less)) { 1343 nextToken(); 1344 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1345 // Mark tokens up to the trailing line comments as implicit string 1346 // literals. 
1347 if (FormatTok->isNot(tok::comment) && 1348 !FormatTok->TokenText.startswith("//")) { 1349 FormatTok->setFinalizedType(TT_ImplicitStringLiteral); 1350 } 1351 nextToken(); 1352 } 1353 } 1354 if (FormatTok->is(tok::semi)) { 1355 nextToken(); 1356 break; 1357 } 1358 nextToken(); 1359 } 1360 1361 addUnwrappedLine(); 1362 } 1363 1364 // readTokenWithJavaScriptASI reads the next token and terminates the current 1365 // line if JavaScript Automatic Semicolon Insertion must 1366 // happen between the current token and the next token. 1367 // 1368 // This method is conservative - it cannot cover all edge cases of JavaScript, 1369 // but only aims to correctly handle certain well known cases. It *must not* 1370 // return true in speculative cases. 1371 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1372 FormatToken *Previous = FormatTok; 1373 readToken(); 1374 FormatToken *Next = FormatTok; 1375 1376 bool IsOnSameLine = 1377 CommentsBeforeNextToken.empty() 1378 ? Next->NewlinesBefore == 0 1379 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 1380 if (IsOnSameLine) 1381 return; 1382 1383 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 1384 bool PreviousStartsTemplateExpr = 1385 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 1386 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 1387 // If the line contains an '@' sign, the previous token might be an 1388 // annotation, which can precede another identifier/value. 
bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
      return LineNode.Tok->is(tok::at);
    });
    if (HasAt)
      return;
  }
  // A value followed by '!' on a new line ends the statement.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // Two values in a row (outside of a template string expression) cannot
  // belong to the same statement.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus))) {
    return addUnwrappedLine();
  }
  // A value or ')' followed by a declaration/statement keyword.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next)) {
    return addUnwrappedLine();
  }
}

// Parses one structural element starting at FormatTok.
//
// The first switch dispatches on the leading token to the dedicated parsers
// (namespaces, control statements, extern blocks, ...). If none of them takes
// over, the loop below consumes tokens one construct at a time until a case
// returns or the end of input is reached.
//
// \p IfKind is forwarded to parseIfThenElse. \p IsTopLevel enables the K&R C
// parameter declaration check after a parenthesized list. \p NextLBracesType,
// unless TT_Unknown, is set as the finalized type of every '{' the loop
// encounters. \p HasLabel, if non-null, is set to true when a label is parsed.
void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
                                                 bool IsTopLevel,
                                                 TokenType NextLBracesType,
                                                 bool *HasLabel) {
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setFinalizedType(TT_InlineASMBrace);
      nextToken();
      // Everything up to the closing brace is marked as finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setFinalizedType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp()) {
      nextToken();
    } else {
      parseAccessSpecifier();
    }
    return;
  case tok::kw_if:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseIfThenElse(IfKind);
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'switch: string' field declaration.
      break;
    }
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'default: string' field declaration.
      break;
    }
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'case: string' field declaration.
      nextToken();
      break;
    }
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    if (FormatTok->is(tok::string_literal)) {
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp()) {
        parseModuleImport();
        return;
      }
    }
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // No dedicated parser took over above: consume the element token by token.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->is(tok::l_paren)) {
          // Skip synchronization object
          parseParens();
        }
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      return;
    case tok::kw_requires: {
      if (Style.isCpp()) {
        bool ParsedClause = parseRequires();
        if (ParsedClause)
          return;
      } else {
        nextToken();
      }
      break;
    }
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM)) {
        parseEnum();
      }
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike())
        return;
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class)) {
        nextToken();
      }
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo()) {
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      }
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The '}' is not consumed here; only the current line is terminated.
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier()) {
        nextToken();
      }
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown)
        FormatTok->setFinalizedType(NextLBracesType);
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        if (!Line->InPPDirective)
          FormatTok->setFinalizedType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike())
          return;
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;

      FormatToken *PreviousToken = FormatTok;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      TokenCount = Line->Tokens.size();
      // Only the first token of a line (optionally after a leading comment)
      // is considered as a label or macro invocation.
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          if (HasLabel)
            *HasLabel = true;
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // The name has to be all upper case and, unless it is followed by
        // parentheses, at least five characters long.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // 'case: string' field declaration.
        nextToken();
        break;
      }
      parseCaseLabel();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

// Tries to parse a C# property accessor block starting at '{'. Returns false
// (leaving the token stream position unchanged) if the braces do not contain
// a get/set/init accessor.
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  if (FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
  // Track these as they do not require line breaks to be introduced.
bool HasSpecialAccessor = false;
  bool IsTrivialPropertyAccessor = true;
  // Look ahead through `Tok` only; FormatTok is not advanced by this loop, so
  // the loop is left through the `break` below.
  while (!eof()) {
    if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
                     tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
                     Keywords.kw_init, Keywords.kw_set)) {
      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
        HasSpecialAccessor = true;
      Tok = Tokens->getNextToken();
      continue;
    }
    // Anything but the closing brace after the accessor keywords means the
    // accessor has a body or an expression.
    if (Tok->isNot(tok::r_brace))
      IsTrivialPropertyAccessor = false;
    break;
  }

  if (!HasSpecialAccessor) {
    Tokens->setPosition(StoredPosition);
    return false;
  }

  // Try to parse the property accessor:
  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  Tokens->setPosition(StoredPosition);
  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
    addUnwrappedLine();
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_brace:
      // End of the accessor list, optionally followed by `= <initializer>;`.
      nextToken();
      if (FormatTok->is(tok::equal)) {
        while (!eof() && FormatTok->isNot(tok::semi))
          nextToken();
        nextToken();
      }
      addUnwrappedLine();
      return true;
    case tok::l_brace:
      // Accessor with a block body.
      ++Line->Level;
      parseBlock(/*MustBeDeclaration=*/true);
      addUnwrappedLine();
      --Line->Level;
      break;
    case tok::equal:
      if (FormatTok->is(TT_FatArrow)) {
        // Expression-bodied accessor: consume up to and including the ';'.
        ++Line->Level;
        do {
          nextToken();
        } while (!eof() && FormatTok->isNot(tok::semi));
        nextToken();
        addUnwrappedLine();
        --Line->Level;
        break;
      }
      nextToken();
      break;
    default:
      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
                             Keywords.kw_set) &&
          !IsTrivialPropertyAccessor) {
        // Non-trivial get/set needs to be on its own line.
        addUnwrappedLine();
      }
      nextToken();
    }
  } while (!eof());

  // Unreachable for well-formed code (paired '{' and '}').
  return true;
}

// Tries to parse a C++ lambda starting at '['.
//
// Returns false if the language is not C++ (after consuming the '[') or if
// tryToParseLambdaIntroducer() returns false. Otherwise returns true: either
// the scan reached the lambda's '{', in which case the '[' and '{' are
// finalized as TT_LambdaLSquare / TT_LambdaLBrace and the body is parsed as a
// child block, or it stopped early at a token that is not accepted between
// the introducer and the body, in which case no lambda types are set.
bool UnwrappedLineParser::tryToParseLambda() {
  assert(FormatTok->is(tok::l_square));
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  bool SeenArrow = false;
  bool InTemplateParameterList = false;

  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      // Not reachable: the loop condition already excludes '{'.
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_class:
    case tok::kw_template:
    case tok::kw_typename:
      assert(FormatTok->Previous);
      if (FormatTok->Previous->is(tok::less))
        InTemplateParameterList = true;
      nextToken();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as an heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setFinalizedType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    default:
      return true;
    }
  }
  FormatTok->setFinalizedType(TT_LambdaLBrace);
  LSquare.setFinalizedType(TT_LambdaLSquare);
  parseChildBlock();
  return true;
}

bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
  const FormatToken *Previous = FormatTok->Previous;
  const FormatToken *LeftSquare = FormatTok;
  nextToken();
  if (Previous &&
      (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
                         tok::kw_delete, tok::l_square) ||
       LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() ||
       Previous->isSimpleTypeSpecifier())) {
    return false;
  }
  if (FormatTok->is(tok::l_square))
    return false;
  if (FormatTok->is(tok::r_square)) {
    const FormatToken *Next = Tokens->peekNextToken();
    if (Next->is(tok::greater))
      return false;
  }
  parseSquare(/*LambdaIntroducer=*/true);
return true; 2162 } 2163 2164 void UnwrappedLineParser::tryToParseJSFunction() { 2165 assert(FormatTok->is(Keywords.kw_function) || 2166 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 2167 if (FormatTok->is(Keywords.kw_async)) 2168 nextToken(); 2169 // Consume "function". 2170 nextToken(); 2171 2172 // Consume * (generator function). Treat it like C++'s overloaded operators. 2173 if (FormatTok->is(tok::star)) { 2174 FormatTok->setFinalizedType(TT_OverloadedOperator); 2175 nextToken(); 2176 } 2177 2178 // Consume function name. 2179 if (FormatTok->is(tok::identifier)) 2180 nextToken(); 2181 2182 if (FormatTok->isNot(tok::l_paren)) 2183 return; 2184 2185 // Parse formal parameter list. 2186 parseParens(); 2187 2188 if (FormatTok->is(tok::colon)) { 2189 // Parse a type definition. 2190 nextToken(); 2191 2192 // Eat the type declaration. For braced inline object types, balance braces, 2193 // otherwise just parse until finding an l_brace for the function body. 2194 if (FormatTok->is(tok::l_brace)) 2195 tryToParseBracedList(); 2196 else 2197 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 2198 nextToken(); 2199 } 2200 2201 if (FormatTok->is(tok::semi)) 2202 return; 2203 2204 parseChildBlock(); 2205 } 2206 2207 bool UnwrappedLineParser::tryToParseBracedList() { 2208 if (FormatTok->is(BK_Unknown)) 2209 calculateBraceTypes(); 2210 assert(FormatTok->isNot(BK_Unknown)); 2211 if (FormatTok->is(BK_Block)) 2212 return false; 2213 nextToken(); 2214 parseBracedList(); 2215 return true; 2216 } 2217 2218 bool UnwrappedLineParser::tryToParseChildBlock() { 2219 assert(Style.isJavaScript() || Style.isCSharp()); 2220 assert(FormatTok->is(TT_FatArrow)); 2221 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 2222 // They always start an expression or a child block if followed by a curly 2223 // brace. 
2224 nextToken(); 2225 if (FormatTok->isNot(tok::l_brace)) 2226 return false; 2227 parseChildBlock(); 2228 return true; 2229 } 2230 2231 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 2232 bool IsEnum, 2233 tok::TokenKind ClosingBraceKind) { 2234 bool HasError = false; 2235 2236 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 2237 // replace this by using parseAssignmentExpression() inside. 2238 do { 2239 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2240 tryToParseChildBlock()) { 2241 continue; 2242 } 2243 if (Style.isJavaScript()) { 2244 if (FormatTok->is(Keywords.kw_function) || 2245 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 2246 tryToParseJSFunction(); 2247 continue; 2248 } 2249 if (FormatTok->is(tok::l_brace)) { 2250 // Could be a method inside of a braced list `{a() { return 1; }}`. 2251 if (tryToParseBracedList()) 2252 continue; 2253 parseChildBlock(); 2254 } 2255 } 2256 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 2257 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2258 addUnwrappedLine(); 2259 nextToken(); 2260 return !HasError; 2261 } 2262 switch (FormatTok->Tok.getKind()) { 2263 case tok::l_square: 2264 if (Style.isCSharp()) 2265 parseSquare(); 2266 else 2267 tryToParseLambda(); 2268 break; 2269 case tok::l_paren: 2270 parseParens(); 2271 // JavaScript can just have free standing methods and getters/setters in 2272 // object literals. Detect them by a "{" following ")". 2273 if (Style.isJavaScript()) { 2274 if (FormatTok->is(tok::l_brace)) 2275 parseChildBlock(); 2276 break; 2277 } 2278 break; 2279 case tok::l_brace: 2280 // Assume there are no blocks inside a braced init list apart 2281 // from the ones we explicitly parse out (like lambdas). 
2282 FormatTok->setBlockKind(BK_BracedInit); 2283 nextToken(); 2284 parseBracedList(); 2285 break; 2286 case tok::less: 2287 if (Style.Language == FormatStyle::LK_Proto || 2288 ClosingBraceKind == tok::greater) { 2289 nextToken(); 2290 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2291 /*ClosingBraceKind=*/tok::greater); 2292 } else { 2293 nextToken(); 2294 } 2295 break; 2296 case tok::semi: 2297 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2298 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 2299 // used for error recovery if we have otherwise determined that this is 2300 // a braced list. 2301 if (Style.isJavaScript()) { 2302 nextToken(); 2303 break; 2304 } 2305 HasError = true; 2306 if (!ContinueOnSemicolons) 2307 return !HasError; 2308 nextToken(); 2309 break; 2310 case tok::comma: 2311 nextToken(); 2312 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2313 addUnwrappedLine(); 2314 break; 2315 default: 2316 nextToken(); 2317 break; 2318 } 2319 } while (!eof()); 2320 return false; 2321 } 2322 2323 /// \brief Parses a pair of parentheses (and everything between them). 2324 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all 2325 /// double ampersands. This only counts for the current parens scope. 2326 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { 2327 assert(FormatTok->is(tok::l_paren) && "'(' expected."); 2328 nextToken(); 2329 do { 2330 switch (FormatTok->Tok.getKind()) { 2331 case tok::l_paren: 2332 parseParens(); 2333 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2334 parseChildBlock(); 2335 break; 2336 case tok::r_paren: 2337 nextToken(); 2338 return; 2339 case tok::r_brace: 2340 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2341 return; 2342 case tok::l_square: 2343 tryToParseLambda(); 2344 break; 2345 case tok::l_brace: 2346 if (!tryToParseBracedList()) 2347 parseChildBlock(); 2348 break; 2349 case tok::at: 2350 nextToken(); 2351 if (FormatTok->is(tok::l_brace)) { 2352 nextToken(); 2353 parseBracedList(); 2354 } 2355 break; 2356 case tok::equal: 2357 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2358 tryToParseChildBlock(); 2359 else 2360 nextToken(); 2361 break; 2362 case tok::kw_class: 2363 if (Style.isJavaScript()) 2364 parseRecord(/*ParseAsExpr=*/true); 2365 else 2366 nextToken(); 2367 break; 2368 case tok::identifier: 2369 if (Style.isJavaScript() && 2370 (FormatTok->is(Keywords.kw_function) || 2371 FormatTok->startsSequence(Keywords.kw_async, 2372 Keywords.kw_function))) { 2373 tryToParseJSFunction(); 2374 } else { 2375 nextToken(); 2376 } 2377 break; 2378 case tok::kw_requires: { 2379 auto RequiresToken = FormatTok; 2380 nextToken(); 2381 parseRequiresExpression(RequiresToken); 2382 break; 2383 } 2384 case tok::ampamp: 2385 if (AmpAmpTokenType != TT_Unknown) 2386 FormatTok->setFinalizedType(AmpAmpTokenType); 2387 LLVM_FALLTHROUGH; 2388 default: 2389 nextToken(); 2390 break; 2391 } 2392 } while (!eof()); 2393 } 2394 2395 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2396 if (!LambdaIntroducer) { 2397 assert(FormatTok->is(tok::l_square) && "'[' expected."); 2398 if (tryToParseLambda()) 2399 return; 2400 } 2401 do { 2402 switch (FormatTok->Tok.getKind()) { 2403 case tok::l_paren: 2404 parseParens(); 2405 break; 2406 case tok::r_square: 2407 nextToken(); 2408 return; 2409 case tok::r_brace: 2410 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2411 return; 2412 case tok::l_square: 2413 parseSquare(); 2414 break; 2415 case tok::l_brace: { 2416 if (!tryToParseBracedList()) 2417 parseChildBlock(); 2418 break; 2419 } 2420 case tok::at: 2421 nextToken(); 2422 if (FormatTok->is(tok::l_brace)) { 2423 nextToken(); 2424 parseBracedList(); 2425 } 2426 break; 2427 default: 2428 nextToken(); 2429 break; 2430 } 2431 } while (!eof()); 2432 } 2433 2434 void UnwrappedLineParser::keepAncestorBraces() { 2435 if (!Style.RemoveBracesLLVM) 2436 return; 2437 2438 const int MaxNestingLevels = 2; 2439 const int Size = NestedTooDeep.size(); 2440 if (Size >= MaxNestingLevels) 2441 NestedTooDeep[Size - MaxNestingLevels] = true; 2442 NestedTooDeep.push_back(false); 2443 } 2444 2445 static FormatToken *getLastNonComment(const UnwrappedLine &Line) { 2446 for (const auto &Token : llvm::reverse(Line.Tokens)) 2447 if (Token.Tok->isNot(tok::comment)) 2448 return Token.Tok; 2449 2450 return nullptr; 2451 } 2452 2453 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) { 2454 FormatToken *Tok = nullptr; 2455 2456 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() && 2457 PreprocessorDirectives.empty()) { 2458 Tok = getLastNonComment(*Line); 2459 assert(Tok); 2460 if (Tok->BraceCount < 0) { 2461 assert(Tok->BraceCount == -1); 2462 Tok = nullptr; 2463 } else { 2464 Tok->BraceCount = -1; 2465 } 2466 } 2467 2468 addUnwrappedLine(); 2469 ++Line->Level; 2470 parseStructuralElement(); 2471 2472 if (Tok) { 2473 assert(!Line->InPPDirective); 2474 Tok = nullptr; 2475 for (const auto &L : llvm::reverse(*CurrentLines)) { 2476 if (!L.InPPDirective && getLastNonComment(L)) { 2477 Tok = L.Tokens.back().Tok; 2478 break; 2479 } 2480 } 2481 assert(Tok); 2482 ++Tok->BraceCount; 2483 } 2484 2485 if (CheckEOF && FormatTok->is(tok::eof)) 2486 addUnwrappedLine(); 2487 2488 --Line->Level; 2489 } 2490 2491 static void markOptionalBraces(FormatToken *LeftBrace) { 2492 if (!LeftBrace) 2493 return; 2494 2495 
assert(LeftBrace->is(tok::l_brace)); 2496 2497 FormatToken *RightBrace = LeftBrace->MatchingParen; 2498 if (!RightBrace) { 2499 assert(!LeftBrace->Optional); 2500 return; 2501 } 2502 2503 assert(RightBrace->is(tok::r_brace)); 2504 assert(RightBrace->MatchingParen == LeftBrace); 2505 assert(LeftBrace->Optional == RightBrace->Optional); 2506 2507 LeftBrace->Optional = true; 2508 RightBrace->Optional = true; 2509 } 2510 2511 void UnwrappedLineParser::handleAttributes() { 2512 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2513 if (FormatTok->is(TT_AttributeMacro)) 2514 nextToken(); 2515 handleCppAttributes(); 2516 } 2517 2518 bool UnwrappedLineParser::handleCppAttributes() { 2519 // Handle [[likely]] / [[unlikely]] attributes. 2520 if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) { 2521 parseSquare(); 2522 return true; 2523 } 2524 return false; 2525 } 2526 2527 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, 2528 bool KeepBraces) { 2529 assert(FormatTok->is(tok::kw_if) && "'if' expected"); 2530 nextToken(); 2531 if (FormatTok->is(tok::exclaim)) 2532 nextToken(); 2533 2534 bool KeepIfBraces = true; 2535 if (FormatTok->is(tok::kw_consteval)) { 2536 nextToken(); 2537 } else { 2538 if (Style.RemoveBracesLLVM) 2539 KeepIfBraces = KeepBraces; 2540 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier)) 2541 nextToken(); 2542 if (FormatTok->is(tok::l_paren)) 2543 parseParens(); 2544 } 2545 handleAttributes(); 2546 2547 bool NeedsUnwrappedLine = false; 2548 keepAncestorBraces(); 2549 2550 FormatToken *IfLeftBrace = nullptr; 2551 IfStmtKind IfBlockKind = IfStmtKind::NotIf; 2552 2553 if (FormatTok->is(tok::l_brace)) { 2554 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 2555 IfLeftBrace = FormatTok; 2556 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2557 IfBlockKind = parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2558 /*MunchSemi=*/true, KeepIfBraces); 2559 if (Style.BraceWrapping.BeforeElse) 2560 
addUnwrappedLine(); 2561 else 2562 NeedsUnwrappedLine = true; 2563 } else { 2564 parseUnbracedBody(); 2565 } 2566 2567 if (Style.RemoveBracesLLVM) { 2568 assert(!NestedTooDeep.empty()); 2569 KeepIfBraces = KeepIfBraces || 2570 (IfLeftBrace && !IfLeftBrace->MatchingParen) || 2571 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || 2572 IfBlockKind == IfStmtKind::IfElseIf; 2573 } 2574 2575 bool KeepElseBraces = KeepIfBraces; 2576 FormatToken *ElseLeftBrace = nullptr; 2577 IfStmtKind Kind = IfStmtKind::IfOnly; 2578 2579 if (FormatTok->is(tok::kw_else)) { 2580 if (Style.RemoveBracesLLVM) { 2581 NestedTooDeep.back() = false; 2582 Kind = IfStmtKind::IfElse; 2583 } 2584 nextToken(); 2585 handleAttributes(); 2586 if (FormatTok->is(tok::l_brace)) { 2587 FormatTok->setFinalizedType(TT_ElseLBrace); 2588 ElseLeftBrace = FormatTok; 2589 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2590 if (parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2591 /*MunchSemi=*/true, 2592 KeepElseBraces) == IfStmtKind::IfOnly) { 2593 Kind = IfStmtKind::IfElseIf; 2594 } 2595 addUnwrappedLine(); 2596 } else if (FormatTok->is(tok::kw_if)) { 2597 const FormatToken *Previous = Tokens->getPreviousToken(); 2598 assert(Previous); 2599 const bool IsPrecededByComment = Previous->is(tok::comment); 2600 if (IsPrecededByComment) { 2601 addUnwrappedLine(); 2602 ++Line->Level; 2603 } 2604 bool TooDeep = true; 2605 if (Style.RemoveBracesLLVM) { 2606 Kind = IfStmtKind::IfElseIf; 2607 TooDeep = NestedTooDeep.pop_back_val(); 2608 } 2609 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces); 2610 if (Style.RemoveBracesLLVM) 2611 NestedTooDeep.push_back(TooDeep); 2612 if (IsPrecededByComment) 2613 --Line->Level; 2614 } else { 2615 parseUnbracedBody(/*CheckEOF=*/true); 2616 } 2617 } else { 2618 if (Style.RemoveBracesLLVM) 2619 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; 2620 if (NeedsUnwrappedLine) 2621 addUnwrappedLine(); 2622 } 2623 2624 if 
(!Style.RemoveBracesLLVM) 2625 return nullptr; 2626 2627 assert(!NestedTooDeep.empty()); 2628 KeepElseBraces = KeepElseBraces || 2629 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || 2630 NestedTooDeep.back(); 2631 2632 NestedTooDeep.pop_back(); 2633 2634 if (!KeepIfBraces && !KeepElseBraces) { 2635 markOptionalBraces(IfLeftBrace); 2636 markOptionalBraces(ElseLeftBrace); 2637 } else if (IfLeftBrace) { 2638 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; 2639 if (IfRightBrace) { 2640 assert(IfRightBrace->MatchingParen == IfLeftBrace); 2641 assert(!IfLeftBrace->Optional); 2642 assert(!IfRightBrace->Optional); 2643 IfLeftBrace->MatchingParen = nullptr; 2644 IfRightBrace->MatchingParen = nullptr; 2645 } 2646 } 2647 2648 if (IfKind) 2649 *IfKind = Kind; 2650 2651 return IfLeftBrace; 2652 } 2653 2654 void UnwrappedLineParser::parseTryCatch() { 2655 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2656 nextToken(); 2657 bool NeedsUnwrappedLine = false; 2658 if (FormatTok->is(tok::colon)) { 2659 // We are in a function try block, what comes is an initializer list. 2660 nextToken(); 2661 2662 // In case identifiers were removed by clang-tidy, what might follow is 2663 // multiple commas in sequence - before the first identifier. 2664 while (FormatTok->is(tok::comma)) 2665 nextToken(); 2666 2667 while (FormatTok->is(tok::identifier)) { 2668 nextToken(); 2669 if (FormatTok->is(tok::l_paren)) 2670 parseParens(); 2671 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2672 FormatTok->is(tok::l_brace)) { 2673 do { 2674 nextToken(); 2675 } while (!FormatTok->is(tok::r_brace)); 2676 nextToken(); 2677 } 2678 2679 // In case identifiers were removed by clang-tidy, what might follow is 2680 // multiple commas in sequence - after the first identifier. 2681 while (FormatTok->is(tok::comma)) 2682 nextToken(); 2683 } 2684 } 2685 // Parse try with resource. 
2686 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) 2687 parseParens(); 2688 2689 keepAncestorBraces(); 2690 2691 if (FormatTok->is(tok::l_brace)) { 2692 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2693 parseBlock(); 2694 if (Style.BraceWrapping.BeforeCatch) 2695 addUnwrappedLine(); 2696 else 2697 NeedsUnwrappedLine = true; 2698 } else if (!FormatTok->is(tok::kw_catch)) { 2699 // The C++ standard requires a compound-statement after a try. 2700 // If there's none, we try to assume there's a structuralElement 2701 // and try to continue. 2702 addUnwrappedLine(); 2703 ++Line->Level; 2704 parseStructuralElement(); 2705 --Line->Level; 2706 } 2707 while (true) { 2708 if (FormatTok->is(tok::at)) 2709 nextToken(); 2710 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 2711 tok::kw___finally) || 2712 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 2713 FormatTok->is(Keywords.kw_finally)) || 2714 (FormatTok->isObjCAtKeyword(tok::objc_catch) || 2715 FormatTok->isObjCAtKeyword(tok::objc_finally)))) { 2716 break; 2717 } 2718 nextToken(); 2719 while (FormatTok->isNot(tok::l_brace)) { 2720 if (FormatTok->is(tok::l_paren)) { 2721 parseParens(); 2722 continue; 2723 } 2724 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) { 2725 if (Style.RemoveBracesLLVM) 2726 NestedTooDeep.pop_back(); 2727 return; 2728 } 2729 nextToken(); 2730 } 2731 NeedsUnwrappedLine = false; 2732 Line->MustBeDeclaration = false; 2733 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2734 parseBlock(); 2735 if (Style.BraceWrapping.BeforeCatch) 2736 addUnwrappedLine(); 2737 else 2738 NeedsUnwrappedLine = true; 2739 } 2740 2741 if (Style.RemoveBracesLLVM) 2742 NestedTooDeep.pop_back(); 2743 2744 if (NeedsUnwrappedLine) 2745 addUnwrappedLine(); 2746 } 2747 2748 void UnwrappedLineParser::parseNamespace() { 2749 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 2750 "'namespace' expected"); 2751 2752 
const FormatToken &InitialToken = *FormatTok; 2753 nextToken(); 2754 if (InitialToken.is(TT_NamespaceMacro)) { 2755 parseParens(); 2756 } else { 2757 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, 2758 tok::l_square, tok::period, tok::l_paren) || 2759 (Style.isCSharp() && FormatTok->is(tok::kw_union))) { 2760 if (FormatTok->is(tok::l_square)) 2761 parseSquare(); 2762 else if (FormatTok->is(tok::l_paren)) 2763 parseParens(); 2764 else 2765 nextToken(); 2766 } 2767 } 2768 if (FormatTok->is(tok::l_brace)) { 2769 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2770 addUnwrappedLine(); 2771 2772 unsigned AddLevels = 2773 Style.NamespaceIndentation == FormatStyle::NI_All || 2774 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 2775 DeclarationScopeStack.size() > 1) 2776 ? 1u 2777 : 0u; 2778 bool ManageWhitesmithsBraces = 2779 AddLevels == 0u && 2780 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 2781 2782 // If we're in Whitesmiths mode, indent the brace if we're not indenting 2783 // the whole block. 2784 if (ManageWhitesmithsBraces) 2785 ++Line->Level; 2786 2787 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true, 2788 /*KeepBraces=*/true, ManageWhitesmithsBraces); 2789 2790 // Munch the semicolon after a namespace. This is more common than one would 2791 // think. Putting the semicolon into its own line is very ugly. 2792 if (FormatTok->is(tok::semi)) 2793 nextToken(); 2794 2795 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); 2796 2797 if (ManageWhitesmithsBraces) 2798 --Line->Level; 2799 } 2800 // FIXME: Add error handling. 
2801 } 2802 2803 void UnwrappedLineParser::parseNew() { 2804 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 2805 nextToken(); 2806 2807 if (Style.isCSharp()) { 2808 do { 2809 if (FormatTok->is(tok::l_brace)) 2810 parseBracedList(); 2811 2812 if (FormatTok->isOneOf(tok::semi, tok::comma)) 2813 return; 2814 2815 nextToken(); 2816 } while (!eof()); 2817 } 2818 2819 if (Style.Language != FormatStyle::LK_Java) 2820 return; 2821 2822 // In Java, we can parse everything up to the parens, which aren't optional. 2823 do { 2824 // There should not be a ;, { or } before the new's open paren. 2825 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 2826 return; 2827 2828 // Consume the parens. 2829 if (FormatTok->is(tok::l_paren)) { 2830 parseParens(); 2831 2832 // If there is a class body of an anonymous class, consume that as child. 2833 if (FormatTok->is(tok::l_brace)) 2834 parseChildBlock(); 2835 return; 2836 } 2837 nextToken(); 2838 } while (!eof()); 2839 } 2840 2841 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) { 2842 keepAncestorBraces(); 2843 2844 if (FormatTok->is(tok::l_brace)) { 2845 if (!KeepBraces) 2846 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 2847 FormatToken *LeftBrace = FormatTok; 2848 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2849 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2850 /*MunchSemi=*/true, KeepBraces); 2851 if (!KeepBraces) { 2852 assert(!NestedTooDeep.empty()); 2853 if (!NestedTooDeep.back()) 2854 markOptionalBraces(LeftBrace); 2855 } 2856 if (WrapRightBrace) 2857 addUnwrappedLine(); 2858 } else { 2859 parseUnbracedBody(); 2860 } 2861 2862 if (!KeepBraces) 2863 NestedTooDeep.pop_back(); 2864 } 2865 2866 void UnwrappedLineParser::parseForOrWhileLoop() { 2867 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 2868 "'for', 'while' or foreach macro expected"); 2869 const bool KeepBraces = !Style.RemoveBracesLLVM || 2870 
!FormatTok->isOneOf(tok::kw_for, tok::kw_while); 2871 2872 nextToken(); 2873 // JS' for await ( ... 2874 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 2875 nextToken(); 2876 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 2877 nextToken(); 2878 if (FormatTok->is(tok::l_paren)) 2879 parseParens(); 2880 2881 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true); 2882 } 2883 2884 void UnwrappedLineParser::parseDoWhile() { 2885 assert(FormatTok->is(tok::kw_do) && "'do' expected"); 2886 nextToken(); 2887 2888 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile); 2889 2890 // FIXME: Add error handling. 2891 if (!FormatTok->is(tok::kw_while)) { 2892 addUnwrappedLine(); 2893 return; 2894 } 2895 2896 // If in Whitesmiths mode, the line with the while() needs to be indented 2897 // to the same level as the block. 2898 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 2899 ++Line->Level; 2900 2901 nextToken(); 2902 parseStructuralElement(); 2903 } 2904 2905 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 2906 nextToken(); 2907 unsigned OldLineLevel = Line->Level; 2908 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 2909 --Line->Level; 2910 if (LeftAlignLabel) 2911 Line->Level = 0; 2912 2913 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 2914 FormatTok->is(tok::l_brace)) { 2915 2916 CompoundStatementIndenter Indenter(this, Line->Level, 2917 Style.BraceWrapping.AfterCaseLabel, 2918 Style.BraceWrapping.IndentBraces); 2919 parseBlock(); 2920 if (FormatTok->is(tok::kw_break)) { 2921 if (Style.BraceWrapping.AfterControlStatement == 2922 FormatStyle::BWACS_Always) { 2923 addUnwrappedLine(); 2924 if (!Style.IndentCaseBlocks && 2925 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 2926 ++Line->Level; 2927 } 2928 } 2929 parseStructuralElement(); 2930 } 2931 addUnwrappedLine(); 2932 } else { 2933 if (FormatTok->is(tok::semi)) 2934 nextToken(); 2935 addUnwrappedLine(); 2936 } 2937 
Line->Level = OldLineLevel; 2938 if (FormatTok->isNot(tok::l_brace)) { 2939 parseStructuralElement(); 2940 addUnwrappedLine(); 2941 } 2942 } 2943 2944 void UnwrappedLineParser::parseCaseLabel() { 2945 assert(FormatTok->is(tok::kw_case) && "'case' expected"); 2946 2947 // FIXME: fix handling of complex expressions here. 2948 do { 2949 nextToken(); 2950 } while (!eof() && !FormatTok->is(tok::colon)); 2951 parseLabel(); 2952 } 2953 2954 void UnwrappedLineParser::parseSwitch() { 2955 assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); 2956 nextToken(); 2957 if (FormatTok->is(tok::l_paren)) 2958 parseParens(); 2959 2960 keepAncestorBraces(); 2961 2962 if (FormatTok->is(tok::l_brace)) { 2963 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2964 parseBlock(); 2965 addUnwrappedLine(); 2966 } else { 2967 addUnwrappedLine(); 2968 ++Line->Level; 2969 parseStructuralElement(); 2970 --Line->Level; 2971 } 2972 2973 if (Style.RemoveBracesLLVM) 2974 NestedTooDeep.pop_back(); 2975 } 2976 2977 // Operators that can follow a C variable. 
2978 static bool isCOperatorFollowingVar(tok::TokenKind kind) { 2979 switch (kind) { 2980 case tok::ampamp: 2981 case tok::ampequal: 2982 case tok::arrow: 2983 case tok::caret: 2984 case tok::caretequal: 2985 case tok::comma: 2986 case tok::ellipsis: 2987 case tok::equal: 2988 case tok::equalequal: 2989 case tok::exclaim: 2990 case tok::exclaimequal: 2991 case tok::greater: 2992 case tok::greaterequal: 2993 case tok::greatergreater: 2994 case tok::greatergreaterequal: 2995 case tok::l_paren: 2996 case tok::l_square: 2997 case tok::less: 2998 case tok::lessequal: 2999 case tok::lessless: 3000 case tok::lesslessequal: 3001 case tok::minus: 3002 case tok::minusequal: 3003 case tok::minusminus: 3004 case tok::percent: 3005 case tok::percentequal: 3006 case tok::period: 3007 case tok::pipe: 3008 case tok::pipeequal: 3009 case tok::pipepipe: 3010 case tok::plus: 3011 case tok::plusequal: 3012 case tok::plusplus: 3013 case tok::question: 3014 case tok::r_brace: 3015 case tok::r_paren: 3016 case tok::r_square: 3017 case tok::semi: 3018 case tok::slash: 3019 case tok::slashequal: 3020 case tok::star: 3021 case tok::starequal: 3022 return true; 3023 default: 3024 return false; 3025 } 3026 } 3027 3028 void UnwrappedLineParser::parseAccessSpecifier() { 3029 FormatToken *AccessSpecifierCandidate = FormatTok; 3030 nextToken(); 3031 // Understand Qt's slots. 3032 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 3033 nextToken(); 3034 // Otherwise, we don't know what it is, and we'd better keep the next token. 3035 if (FormatTok->is(tok::colon)) { 3036 nextToken(); 3037 addUnwrappedLine(); 3038 } else if (!FormatTok->is(tok::coloncolon) && 3039 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { 3040 // Not a variable name nor namespace name. 3041 addUnwrappedLine(); 3042 } else if (AccessSpecifierCandidate) { 3043 // Consider the access specifier to be a C identifier. 
3044 AccessSpecifierCandidate->Tok.setKind(tok::identifier); 3045 } 3046 } 3047 3048 /// \brief Parses a concept definition. 3049 /// \pre The current token has to be the concept keyword. 3050 /// 3051 /// Returns if either the concept has been completely parsed, or if it detects 3052 /// that the concept definition is incorrect. 3053 void UnwrappedLineParser::parseConcept() { 3054 assert(FormatTok->is(tok::kw_concept) && "'concept' expected"); 3055 nextToken(); 3056 if (!FormatTok->is(tok::identifier)) 3057 return; 3058 nextToken(); 3059 if (!FormatTok->is(tok::equal)) 3060 return; 3061 nextToken(); 3062 parseConstraintExpression(); 3063 if (FormatTok->is(tok::semi)) 3064 nextToken(); 3065 addUnwrappedLine(); 3066 } 3067 3068 /// \brief Parses a requires, decides if it is a clause or an expression. 3069 /// \pre The current token has to be the requires keyword. 3070 /// \returns true if it parsed a clause. 3071 bool clang::format::UnwrappedLineParser::parseRequires() { 3072 assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); 3073 auto RequiresToken = FormatTok; 3074 3075 // We try to guess if it is a requires clause, or a requires expression. For 3076 // that we first consume the keyword and check the next token. 3077 nextToken(); 3078 3079 switch (FormatTok->Tok.getKind()) { 3080 case tok::l_brace: 3081 // This can only be an expression, never a clause. 3082 parseRequiresExpression(RequiresToken); 3083 return false; 3084 case tok::l_paren: 3085 // Clauses and expression can start with a paren, it's unclear what we have. 3086 break; 3087 default: 3088 // All other tokens can only be a clause. 3089 parseRequiresClause(RequiresToken); 3090 return true; 3091 } 3092 3093 // Looking forward we would have to decide if there are function declaration 3094 // like arguments to the requires expression: 3095 // requires (T t) { 3096 // Or there is a constraint expression for the requires clause: 3097 // requires (C<T> && ... 
3098 3099 // But first let's look behind. 3100 auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); 3101 3102 if (!PreviousNonComment || 3103 PreviousNonComment->is(TT_RequiresExpressionLBrace)) { 3104 // If there is no token, or an expression left brace, we are a requires 3105 // clause within a requires expression. 3106 parseRequiresClause(RequiresToken); 3107 return true; 3108 } 3109 3110 switch (PreviousNonComment->Tok.getKind()) { 3111 case tok::greater: 3112 case tok::r_paren: 3113 case tok::kw_noexcept: 3114 case tok::kw_const: 3115 // This is a requires clause. 3116 parseRequiresClause(RequiresToken); 3117 return true; 3118 case tok::amp: 3119 case tok::ampamp: { 3120 // This can be either: 3121 // if (... && requires (T t) ...) 3122 // Or 3123 // void member(...) && requires (C<T> ... 3124 // We check the one token before that for a const: 3125 // void member(...) const && requires (C<T> ... 3126 auto PrevPrev = PreviousNonComment->getPreviousNonComment(); 3127 if (PrevPrev && PrevPrev->is(tok::kw_const)) { 3128 parseRequiresClause(RequiresToken); 3129 return true; 3130 } 3131 break; 3132 } 3133 default: 3134 // It's an expression. 3135 parseRequiresExpression(RequiresToken); 3136 return false; 3137 } 3138 3139 // Now we look forward and try to check if the paren content is a parameter 3140 // list. The parameters can be cv-qualified and contain references or 3141 // pointers. 3142 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds 3143 // of stuff: typename, const, *, &, &&, ::, identifiers. 
3144 3145 int NextTokenOffset = 1; 3146 auto NextToken = Tokens->peekNextToken(NextTokenOffset); 3147 auto PeekNext = [&NextTokenOffset, &NextToken, this] { 3148 ++NextTokenOffset; 3149 NextToken = Tokens->peekNextToken(NextTokenOffset); 3150 }; 3151 3152 bool FoundType = false; 3153 bool LastWasColonColon = false; 3154 int OpenAngles = 0; 3155 3156 for (; NextTokenOffset < 50; PeekNext()) { 3157 switch (NextToken->Tok.getKind()) { 3158 case tok::kw_volatile: 3159 case tok::kw_const: 3160 case tok::comma: 3161 parseRequiresExpression(RequiresToken); 3162 return false; 3163 case tok::r_paren: 3164 case tok::pipepipe: 3165 parseRequiresClause(RequiresToken); 3166 return true; 3167 case tok::eof: 3168 // Break out of the loop. 3169 NextTokenOffset = 50; 3170 break; 3171 case tok::coloncolon: 3172 LastWasColonColon = true; 3173 break; 3174 case tok::identifier: 3175 if (FoundType && !LastWasColonColon && OpenAngles == 0) { 3176 parseRequiresExpression(RequiresToken); 3177 return false; 3178 } 3179 FoundType = true; 3180 LastWasColonColon = false; 3181 break; 3182 case tok::less: 3183 ++OpenAngles; 3184 break; 3185 case tok::greater: 3186 --OpenAngles; 3187 break; 3188 default: 3189 if (NextToken->isSimpleTypeSpecifier()) { 3190 parseRequiresExpression(RequiresToken); 3191 return false; 3192 } 3193 break; 3194 } 3195 } 3196 3197 // This seems to be a complicated expression, just assume it's a clause. 3198 parseRequiresClause(RequiresToken); 3199 return true; 3200 } 3201 3202 /// \brief Parses a requires clause. 3203 /// \param RequiresToken The requires keyword token, which starts this clause. 3204 /// \pre We need to be on the next token after the requires keyword. 3205 /// \sa parseRequiresExpression 3206 /// 3207 /// Returns if it either has finished parsing the clause, or it detects, that 3208 /// the clause is incorrect. 
3209 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { 3210 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3211 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3212 3213 // If there is no previous token, we are within a requires expression, 3214 // otherwise we will always have the template or function declaration in front 3215 // of it. 3216 bool InRequiresExpression = 3217 !RequiresToken->Previous || 3218 RequiresToken->Previous->is(TT_RequiresExpressionLBrace); 3219 3220 RequiresToken->setFinalizedType(InRequiresExpression 3221 ? TT_RequiresClauseInARequiresExpression 3222 : TT_RequiresClause); 3223 3224 parseConstraintExpression(); 3225 3226 if (!InRequiresExpression) 3227 FormatTok->Previous->ClosesRequiresClause = true; 3228 } 3229 3230 /// \brief Parses a requires expression. 3231 /// \param RequiresToken The requires keyword token, which starts this clause. 3232 /// \pre We need to be on the next token after the requires keyword. 3233 /// \sa parseRequiresClause 3234 /// 3235 /// Returns if it either has finished parsing the expression, or it detects, 3236 /// that the expression is incorrect. 3237 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) { 3238 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3239 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3240 3241 RequiresToken->setFinalizedType(TT_RequiresExpression); 3242 3243 if (FormatTok->is(tok::l_paren)) { 3244 FormatTok->setFinalizedType(TT_RequiresExpressionLParen); 3245 parseParens(); 3246 } 3247 3248 if (FormatTok->is(tok::l_brace)) { 3249 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); 3250 parseChildBlock(/*CanContainBracedList=*/false, 3251 /*NextLBracesType=*/TT_CompoundRequirementLBrace); 3252 } 3253 } 3254 3255 /// \brief Parses a constraint expression. 3256 /// 3257 /// This is either the definition of a concept, or the body of a requires 3258 /// clause. 
It returns, when the parsing is complete, or the expression is 3259 /// incorrect. 3260 void UnwrappedLineParser::parseConstraintExpression() { 3261 // The special handling for lambdas is needed since tryToParseLambda() eats a 3262 // token and if a requires expression is the last part of a requires clause 3263 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is 3264 // not set on the correct token. Thus we need to be aware if we even expect a 3265 // lambda to be possible. 3266 // template <typename T> requires requires { ... } [[nodiscard]] ...; 3267 bool LambdaNextTimeAllowed = true; 3268 do { 3269 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false); 3270 3271 switch (FormatTok->Tok.getKind()) { 3272 case tok::kw_requires: { 3273 auto RequiresToken = FormatTok; 3274 nextToken(); 3275 parseRequiresExpression(RequiresToken); 3276 break; 3277 } 3278 3279 case tok::l_paren: 3280 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator); 3281 break; 3282 3283 case tok::l_square: 3284 if (!LambdaThisTimeAllowed || !tryToParseLambda()) 3285 return; 3286 break; 3287 3288 case tok::kw_const: 3289 case tok::semi: 3290 case tok::kw_class: 3291 case tok::kw_struct: 3292 case tok::kw_union: 3293 return; 3294 3295 case tok::l_brace: 3296 // Potential function body. 
3297 return; 3298 3299 case tok::ampamp: 3300 case tok::pipepipe: 3301 FormatTok->setFinalizedType(TT_BinaryOperator); 3302 nextToken(); 3303 LambdaNextTimeAllowed = true; 3304 break; 3305 3306 case tok::comma: 3307 case tok::comment: 3308 LambdaNextTimeAllowed = LambdaThisTimeAllowed; 3309 nextToken(); 3310 break; 3311 3312 case tok::kw_sizeof: 3313 case tok::greater: 3314 case tok::greaterequal: 3315 case tok::greatergreater: 3316 case tok::less: 3317 case tok::lessequal: 3318 case tok::lessless: 3319 case tok::equalequal: 3320 case tok::exclaim: 3321 case tok::exclaimequal: 3322 case tok::plus: 3323 case tok::minus: 3324 case tok::star: 3325 case tok::slash: 3326 case tok::kw_decltype: 3327 LambdaNextTimeAllowed = true; 3328 // Just eat them. 3329 nextToken(); 3330 break; 3331 3332 case tok::numeric_constant: 3333 case tok::coloncolon: 3334 case tok::kw_true: 3335 case tok::kw_false: 3336 // Just eat them. 3337 nextToken(); 3338 break; 3339 3340 case tok::kw_static_cast: 3341 case tok::kw_const_cast: 3342 case tok::kw_reinterpret_cast: 3343 case tok::kw_dynamic_cast: 3344 nextToken(); 3345 if (!FormatTok->is(tok::less)) 3346 return; 3347 3348 nextToken(); 3349 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 3350 /*ClosingBraceKind=*/tok::greater); 3351 break; 3352 3353 case tok::kw_bool: 3354 // bool is only allowed if it is directly followed by a paren for a cast: 3355 // concept C = bool(...); 3356 // and bool is the only type, all other types as cast must be inside a 3357 // cast to bool an thus are handled by the other cases. 3358 nextToken(); 3359 if (FormatTok->isNot(tok::l_paren)) 3360 return; 3361 parseParens(); 3362 break; 3363 3364 default: 3365 if (!FormatTok->Tok.getIdentifierInfo()) { 3366 // Identifiers are part of the default case, we check for more then 3367 // tok::identifier to handle builtin type traits. 
3368 return; 3369 } 3370 3371 // We need to differentiate identifiers for a template deduction guide, 3372 // variables, or function return types (the constraint expression has 3373 // ended before that), and basically all other cases. But it's easier to 3374 // check the other way around. 3375 assert(FormatTok->Previous); 3376 switch (FormatTok->Previous->Tok.getKind()) { 3377 case tok::coloncolon: // Nested identifier. 3378 case tok::ampamp: // Start of a function or variable for the 3379 case tok::pipepipe: // constraint expression. 3380 case tok::kw_requires: // Initial identifier of a requires clause. 3381 case tok::equal: // Initial identifier of a concept declaration. 3382 break; 3383 default: 3384 return; 3385 } 3386 3387 // Read identifier with optional template declaration. 3388 nextToken(); 3389 if (FormatTok->is(tok::less)) { 3390 nextToken(); 3391 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 3392 /*ClosingBraceKind=*/tok::greater); 3393 } 3394 break; 3395 } 3396 } while (!eof()); 3397 } 3398 3399 bool UnwrappedLineParser::parseEnum() { 3400 const FormatToken &InitialToken = *FormatTok; 3401 3402 // Won't be 'enum' for NS_ENUMs. 3403 if (FormatTok->is(tok::kw_enum)) 3404 nextToken(); 3405 3406 // In TypeScript, "enum" can also be used as property name, e.g. in interface 3407 // declarations. An "enum" keyword followed by a colon would be a syntax 3408 // error and thus assume it is just an identifier. 3409 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 3410 return false; 3411 3412 // In protobuf, "enum" can be used as a field name. 3413 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 3414 return false; 3415 3416 // Eat up enum class ... 
3417 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct)) 3418 nextToken(); 3419 3420 while (FormatTok->Tok.getIdentifierInfo() || 3421 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 3422 tok::greater, tok::comma, tok::question)) { 3423 nextToken(); 3424 // We can have macros or attributes in between 'enum' and the enum name. 3425 if (FormatTok->is(tok::l_paren)) 3426 parseParens(); 3427 if (FormatTok->is(tok::identifier)) { 3428 nextToken(); 3429 // If there are two identifiers in a row, this is likely an elaborate 3430 // return type. In Java, this can be "implements", etc. 3431 if (Style.isCpp() && FormatTok->is(tok::identifier)) 3432 return false; 3433 } 3434 } 3435 3436 // Just a declaration or something is wrong. 3437 if (FormatTok->isNot(tok::l_brace)) 3438 return true; 3439 FormatTok->setFinalizedType(TT_EnumLBrace); 3440 FormatTok->setBlockKind(BK_Block); 3441 3442 if (Style.Language == FormatStyle::LK_Java) { 3443 // Java enums are different. 3444 parseJavaEnumBody(); 3445 return true; 3446 } 3447 if (Style.Language == FormatStyle::LK_Proto) { 3448 parseBlock(/*MustBeDeclaration=*/true); 3449 return true; 3450 } 3451 3452 if (!Style.AllowShortEnumsOnASingleLine && 3453 ShouldBreakBeforeBrace(Style, InitialToken)) { 3454 addUnwrappedLine(); 3455 } 3456 // Parse enum body. 3457 nextToken(); 3458 if (!Style.AllowShortEnumsOnASingleLine) { 3459 addUnwrappedLine(); 3460 Line->Level += 1; 3461 } 3462 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true, 3463 /*IsEnum=*/true); 3464 if (!Style.AllowShortEnumsOnASingleLine) 3465 Line->Level -= 1; 3466 if (HasError) { 3467 if (FormatTok->is(tok::semi)) 3468 nextToken(); 3469 addUnwrappedLine(); 3470 } 3471 return true; 3472 3473 // There is no addUnwrappedLine() here so that we fall through to parsing a 3474 // structural element afterwards. Thus, in "enum A {} n, m;", 3475 // "} n, m;" will end up in one unwrapped line. 
3476 } 3477 3478 bool UnwrappedLineParser::parseStructLike() { 3479 // parseRecord falls through and does not yet add an unwrapped line as a 3480 // record declaration or definition can start a structural element. 3481 parseRecord(); 3482 // This does not apply to Java, JavaScript and C#. 3483 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 3484 Style.isCSharp()) { 3485 if (FormatTok->is(tok::semi)) 3486 nextToken(); 3487 addUnwrappedLine(); 3488 return true; 3489 } 3490 return false; 3491 } 3492 3493 namespace { 3494 // A class used to set and restore the Token position when peeking 3495 // ahead in the token source. 3496 class ScopedTokenPosition { 3497 unsigned StoredPosition; 3498 FormatTokenSource *Tokens; 3499 3500 public: 3501 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 3502 assert(Tokens && "Tokens expected to not be null"); 3503 StoredPosition = Tokens->getPosition(); 3504 } 3505 3506 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 3507 }; 3508 } // namespace 3509 3510 // Look to see if we have [[ by looking ahead, if 3511 // its not then rewind to the original position. 3512 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 3513 ScopedTokenPosition AutoPosition(Tokens); 3514 FormatToken *Tok = Tokens->getNextToken(); 3515 // We already read the first [ check for the second. 3516 if (!Tok->is(tok::l_square)) 3517 return false; 3518 // Double check that the attribute is just something 3519 // fairly simple. 
3520 while (Tok->isNot(tok::eof)) { 3521 if (Tok->is(tok::r_square)) 3522 break; 3523 Tok = Tokens->getNextToken(); 3524 } 3525 if (Tok->is(tok::eof)) 3526 return false; 3527 Tok = Tokens->getNextToken(); 3528 if (!Tok->is(tok::r_square)) 3529 return false; 3530 Tok = Tokens->getNextToken(); 3531 if (Tok->is(tok::semi)) 3532 return false; 3533 return true; 3534 } 3535 3536 void UnwrappedLineParser::parseJavaEnumBody() { 3537 assert(FormatTok->is(tok::l_brace)); 3538 const FormatToken *OpeningBrace = FormatTok; 3539 3540 // Determine whether the enum is simple, i.e. does not have a semicolon or 3541 // constants with class bodies. Simple enums can be formatted like braced 3542 // lists, contracted to a single line, etc. 3543 unsigned StoredPosition = Tokens->getPosition(); 3544 bool IsSimple = true; 3545 FormatToken *Tok = Tokens->getNextToken(); 3546 while (!Tok->is(tok::eof)) { 3547 if (Tok->is(tok::r_brace)) 3548 break; 3549 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 3550 IsSimple = false; 3551 break; 3552 } 3553 // FIXME: This will also mark enums with braces in the arguments to enum 3554 // constants as "not simple". This is probably fine in practice, though. 3555 Tok = Tokens->getNextToken(); 3556 } 3557 FormatTok = Tokens->setPosition(StoredPosition); 3558 3559 if (IsSimple) { 3560 nextToken(); 3561 parseBracedList(); 3562 addUnwrappedLine(); 3563 return; 3564 } 3565 3566 // Parse the body of a more complex enum. 3567 // First add a line for everything up to the "{". 3568 nextToken(); 3569 addUnwrappedLine(); 3570 ++Line->Level; 3571 3572 // Parse the enum constants. 3573 while (FormatTok->isNot(tok::eof)) { 3574 if (FormatTok->is(tok::l_brace)) { 3575 // Parse the constant's class body. 
3576 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 3577 /*MunchSemi=*/false); 3578 } else if (FormatTok->is(tok::l_paren)) { 3579 parseParens(); 3580 } else if (FormatTok->is(tok::comma)) { 3581 nextToken(); 3582 addUnwrappedLine(); 3583 } else if (FormatTok->is(tok::semi)) { 3584 nextToken(); 3585 addUnwrappedLine(); 3586 break; 3587 } else if (FormatTok->is(tok::r_brace)) { 3588 addUnwrappedLine(); 3589 break; 3590 } else { 3591 nextToken(); 3592 } 3593 } 3594 3595 // Parse the class body after the enum's ";" if any. 3596 parseLevel(OpeningBrace, /*CanContainBracedList=*/true); 3597 nextToken(); 3598 --Line->Level; 3599 addUnwrappedLine(); 3600 } 3601 3602 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3603 const FormatToken &InitialToken = *FormatTok; 3604 nextToken(); 3605 3606 // The actual identifier can be a nested name specifier, and in macros 3607 // it is often token-pasted. 3608 // An [[attribute]] can be before the identifier. 3609 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 3610 tok::kw___attribute, tok::kw___declspec, 3611 tok::kw_alignas, tok::l_square, tok::r_square) || 3612 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 3613 FormatTok->isOneOf(tok::period, tok::comma))) { 3614 if (Style.isJavaScript() && 3615 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 3616 // JavaScript/TypeScript supports inline object types in 3617 // extends/implements positions: 3618 // class Foo implements {bar: number} { } 3619 nextToken(); 3620 if (FormatTok->is(tok::l_brace)) { 3621 tryToParseBracedList(); 3622 continue; 3623 } 3624 } 3625 bool IsNonMacroIdentifier = 3626 FormatTok->is(tok::identifier) && 3627 FormatTok->TokenText != FormatTok->TokenText.upper(); 3628 nextToken(); 3629 // We can have macros or attributes in between 'class' and the class name. 
3630 if (!IsNonMacroIdentifier) { 3631 if (FormatTok->is(tok::l_paren)) { 3632 parseParens(); 3633 } else if (FormatTok->is(TT_AttributeSquare)) { 3634 parseSquare(); 3635 // Consume the closing TT_AttributeSquare. 3636 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare)) 3637 nextToken(); 3638 } 3639 } 3640 } 3641 3642 // Note that parsing away template declarations here leads to incorrectly 3643 // accepting function declarations as record declarations. 3644 // In general, we cannot solve this problem. Consider: 3645 // class A<int> B() {} 3646 // which can be a function definition or a class definition when B() is a 3647 // macro. If we find enough real-world cases where this is a problem, we 3648 // can parse for the 'template' keyword in the beginning of the statement, 3649 // and thus rule out the record production in case there is no template 3650 // (this would still leave us with an ambiguity between template function 3651 // and class declarations). 3652 if (FormatTok->isOneOf(tok::colon, tok::less)) { 3653 do { 3654 if (FormatTok->is(tok::l_brace)) { 3655 calculateBraceTypes(/*ExpectClassBody=*/true); 3656 if (!tryToParseBracedList()) 3657 break; 3658 } 3659 if (FormatTok->is(tok::l_square)) { 3660 FormatToken *Previous = FormatTok->Previous; 3661 if (!Previous || 3662 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) { 3663 // Don't try parsing a lambda if we had a closing parenthesis before, 3664 // it was probably a pointer to an array: int (*)[]. 
3665 if (!tryToParseLambda()) 3666 break; 3667 } else { 3668 parseSquare(); 3669 continue; 3670 } 3671 } 3672 if (FormatTok->is(tok::semi)) 3673 return; 3674 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 3675 addUnwrappedLine(); 3676 nextToken(); 3677 parseCSharpGenericTypeConstraint(); 3678 break; 3679 } 3680 nextToken(); 3681 } while (!eof()); 3682 } 3683 3684 auto GetBraceType = [](const FormatToken &RecordTok) { 3685 switch (RecordTok.Tok.getKind()) { 3686 case tok::kw_class: 3687 return TT_ClassLBrace; 3688 case tok::kw_struct: 3689 return TT_StructLBrace; 3690 case tok::kw_union: 3691 return TT_UnionLBrace; 3692 default: 3693 // Useful for e.g. interface. 3694 return TT_RecordLBrace; 3695 } 3696 }; 3697 if (FormatTok->is(tok::l_brace)) { 3698 FormatTok->setFinalizedType(GetBraceType(InitialToken)); 3699 if (ParseAsExpr) { 3700 parseChildBlock(); 3701 } else { 3702 if (ShouldBreakBeforeBrace(Style, InitialToken)) 3703 addUnwrappedLine(); 3704 3705 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 3706 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 3707 } 3708 } 3709 // There is no addUnwrappedLine() here so that we fall through to parsing a 3710 // structural element afterwards. Thus, in "class A {} n, m;", 3711 // "} n, m;" will end up in one unwrapped line. 
3712 } 3713 3714 void UnwrappedLineParser::parseObjCMethod() { 3715 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) && 3716 "'(' or identifier expected."); 3717 do { 3718 if (FormatTok->is(tok::semi)) { 3719 nextToken(); 3720 addUnwrappedLine(); 3721 return; 3722 } else if (FormatTok->is(tok::l_brace)) { 3723 if (Style.BraceWrapping.AfterFunction) 3724 addUnwrappedLine(); 3725 parseBlock(); 3726 addUnwrappedLine(); 3727 return; 3728 } else { 3729 nextToken(); 3730 } 3731 } while (!eof()); 3732 } 3733 3734 void UnwrappedLineParser::parseObjCProtocolList() { 3735 assert(FormatTok->is(tok::less) && "'<' expected."); 3736 do { 3737 nextToken(); 3738 // Early exit in case someone forgot a close angle. 3739 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3740 FormatTok->isObjCAtKeyword(tok::objc_end)) { 3741 return; 3742 } 3743 } while (!eof() && FormatTok->isNot(tok::greater)); 3744 nextToken(); // Skip '>'. 3745 } 3746 3747 void UnwrappedLineParser::parseObjCUntilAtEnd() { 3748 do { 3749 if (FormatTok->isObjCAtKeyword(tok::objc_end)) { 3750 nextToken(); 3751 addUnwrappedLine(); 3752 break; 3753 } 3754 if (FormatTok->is(tok::l_brace)) { 3755 parseBlock(); 3756 // In ObjC interfaces, nothing should be following the "}". 3757 addUnwrappedLine(); 3758 } else if (FormatTok->is(tok::r_brace)) { 3759 // Ignore stray "}". parseStructuralElement doesn't consume them. 
3760 nextToken(); 3761 addUnwrappedLine(); 3762 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 3763 nextToken(); 3764 parseObjCMethod(); 3765 } else { 3766 parseStructuralElement(); 3767 } 3768 } while (!eof()); 3769 } 3770 3771 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 3772 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 3773 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 3774 nextToken(); 3775 nextToken(); // interface name 3776 3777 // @interface can be followed by a lightweight generic 3778 // specialization list, then either a base class or a category. 3779 if (FormatTok->is(tok::less)) 3780 parseObjCLightweightGenerics(); 3781 if (FormatTok->is(tok::colon)) { 3782 nextToken(); 3783 nextToken(); // base class name 3784 // The base class can also have lightweight generics applied to it. 3785 if (FormatTok->is(tok::less)) 3786 parseObjCLightweightGenerics(); 3787 } else if (FormatTok->is(tok::l_paren)) { 3788 // Skip category, if present. 3789 parseParens(); 3790 } 3791 3792 if (FormatTok->is(tok::less)) 3793 parseObjCProtocolList(); 3794 3795 if (FormatTok->is(tok::l_brace)) { 3796 if (Style.BraceWrapping.AfterObjCDeclaration) 3797 addUnwrappedLine(); 3798 parseBlock(/*MustBeDeclaration=*/true); 3799 } 3800 3801 // With instance variables, this puts '}' on its own line. Without instance 3802 // variables, this ends the @interface line. 3803 addUnwrappedLine(); 3804 3805 parseObjCUntilAtEnd(); 3806 } 3807 3808 void UnwrappedLineParser::parseObjCLightweightGenerics() { 3809 assert(FormatTok->is(tok::less)); 3810 // Unlike protocol lists, generic parameterizations support 3811 // nested angles: 3812 // 3813 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 3814 // NSObject <NSCopying, NSSecureCoding> 3815 // 3816 // so we need to count how many open angles we have left. 
3817 unsigned NumOpenAngles = 1; 3818 do { 3819 nextToken(); 3820 // Early exit in case someone forgot a close angle. 3821 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3822 FormatTok->isObjCAtKeyword(tok::objc_end)) { 3823 break; 3824 } 3825 if (FormatTok->is(tok::less)) { 3826 ++NumOpenAngles; 3827 } else if (FormatTok->is(tok::greater)) { 3828 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 3829 --NumOpenAngles; 3830 } 3831 } while (!eof() && NumOpenAngles != 0); 3832 nextToken(); // Skip '>'. 3833 } 3834 3835 // Returns true for the declaration/definition form of @protocol, 3836 // false for the expression form. 3837 bool UnwrappedLineParser::parseObjCProtocol() { 3838 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 3839 nextToken(); 3840 3841 if (FormatTok->is(tok::l_paren)) { 3842 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 3843 return false; 3844 } 3845 3846 // The definition/declaration form, 3847 // @protocol Foo 3848 // - (int)someMethod; 3849 // @end 3850 3851 nextToken(); // protocol name 3852 3853 if (FormatTok->is(tok::less)) 3854 parseObjCProtocolList(); 3855 3856 // Check for protocol declaration. 3857 if (FormatTok->is(tok::semi)) { 3858 nextToken(); 3859 addUnwrappedLine(); 3860 return true; 3861 } 3862 3863 addUnwrappedLine(); 3864 parseObjCUntilAtEnd(); 3865 return true; 3866 } 3867 3868 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 3869 bool IsImport = FormatTok->is(Keywords.kw_import); 3870 assert(IsImport || FormatTok->is(tok::kw_export)); 3871 nextToken(); 3872 3873 // Consume the "default" in "export default class/function". 3874 if (FormatTok->is(tok::kw_default)) 3875 nextToken(); 3876 3877 // Consume "async function", "function" and "default function", so that these 3878 // get parsed as free-standing JS functions, i.e. do not require a trailing 3879 // semicolon. 
3880 if (FormatTok->is(Keywords.kw_async)) 3881 nextToken(); 3882 if (FormatTok->is(Keywords.kw_function)) { 3883 nextToken(); 3884 return; 3885 } 3886 3887 // For imports, `export *`, `export {...}`, consume the rest of the line up 3888 // to the terminating `;`. For everything else, just return and continue 3889 // parsing the structural element, i.e. the declaration or expression for 3890 // `export default`. 3891 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 3892 !FormatTok->isStringLiteral()) { 3893 return; 3894 } 3895 3896 while (!eof()) { 3897 if (FormatTok->is(tok::semi)) 3898 return; 3899 if (Line->Tokens.empty()) { 3900 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 3901 // import statement should terminate. 3902 return; 3903 } 3904 if (FormatTok->is(tok::l_brace)) { 3905 FormatTok->setBlockKind(BK_Block); 3906 nextToken(); 3907 parseBracedList(); 3908 } else { 3909 nextToken(); 3910 } 3911 } 3912 } 3913 3914 void UnwrappedLineParser::parseStatementMacro() { 3915 nextToken(); 3916 if (FormatTok->is(tok::l_paren)) 3917 parseParens(); 3918 if (FormatTok->is(tok::semi)) 3919 nextToken(); 3920 addUnwrappedLine(); 3921 } 3922 3923 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 3924 StringRef Prefix = "") { 3925 llvm::dbgs() << Prefix << "Line(" << Line.Level 3926 << ", FSC=" << Line.FirstStartColumn << ")" 3927 << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 3928 for (const auto &Node : Line.Tokens) { 3929 llvm::dbgs() << Node.Tok->Tok.getName() << "[" 3930 << "T=" << static_cast<unsigned>(Node.Tok->getType()) 3931 << ", OC=" << Node.Tok->OriginalColumn << "] "; 3932 } 3933 for (const auto &Node : Line.Tokens) 3934 for (const auto &ChildNode : Node.Children) 3935 printDebugInfo(ChildNode, "\nChild: "); 3936 3937 llvm::dbgs() << "\n"; 3938 } 3939 3940 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 3941 if (Line->Tokens.empty()) 3942 return; 3943 LLVM_DEBUG({ 3944 if (CurrentLines == &Lines) 3945 printDebugInfo(*Line); 3946 }); 3947 3948 // If this line closes a block when in Whitesmiths mode, remember that 3949 // information so that the level can be decreased after the line is added. 3950 // This has to happen after the addition of the line since the line itself 3951 // needs to be indented. 3952 bool ClosesWhitesmithsBlock = 3953 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 3954 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 3955 3956 CurrentLines->push_back(std::move(*Line)); 3957 Line->Tokens.clear(); 3958 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 3959 Line->FirstStartColumn = 0; 3960 3961 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 3962 --Line->Level; 3963 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 3964 CurrentLines->append( 3965 std::make_move_iterator(PreprocessorDirectives.begin()), 3966 std::make_move_iterator(PreprocessorDirectives.end())); 3967 PreprocessorDirectives.clear(); 3968 } 3969 // Disconnect the current token from the last token on the previous line. 
3970 FormatTok->Previous = nullptr; 3971 } 3972 3973 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 3974 3975 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 3976 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 3977 FormatTok.NewlinesBefore > 0; 3978 } 3979 3980 // Checks if \p FormatTok is a line comment that continues the line comment 3981 // section on \p Line. 3982 static bool 3983 continuesLineCommentSection(const FormatToken &FormatTok, 3984 const UnwrappedLine &Line, 3985 const llvm::Regex &CommentPragmasRegex) { 3986 if (Line.Tokens.empty()) 3987 return false; 3988 3989 StringRef IndentContent = FormatTok.TokenText; 3990 if (FormatTok.TokenText.startswith("//") || 3991 FormatTok.TokenText.startswith("/*")) { 3992 IndentContent = FormatTok.TokenText.substr(2); 3993 } 3994 if (CommentPragmasRegex.match(IndentContent)) 3995 return false; 3996 3997 // If Line starts with a line comment, then FormatTok continues the comment 3998 // section if its original column is greater or equal to the original start 3999 // column of the line. 4000 // 4001 // Define the min column token of a line as follows: if a line ends in '{' or 4002 // contains a '{' followed by a line comment, then the min column token is 4003 // that '{'. Otherwise, the min column token of the line is the first token of 4004 // the line. 4005 // 4006 // If Line starts with a token other than a line comment, then FormatTok 4007 // continues the comment section if its original column is greater than the 4008 // original start column of the min column token of the line. 
4009 // 4010 // For example, the second line comment continues the first in these cases: 4011 // 4012 // // first line 4013 // // second line 4014 // 4015 // and: 4016 // 4017 // // first line 4018 // // second line 4019 // 4020 // and: 4021 // 4022 // int i; // first line 4023 // // second line 4024 // 4025 // and: 4026 // 4027 // do { // first line 4028 // // second line 4029 // int i; 4030 // } while (true); 4031 // 4032 // and: 4033 // 4034 // enum { 4035 // a, // first line 4036 // // second line 4037 // b 4038 // }; 4039 // 4040 // The second line comment doesn't continue the first in these cases: 4041 // 4042 // // first line 4043 // // second line 4044 // 4045 // and: 4046 // 4047 // int i; // first line 4048 // // second line 4049 // 4050 // and: 4051 // 4052 // do { // first line 4053 // // second line 4054 // int i; 4055 // } while (true); 4056 // 4057 // and: 4058 // 4059 // enum { 4060 // a, // first line 4061 // // second line 4062 // }; 4063 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 4064 4065 // Scan for '{//'. If found, use the column of '{' as a min column for line 4066 // comment section continuation. 4067 const FormatToken *PreviousToken = nullptr; 4068 for (const UnwrappedLineNode &Node : Line.Tokens) { 4069 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4070 isLineComment(*Node.Tok)) { 4071 MinColumnToken = PreviousToken; 4072 break; 4073 } 4074 PreviousToken = Node.Tok; 4075 4076 // Grab the last newline preceding a token in this unwrapped line. 
4077 if (Node.Tok->NewlinesBefore > 0) 4078 MinColumnToken = Node.Tok; 4079 } 4080 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4081 MinColumnToken = PreviousToken; 4082 4083 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4084 MinColumnToken); 4085 } 4086 4087 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4088 bool JustComments = Line->Tokens.empty(); 4089 for (FormatToken *Tok : CommentsBeforeNextToken) { 4090 // Line comments that belong to the same line comment section are put on the 4091 // same line since later we might want to reflow content between them. 4092 // Additional fine-grained breaking of line comment sections is controlled 4093 // by the class BreakableLineCommentSection in case it is desirable to keep 4094 // several line comment sections in the same unwrapped line. 4095 // 4096 // FIXME: Consider putting separate line comment sections as children to the 4097 // unwrapped line instead. 4098 Tok->ContinuesLineCommentSection = 4099 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 4100 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4101 addUnwrappedLine(); 4102 pushToken(Tok); 4103 } 4104 if (NewlineBeforeNext && JustComments) 4105 addUnwrappedLine(); 4106 CommentsBeforeNextToken.clear(); 4107 } 4108 4109 void UnwrappedLineParser::nextToken(int LevelDifference) { 4110 if (eof()) 4111 return; 4112 flushComments(isOnNewLine(*FormatTok)); 4113 pushToken(FormatTok); 4114 FormatToken *Previous = FormatTok; 4115 if (!Style.isJavaScript()) 4116 readToken(LevelDifference); 4117 else 4118 readTokenWithJavaScriptASI(); 4119 FormatTok->Previous = Previous; 4120 } 4121 4122 void UnwrappedLineParser::distributeComments( 4123 const SmallVectorImpl<FormatToken *> &Comments, 4124 const FormatToken *NextTok) { 4125 // Whether or not a line comment token continues a line is controlled by 4126 // the method continuesLineCommentSection, with the following caveat: 4127 // 
4128 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4129 // that each comment line from the trail is aligned with the next token, if 4130 // the next token exists. If a trail exists, the beginning of the maximal 4131 // trail is marked as a start of a new comment section. 4132 // 4133 // For example in this code: 4134 // 4135 // int a; // line about a 4136 // // line 1 about b 4137 // // line 2 about b 4138 // int b; 4139 // 4140 // the two lines about b form a maximal trail, so there are two sections, the 4141 // first one consisting of the single comment "// line about a" and the 4142 // second one consisting of the next two comments. 4143 if (Comments.empty()) 4144 return; 4145 bool ShouldPushCommentsInCurrentLine = true; 4146 bool HasTrailAlignedWithNextToken = false; 4147 unsigned StartOfTrailAlignedWithNextToken = 0; 4148 if (NextTok) { 4149 // We are skipping the first element intentionally. 4150 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4151 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4152 HasTrailAlignedWithNextToken = true; 4153 StartOfTrailAlignedWithNextToken = i; 4154 } 4155 } 4156 } 4157 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4158 FormatToken *FormatTok = Comments[i]; 4159 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4160 FormatTok->ContinuesLineCommentSection = false; 4161 } else { 4162 FormatTok->ContinuesLineCommentSection = 4163 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 4164 } 4165 if (!FormatTok->ContinuesLineCommentSection && 4166 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 4167 ShouldPushCommentsInCurrentLine = false; 4168 } 4169 if (ShouldPushCommentsInCurrentLine) 4170 pushToken(FormatTok); 4171 else 4172 CommentsBeforeNextToken.push_back(FormatTok); 4173 } 4174 } 4175 4176 void UnwrappedLineParser::readToken(int LevelDifference) { 4177 SmallVector<FormatToken *, 1> Comments; 4178 bool 
PreviousWasComment = false; 4179 bool FirstNonCommentOnLine = false; 4180 do { 4181 FormatTok = Tokens->getNextToken(); 4182 assert(FormatTok); 4183 while (FormatTok->getType() == TT_ConflictStart || 4184 FormatTok->getType() == TT_ConflictEnd || 4185 FormatTok->getType() == TT_ConflictAlternative) { 4186 if (FormatTok->getType() == TT_ConflictStart) 4187 conditionalCompilationStart(/*Unreachable=*/false); 4188 else if (FormatTok->getType() == TT_ConflictAlternative) 4189 conditionalCompilationAlternative(); 4190 else if (FormatTok->getType() == TT_ConflictEnd) 4191 conditionalCompilationEnd(); 4192 FormatTok = Tokens->getNextToken(); 4193 FormatTok->MustBreakBefore = true; 4194 } 4195 4196 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4197 const FormatToken &Tok, 4198 bool PreviousWasComment) { 4199 auto IsFirstOnLine = [](const FormatToken &Tok) { 4200 return Tok.HasUnescapedNewline || Tok.IsFirst; 4201 }; 4202 4203 // Consider preprocessor directives preceded by block comments as first 4204 // on line. 4205 if (PreviousWasComment) 4206 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4207 return IsFirstOnLine(Tok); 4208 }; 4209 4210 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4211 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4212 PreviousWasComment = FormatTok->is(tok::comment); 4213 4214 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4215 FirstNonCommentOnLine) { 4216 distributeComments(Comments, FormatTok); 4217 Comments.clear(); 4218 // If there is an unfinished unwrapped line, we flush the preprocessor 4219 // directives only after that unwrapped line was finished later. 
4220 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4221 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4222 assert((LevelDifference >= 0 || 4223 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4224 "LevelDifference makes Line->Level negative"); 4225 Line->Level += LevelDifference; 4226 // Comments stored before the preprocessor directive need to be output 4227 // before the preprocessor directive, at the same level as the 4228 // preprocessor directive, as we consider them to apply to the directive. 4229 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4230 PPBranchLevel > 0) { 4231 Line->Level += PPBranchLevel; 4232 } 4233 flushComments(isOnNewLine(*FormatTok)); 4234 parsePPDirective(); 4235 PreviousWasComment = FormatTok->is(tok::comment); 4236 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4237 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4238 } 4239 4240 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4241 !Line->InPPDirective) { 4242 continue; 4243 } 4244 4245 if (!FormatTok->is(tok::comment)) { 4246 distributeComments(Comments, FormatTok); 4247 Comments.clear(); 4248 return; 4249 } 4250 4251 Comments.push_back(FormatTok); 4252 } while (!eof()); 4253 4254 distributeComments(Comments, nullptr); 4255 Comments.clear(); 4256 } 4257 4258 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 4259 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 4260 if (MustBreakBeforeNextToken) { 4261 Line->Tokens.back().Tok->MustBreakBefore = true; 4262 MustBreakBeforeNextToken = false; 4263 } 4264 } 4265 4266 } // end namespace format 4267 } // end namespace clang 4268