1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "TokenAnnotator.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/Support/Debug.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 #include <algorithm> 23 #include <utility> 24 25 #define DEBUG_TYPE "format-parser" 26 27 namespace clang { 28 namespace format { 29 30 class FormatTokenSource { 31 public: 32 virtual ~FormatTokenSource() {} 33 34 // Returns the next token in the token stream. 35 virtual FormatToken *getNextToken() = 0; 36 37 // Returns the token preceding the token returned by the last call to 38 // getNextToken() in the token stream, or nullptr if no such token exists. 39 virtual FormatToken *getPreviousToken() = 0; 40 41 // Returns the token that would be returned by the next call to 42 // getNextToken(). 43 virtual FormatToken *peekNextToken() = 0; 44 45 // Returns the token that would be returned after the next N calls to 46 // getNextToken(). N needs to be greater than zero, and small enough that 47 // there are still tokens. Check for tok::eof with N-1 before calling it with 48 // N. 49 virtual FormatToken *peekNextToken(int N) = 0; 50 51 // Returns whether we are at the end of the file. 52 // This can be different from whether getNextToken() returned an eof token 53 // when the FormatTokenSource is a view on a part of the token stream. 
54 virtual bool isEOF() = 0; 55 56 // Gets the current position in the token stream, to be used by setPosition(). 57 virtual unsigned getPosition() = 0; 58 59 // Resets the token stream to the state it was in when getPosition() returned 60 // Position, and return the token at that position in the stream. 61 virtual FormatToken *setPosition(unsigned Position) = 0; 62 }; 63 64 namespace { 65 66 class ScopedDeclarationState { 67 public: 68 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, 69 bool MustBeDeclaration) 70 : Line(Line), Stack(Stack) { 71 Line.MustBeDeclaration = MustBeDeclaration; 72 Stack.push_back(MustBeDeclaration); 73 } 74 ~ScopedDeclarationState() { 75 Stack.pop_back(); 76 if (!Stack.empty()) 77 Line.MustBeDeclaration = Stack.back(); 78 else 79 Line.MustBeDeclaration = true; 80 } 81 82 private: 83 UnwrappedLine &Line; 84 llvm::BitVector &Stack; 85 }; 86 87 static bool isLineComment(const FormatToken &FormatTok) { 88 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 89 } 90 91 // Checks if \p FormatTok is a line comment that continues the line comment 92 // \p Previous. The original column of \p MinColumnToken is used to determine 93 // whether \p FormatTok is indented enough to the right to continue \p Previous. 94 static bool continuesLineComment(const FormatToken &FormatTok, 95 const FormatToken *Previous, 96 const FormatToken *MinColumnToken) { 97 if (!Previous || !MinColumnToken) 98 return false; 99 unsigned MinContinueColumn = 100 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 
0 : 1); 101 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 102 isLineComment(*Previous) && 103 FormatTok.OriginalColumn >= MinContinueColumn; 104 } 105 106 class ScopedMacroState : public FormatTokenSource { 107 public: 108 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 109 FormatToken *&ResetToken) 110 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 111 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 112 Token(nullptr), PreviousToken(nullptr) { 113 FakeEOF.Tok.startToken(); 114 FakeEOF.Tok.setKind(tok::eof); 115 TokenSource = this; 116 Line.Level = 0; 117 Line.InPPDirective = true; 118 } 119 120 ~ScopedMacroState() override { 121 TokenSource = PreviousTokenSource; 122 ResetToken = Token; 123 Line.InPPDirective = false; 124 Line.Level = PreviousLineLevel; 125 } 126 127 FormatToken *getNextToken() override { 128 // The \c UnwrappedLineParser guards against this by never calling 129 // \c getNextToken() after it has encountered the first eof token. 
130 assert(!eof()); 131 PreviousToken = Token; 132 Token = PreviousTokenSource->getNextToken(); 133 if (eof()) 134 return &FakeEOF; 135 return Token; 136 } 137 138 FormatToken *getPreviousToken() override { 139 return PreviousTokenSource->getPreviousToken(); 140 } 141 142 FormatToken *peekNextToken() override { 143 if (eof()) 144 return &FakeEOF; 145 return PreviousTokenSource->peekNextToken(); 146 } 147 148 FormatToken *peekNextToken(int N) override { 149 assert(N > 0); 150 if (eof()) 151 return &FakeEOF; 152 return PreviousTokenSource->peekNextToken(N); 153 } 154 155 bool isEOF() override { return PreviousTokenSource->isEOF(); } 156 157 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 158 159 FormatToken *setPosition(unsigned Position) override { 160 PreviousToken = nullptr; 161 Token = PreviousTokenSource->setPosition(Position); 162 return Token; 163 } 164 165 private: 166 bool eof() { 167 return Token && Token->HasUnescapedNewline && 168 !continuesLineComment(*Token, PreviousToken, 169 /*MinColumnToken=*/PreviousToken); 170 } 171 172 FormatToken FakeEOF; 173 UnwrappedLine &Line; 174 FormatTokenSource *&TokenSource; 175 FormatToken *&ResetToken; 176 unsigned PreviousLineLevel; 177 FormatTokenSource *PreviousTokenSource; 178 179 FormatToken *Token; 180 FormatToken *PreviousToken; 181 }; 182 183 } // end anonymous namespace 184 185 class ScopedLineState { 186 public: 187 ScopedLineState(UnwrappedLineParser &Parser, 188 bool SwitchToPreprocessorLines = false) 189 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 190 if (SwitchToPreprocessorLines) 191 Parser.CurrentLines = &Parser.PreprocessorDirectives; 192 else if (!Parser.Line->Tokens.empty()) 193 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 194 PreBlockLine = std::move(Parser.Line); 195 Parser.Line = std::make_unique<UnwrappedLine>(); 196 Parser.Line->Level = PreBlockLine->Level; 197 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 198 } 199 200 
~ScopedLineState() { 201 if (!Parser.Line->Tokens.empty()) 202 Parser.addUnwrappedLine(); 203 assert(Parser.Line->Tokens.empty()); 204 Parser.Line = std::move(PreBlockLine); 205 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 206 Parser.MustBreakBeforeNextToken = true; 207 Parser.CurrentLines = OriginalLines; 208 } 209 210 private: 211 UnwrappedLineParser &Parser; 212 213 std::unique_ptr<UnwrappedLine> PreBlockLine; 214 SmallVectorImpl<UnwrappedLine> *OriginalLines; 215 }; 216 217 class CompoundStatementIndenter { 218 public: 219 CompoundStatementIndenter(UnwrappedLineParser *Parser, 220 const FormatStyle &Style, unsigned &LineLevel) 221 : CompoundStatementIndenter(Parser, LineLevel, 222 Style.BraceWrapping.AfterControlStatement, 223 Style.BraceWrapping.IndentBraces) {} 224 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 225 bool WrapBrace, bool IndentBrace) 226 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 227 if (WrapBrace) 228 Parser->addUnwrappedLine(); 229 if (IndentBrace) 230 ++LineLevel; 231 } 232 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 233 234 private: 235 unsigned &LineLevel; 236 unsigned OldLineLevel; 237 }; 238 239 namespace { 240 241 class IndexedTokenSource : public FormatTokenSource { 242 public: 243 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 244 : Tokens(Tokens), Position(-1) {} 245 246 FormatToken *getNextToken() override { 247 if (Position >= 0 && Tokens[Position]->is(tok::eof)) { 248 LLVM_DEBUG({ 249 llvm::dbgs() << "Next "; 250 dbgToken(Position); 251 }); 252 return Tokens[Position]; 253 } 254 ++Position; 255 LLVM_DEBUG({ 256 llvm::dbgs() << "Next "; 257 dbgToken(Position); 258 }); 259 return Tokens[Position]; 260 } 261 262 FormatToken *getPreviousToken() override { 263 return Position > 0 ? 
Tokens[Position - 1] : nullptr; 264 } 265 266 FormatToken *peekNextToken() override { 267 int Next = Position + 1; 268 LLVM_DEBUG({ 269 llvm::dbgs() << "Peeking "; 270 dbgToken(Next); 271 }); 272 return Tokens[Next]; 273 } 274 275 FormatToken *peekNextToken(int N) override { 276 assert(N > 0); 277 int Next = Position + N; 278 LLVM_DEBUG({ 279 llvm::dbgs() << "Peeking (+" << (N - 1) << ") "; 280 dbgToken(Next); 281 }); 282 return Tokens[Next]; 283 } 284 285 bool isEOF() override { return Tokens[Position]->is(tok::eof); } 286 287 unsigned getPosition() override { 288 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 289 assert(Position >= 0); 290 return Position; 291 } 292 293 FormatToken *setPosition(unsigned P) override { 294 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 295 Position = P; 296 return Tokens[Position]; 297 } 298 299 void reset() { Position = -1; } 300 301 private: 302 void dbgToken(int Position, llvm::StringRef Indent = "") { 303 FormatToken *Tok = Tokens[Position]; 304 llvm::dbgs() << Indent << "[" << Position 305 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 306 << ", Macro: " << !!Tok->MacroCtx << "\n"; 307 } 308 309 ArrayRef<FormatToken *> Tokens; 310 int Position; 311 }; 312 313 } // end anonymous namespace 314 315 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 316 const AdditionalKeywords &Keywords, 317 unsigned FirstStartColumn, 318 ArrayRef<FormatToken *> Tokens, 319 UnwrappedLineConsumer &Callback) 320 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 321 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 322 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 323 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 324 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 325 ? 
IG_Rejected 326 : IG_Inited), 327 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 328 329 void UnwrappedLineParser::reset() { 330 PPBranchLevel = -1; 331 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 332 ? IG_Rejected 333 : IG_Inited; 334 IncludeGuardToken = nullptr; 335 Line.reset(new UnwrappedLine); 336 CommentsBeforeNextToken.clear(); 337 FormatTok = nullptr; 338 MustBreakBeforeNextToken = false; 339 PreprocessorDirectives.clear(); 340 CurrentLines = &Lines; 341 DeclarationScopeStack.clear(); 342 NestedTooDeep.clear(); 343 PPStack.clear(); 344 Line->FirstStartColumn = FirstStartColumn; 345 } 346 347 void UnwrappedLineParser::parse() { 348 IndexedTokenSource TokenSource(AllTokens); 349 Line->FirstStartColumn = FirstStartColumn; 350 do { 351 LLVM_DEBUG(llvm::dbgs() << "----\n"); 352 reset(); 353 Tokens = &TokenSource; 354 TokenSource.reset(); 355 356 readToken(); 357 parseFile(); 358 359 // If we found an include guard then all preprocessor directives (other than 360 // the guard) are over-indented by one. 361 if (IncludeGuard == IG_Found) { 362 for (auto &Line : Lines) 363 if (Line.InPPDirective && Line.Level > 0) 364 --Line.Level; 365 } 366 367 // Create line with eof token. 
368 pushToken(FormatTok); 369 addUnwrappedLine(); 370 371 for (const UnwrappedLine &Line : Lines) 372 Callback.consumeUnwrappedLine(Line); 373 374 Callback.finishRun(); 375 Lines.clear(); 376 while (!PPLevelBranchIndex.empty() && 377 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 378 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 379 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 380 } 381 if (!PPLevelBranchIndex.empty()) { 382 ++PPLevelBranchIndex.back(); 383 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 384 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 385 } 386 } while (!PPLevelBranchIndex.empty()); 387 } 388 389 void UnwrappedLineParser::parseFile() { 390 // The top-level context in a file always has declarations, except for pre- 391 // processor directives and JavaScript files. 392 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 393 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 394 MustBeDeclaration); 395 if (Style.Language == FormatStyle::LK_TextProto) 396 parseBracedList(); 397 else 398 parseLevel(); 399 // Make sure to format the remaining tokens. 400 // 401 // LK_TextProto is special since its top-level is parsed as the body of a 402 // braced list, which does not necessarily have natural line separators such 403 // as a semicolon. Comments after the last entry that have been determined to 404 // not belong to that line, as in: 405 // key: value 406 // // endfile comment 407 // do not have a chance to be put on a line of their own until this point. 408 // Here we add this newline before end-of-file comments. 
409 if (Style.Language == FormatStyle::LK_TextProto && 410 !CommentsBeforeNextToken.empty()) { 411 addUnwrappedLine(); 412 } 413 flushComments(true); 414 addUnwrappedLine(); 415 } 416 417 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 418 do { 419 switch (FormatTok->Tok.getKind()) { 420 case tok::l_brace: 421 return; 422 default: 423 if (FormatTok->is(Keywords.kw_where)) { 424 addUnwrappedLine(); 425 nextToken(); 426 parseCSharpGenericTypeConstraint(); 427 break; 428 } 429 nextToken(); 430 break; 431 } 432 } while (!eof()); 433 } 434 435 void UnwrappedLineParser::parseCSharpAttribute() { 436 int UnpairedSquareBrackets = 1; 437 do { 438 switch (FormatTok->Tok.getKind()) { 439 case tok::r_square: 440 nextToken(); 441 --UnpairedSquareBrackets; 442 if (UnpairedSquareBrackets == 0) { 443 addUnwrappedLine(); 444 return; 445 } 446 break; 447 case tok::l_square: 448 ++UnpairedSquareBrackets; 449 nextToken(); 450 break; 451 default: 452 nextToken(); 453 break; 454 } 455 } while (!eof()); 456 } 457 458 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 459 if (!Lines.empty() && Lines.back().InPPDirective) 460 return true; 461 462 const FormatToken *Previous = Tokens->getPreviousToken(); 463 return Previous && Previous->is(tok::comment) && 464 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 465 } 466 467 /// \brief Parses a level, that is ???. 468 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level 469 /// \param CanContainBracedList If the content can contain (at any level) a 470 /// braced list. 471 /// \param NextLBracesType The type for left brace found in this level. 472 /// \param IfKind The \p if statement kind in the level. 473 /// \param IfLeftBrace The left brace of the \p if block in the level. 474 /// \returns true if a simple block of if/else/for/while, or false otherwise. 475 /// (A simple block has a single statement.) 
476 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, 477 bool CanContainBracedList, 478 TokenType NextLBracesType, 479 IfStmtKind *IfKind, 480 FormatToken **IfLeftBrace) { 481 auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace 482 ? TT_BracedListLBrace 483 : TT_Unknown; 484 const bool IsPrecededByCommentOrPPDirective = 485 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective(); 486 FormatToken *IfLBrace = nullptr; 487 bool HasDoWhile = false; 488 bool HasLabel = false; 489 unsigned StatementCount = 0; 490 bool SwitchLabelEncountered = false; 491 492 do { 493 if (FormatTok->getType() == TT_AttributeMacro) { 494 nextToken(); 495 continue; 496 } 497 tok::TokenKind kind = FormatTok->Tok.getKind(); 498 if (FormatTok->getType() == TT_MacroBlockBegin) 499 kind = tok::l_brace; 500 else if (FormatTok->getType() == TT_MacroBlockEnd) 501 kind = tok::r_brace; 502 503 auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind, 504 &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] { 505 parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind, 506 &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile, 507 HasLabel ? nullptr : &HasLabel); 508 ++StatementCount; 509 assert(StatementCount > 0 && "StatementCount overflow!"); 510 }; 511 512 switch (kind) { 513 case tok::comment: 514 nextToken(); 515 addUnwrappedLine(); 516 break; 517 case tok::l_brace: 518 if (NextLBracesType != TT_Unknown) { 519 FormatTok->setFinalizedType(NextLBracesType); 520 } else if (FormatTok->Previous && 521 FormatTok->Previous->ClosesRequiresClause) { 522 // We need the 'default' case here to correctly parse a function 523 // l_brace. 
524 ParseDefault(); 525 continue; 526 } 527 if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) && 528 tryToParseBracedList()) { 529 continue; 530 } 531 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 532 /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr, 533 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList, 534 NextLBracesType); 535 ++StatementCount; 536 assert(StatementCount > 0 && "StatementCount overflow!"); 537 addUnwrappedLine(); 538 break; 539 case tok::r_brace: 540 if (OpeningBrace) { 541 if (!Style.RemoveBracesLLVM || 542 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) { 543 return false; 544 } 545 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel || 546 HasDoWhile || IsPrecededByCommentOrPPDirective || 547 precededByCommentOrPPDirective()) { 548 return false; 549 } 550 const FormatToken *Next = Tokens->peekNextToken(); 551 if (Next->is(tok::comment) && Next->NewlinesBefore == 0) 552 return false; 553 if (IfLeftBrace) 554 *IfLeftBrace = IfLBrace; 555 return true; 556 } 557 nextToken(); 558 addUnwrappedLine(); 559 break; 560 case tok::kw_default: { 561 unsigned StoredPosition = Tokens->getPosition(); 562 FormatToken *Next; 563 do { 564 Next = Tokens->getNextToken(); 565 assert(Next); 566 } while (Next->is(tok::comment)); 567 FormatTok = Tokens->setPosition(StoredPosition); 568 if (Next->isNot(tok::colon)) { 569 // default not followed by ':' is not a case label; treat it like 570 // an identifier. 571 parseStructuralElement(); 572 break; 573 } 574 // Else, if it is 'default:', fall through to the case handling. 575 LLVM_FALLTHROUGH; 576 } 577 case tok::kw_case: 578 if (Style.isJavaScript() && Line->MustBeDeclaration) { 579 // A 'case: string' style field declaration. 
580 parseStructuralElement(); 581 break; 582 } 583 if (!SwitchLabelEncountered && 584 (Style.IndentCaseLabels || 585 (Line->InPPDirective && Line->Level == 1))) { 586 ++Line->Level; 587 } 588 SwitchLabelEncountered = true; 589 parseStructuralElement(); 590 break; 591 case tok::l_square: 592 if (Style.isCSharp()) { 593 nextToken(); 594 parseCSharpAttribute(); 595 break; 596 } 597 if (handleCppAttributes()) 598 break; 599 LLVM_FALLTHROUGH; 600 default: 601 ParseDefault(); 602 break; 603 } 604 } while (!eof()); 605 606 return false; 607 } 608 609 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 610 // We'll parse forward through the tokens until we hit 611 // a closing brace or eof - note that getNextToken() will 612 // parse macros, so this will magically work inside macro 613 // definitions, too. 614 unsigned StoredPosition = Tokens->getPosition(); 615 FormatToken *Tok = FormatTok; 616 const FormatToken *PrevTok = Tok->Previous; 617 // Keep a stack of positions of lbrace tokens. We will 618 // update information about whether an lbrace starts a 619 // braced init list or a different block during the loop. 620 SmallVector<FormatToken *, 8> LBraceStack; 621 assert(Tok->is(tok::l_brace)); 622 do { 623 // Get next non-comment token. 624 FormatToken *NextTok; 625 do { 626 NextTok = Tokens->getNextToken(); 627 } while (NextTok->is(tok::comment)); 628 629 switch (Tok->Tok.getKind()) { 630 case tok::l_brace: 631 if (Style.isJavaScript() && PrevTok) { 632 if (PrevTok->isOneOf(tok::colon, tok::less)) { 633 // A ':' indicates this code is in a type, or a braced list 634 // following a label in an object literal ({a: {b: 1}}). 635 // A '<' could be an object used in a comparison, but that is nonsense 636 // code (can never return true), so more likely it is a generic type 637 // argument (`X<{a: string; b: number}>`). 
638 // The code below could be confused by semicolons between the 639 // individual members in a type member list, which would normally 640 // trigger BK_Block. In both cases, this must be parsed as an inline 641 // braced init. 642 Tok->setBlockKind(BK_BracedInit); 643 } else if (PrevTok->is(tok::r_paren)) { 644 // `) { }` can only occur in function or method declarations in JS. 645 Tok->setBlockKind(BK_Block); 646 } 647 } else { 648 Tok->setBlockKind(BK_Unknown); 649 } 650 LBraceStack.push_back(Tok); 651 break; 652 case tok::r_brace: 653 if (LBraceStack.empty()) 654 break; 655 if (LBraceStack.back()->is(BK_Unknown)) { 656 bool ProbablyBracedList = false; 657 if (Style.Language == FormatStyle::LK_Proto) { 658 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 659 } else { 660 // Skip NextTok over preprocessor lines, otherwise we may not 661 // properly diagnose the block as a braced intializer 662 // if the comma separator appears after the pp directive. 663 while (NextTok->is(tok::hash)) { 664 ScopedMacroState MacroState(*Line, Tokens, NextTok); 665 do { 666 NextTok = Tokens->getNextToken(); 667 } while (NextTok->isNot(tok::eof)); 668 } 669 670 // Using OriginalColumn to distinguish between ObjC methods and 671 // binary operators is a bit hacky. 672 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 673 NextTok->OriginalColumn == 0; 674 675 // Try to detect a braced list. Note that regardless how we mark inner 676 // braces here, we will overwrite the BlockKind later if we parse a 677 // braced list (where all blocks inside are by default braced lists), 678 // or when we explicitly detect blocks (for example while parsing 679 // lambdas). 680 681 // If we already marked the opening brace as braced list, the closing 682 // must also be part of it. 
683 ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace); 684 685 ProbablyBracedList = ProbablyBracedList || 686 (Style.isJavaScript() && 687 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 688 Keywords.kw_as)); 689 ProbablyBracedList = ProbablyBracedList || 690 (Style.isCpp() && NextTok->is(tok::l_paren)); 691 692 // If there is a comma, semicolon or right paren after the closing 693 // brace, we assume this is a braced initializer list. 694 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 695 // braced list in JS. 696 ProbablyBracedList = 697 ProbablyBracedList || 698 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 699 tok::r_paren, tok::r_square, tok::l_brace, 700 tok::ellipsis); 701 702 ProbablyBracedList = 703 ProbablyBracedList || 704 (NextTok->is(tok::identifier) && 705 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)); 706 707 ProbablyBracedList = ProbablyBracedList || 708 (NextTok->is(tok::semi) && 709 (!ExpectClassBody || LBraceStack.size() != 1)); 710 711 ProbablyBracedList = 712 ProbablyBracedList || 713 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 714 715 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 716 // We can have an array subscript after a braced init 717 // list, but C++11 attributes are expected after blocks. 
718 NextTok = Tokens->getNextToken(); 719 ProbablyBracedList = NextTok->isNot(tok::l_square); 720 } 721 } 722 if (ProbablyBracedList) { 723 Tok->setBlockKind(BK_BracedInit); 724 LBraceStack.back()->setBlockKind(BK_BracedInit); 725 } else { 726 Tok->setBlockKind(BK_Block); 727 LBraceStack.back()->setBlockKind(BK_Block); 728 } 729 } 730 LBraceStack.pop_back(); 731 break; 732 case tok::identifier: 733 if (!Tok->is(TT_StatementMacro)) 734 break; 735 LLVM_FALLTHROUGH; 736 case tok::at: 737 case tok::semi: 738 case tok::kw_if: 739 case tok::kw_while: 740 case tok::kw_for: 741 case tok::kw_switch: 742 case tok::kw_try: 743 case tok::kw___try: 744 if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown)) 745 LBraceStack.back()->setBlockKind(BK_Block); 746 break; 747 default: 748 break; 749 } 750 PrevTok = Tok; 751 Tok = NextTok; 752 } while (Tok->isNot(tok::eof) && !LBraceStack.empty()); 753 754 // Assume other blocks for all unclosed opening braces. 755 for (FormatToken *LBrace : LBraceStack) 756 if (LBrace->is(BK_Unknown)) 757 LBrace->setBlockKind(BK_Block); 758 759 FormatTok = Tokens->setPosition(StoredPosition); 760 } 761 762 template <class T> 763 static inline void hash_combine(std::size_t &seed, const T &v) { 764 std::hash<T> hasher; 765 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 766 } 767 768 size_t UnwrappedLineParser::computePPHash() const { 769 size_t h = 0; 770 for (const auto &i : PPStack) { 771 hash_combine(h, size_t(i.Kind)); 772 hash_combine(h, i.Line); 773 } 774 return h; 775 } 776 777 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace 778 // is not null, subtracts its length (plus the preceding space) when computing 779 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before 780 // running the token annotator on it so that we can restore them afterward. 
781 bool UnwrappedLineParser::mightFitOnOneLine( 782 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const { 783 const auto ColumnLimit = Style.ColumnLimit; 784 if (ColumnLimit == 0) 785 return true; 786 787 auto &Tokens = ParsedLine.Tokens; 788 assert(!Tokens.empty()); 789 790 const auto *LastToken = Tokens.back().Tok; 791 assert(LastToken); 792 793 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size()); 794 795 int Index = 0; 796 for (const auto &Token : Tokens) { 797 assert(Token.Tok); 798 auto &SavedToken = SavedTokens[Index++]; 799 SavedToken.Tok = new FormatToken; 800 SavedToken.Tok->copyFrom(*Token.Tok); 801 SavedToken.Children = std::move(Token.Children); 802 } 803 804 AnnotatedLine Line(ParsedLine); 805 assert(Line.Last == LastToken); 806 807 TokenAnnotator Annotator(Style, Keywords); 808 Annotator.annotate(Line); 809 Annotator.calculateFormattingInformation(Line); 810 811 auto Length = LastToken->TotalLength; 812 if (OpeningBrace) { 813 assert(OpeningBrace != Tokens.front().Tok); 814 Length -= OpeningBrace->TokenText.size() + 1; 815 } 816 817 Index = 0; 818 for (auto &Token : Tokens) { 819 const auto &SavedToken = SavedTokens[Index++]; 820 Token.Tok->copyFrom(*SavedToken.Tok); 821 Token.Children = std::move(SavedToken.Children); 822 delete SavedToken.Tok; 823 } 824 825 return Line.Level * Style.IndentWidth + Length <= ColumnLimit; 826 } 827 828 FormatToken *UnwrappedLineParser::parseBlock( 829 bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces, 830 IfStmtKind *IfKind, bool UnindentWhitesmithsBraces, 831 bool CanContainBracedList, TokenType NextLBracesType) { 832 auto HandleVerilogBlockLabel = [this]() { 833 // ":" name 834 if (Style.isVerilog() && FormatTok->is(tok::colon)) { 835 nextToken(); 836 if (Keywords.isVerilogIdentifier(*FormatTok)) 837 nextToken(); 838 } 839 }; 840 841 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) || 842 (Style.isVerilog() && Keywords.isVerilogBegin(*FormatTok))) && 843 "'{' 
or macro block token expected"); 844 FormatToken *Tok = FormatTok; 845 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment); 846 auto Index = CurrentLines->size(); 847 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 848 FormatTok->setBlockKind(BK_Block); 849 850 // For Whitesmiths mode, jump to the next level prior to skipping over the 851 // braces. 852 if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 853 ++Line->Level; 854 855 size_t PPStartHash = computePPHash(); 856 857 const unsigned InitialLevel = Line->Level; 858 nextToken(/*LevelDifference=*/AddLevels); 859 HandleVerilogBlockLabel(); 860 861 // Bail out if there are too many levels. Otherwise, the stack might overflow. 862 if (Line->Level > 300) 863 return nullptr; 864 865 if (MacroBlock && FormatTok->is(tok::l_paren)) 866 parseParens(); 867 868 size_t NbPreprocessorDirectives = 869 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 870 addUnwrappedLine(); 871 size_t OpeningLineIndex = 872 CurrentLines->empty() 873 ? (UnwrappedLine::kInvalidIndex) 874 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 875 876 // Whitesmiths is weird here. The brace needs to be indented for the namespace 877 // block, but the block itself may not be indented depending on the style 878 // settings. This allows the format to back up one level in those cases. 879 if (UnindentWhitesmithsBraces) 880 --Line->Level; 881 882 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 883 MustBeDeclaration); 884 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 885 Line->Level += AddLevels; 886 887 FormatToken *IfLBrace = nullptr; 888 const bool SimpleBlock = 889 parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind, &IfLBrace); 890 891 if (eof()) 892 return IfLBrace; 893 894 if (MacroBlock ? 
!FormatTok->is(TT_MacroBlockEnd) 895 : !FormatTok->is(tok::r_brace)) { 896 Line->Level = InitialLevel; 897 FormatTok->setBlockKind(BK_Block); 898 return IfLBrace; 899 } 900 901 auto RemoveBraces = [=]() mutable { 902 if (!SimpleBlock) 903 return false; 904 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)); 905 assert(FormatTok->is(tok::r_brace)); 906 const bool WrappedOpeningBrace = !Tok->Previous; 907 if (WrappedOpeningBrace && FollowedByComment) 908 return false; 909 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional; 910 if (KeepBraces && !HasRequiredIfBraces) 911 return false; 912 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) { 913 const FormatToken *Previous = Tokens->getPreviousToken(); 914 assert(Previous); 915 if (Previous->is(tok::r_brace) && !Previous->Optional) 916 return false; 917 } 918 assert(!CurrentLines->empty()); 919 auto &LastLine = CurrentLines->back(); 920 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine)) 921 return false; 922 if (Tok->is(TT_ElseLBrace)) 923 return true; 924 if (WrappedOpeningBrace) { 925 assert(Index > 0); 926 --Index; // The line above the wrapped l_brace. 927 Tok = nullptr; 928 } 929 return mightFitOnOneLine((*CurrentLines)[Index], Tok); 930 }; 931 if (RemoveBraces()) { 932 Tok->MatchingParen = FormatTok; 933 FormatTok->MatchingParen = Tok; 934 } 935 936 size_t PPEndHash = computePPHash(); 937 938 // Munch the closing brace. 939 nextToken(/*LevelDifference=*/-AddLevels); 940 HandleVerilogBlockLabel(); 941 942 if (MacroBlock && FormatTok->is(tok::l_paren)) 943 parseParens(); 944 945 if (FormatTok->is(tok::kw_noexcept)) { 946 // A noexcept in a requires expression. 947 nextToken(); 948 } 949 950 if (FormatTok->is(tok::arrow)) { 951 // Following the } or noexcept we can find a trailing return type arrow 952 // as part of an implicit conversion constraint. 
953 nextToken(); 954 parseStructuralElement(); 955 } 956 957 if (MunchSemi && FormatTok->is(tok::semi)) 958 nextToken(); 959 960 Line->Level = InitialLevel; 961 962 if (PPStartHash == PPEndHash) { 963 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 964 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 965 // Update the opening line to add the forward reference as well 966 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 967 CurrentLines->size() - 1; 968 } 969 } 970 971 return IfLBrace; 972 } 973 974 static bool isGoogScope(const UnwrappedLine &Line) { 975 // FIXME: Closure-library specific stuff should not be hard-coded but be 976 // configurable. 977 if (Line.Tokens.size() < 4) 978 return false; 979 auto I = Line.Tokens.begin(); 980 if (I->Tok->TokenText != "goog") 981 return false; 982 ++I; 983 if (I->Tok->isNot(tok::period)) 984 return false; 985 ++I; 986 if (I->Tok->TokenText != "scope") 987 return false; 988 ++I; 989 return I->Tok->is(tok::l_paren); 990 } 991 992 static bool isIIFE(const UnwrappedLine &Line, 993 const AdditionalKeywords &Keywords) { 994 // Look for the start of an immediately invoked anonymous function. 995 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 996 // This is commonly done in JavaScript to create a new, anonymous scope. 997 // Example: (function() { ... 
})() 998 if (Line.Tokens.size() < 3) 999 return false; 1000 auto I = Line.Tokens.begin(); 1001 if (I->Tok->isNot(tok::l_paren)) 1002 return false; 1003 ++I; 1004 if (I->Tok->isNot(Keywords.kw_function)) 1005 return false; 1006 ++I; 1007 return I->Tok->is(tok::l_paren); 1008 } 1009 1010 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 1011 const FormatToken &InitialToken) { 1012 tok::TokenKind Kind = InitialToken.Tok.getKind(); 1013 if (InitialToken.is(TT_NamespaceMacro)) 1014 Kind = tok::kw_namespace; 1015 1016 switch (Kind) { 1017 case tok::kw_namespace: 1018 return Style.BraceWrapping.AfterNamespace; 1019 case tok::kw_class: 1020 return Style.BraceWrapping.AfterClass; 1021 case tok::kw_union: 1022 return Style.BraceWrapping.AfterUnion; 1023 case tok::kw_struct: 1024 return Style.BraceWrapping.AfterStruct; 1025 case tok::kw_enum: 1026 return Style.BraceWrapping.AfterEnum; 1027 default: 1028 return false; 1029 } 1030 } 1031 1032 void UnwrappedLineParser::parseChildBlock( 1033 bool CanContainBracedList, clang::format::TokenType NextLBracesType) { 1034 assert(FormatTok->is(tok::l_brace)); 1035 FormatTok->setBlockKind(BK_Block); 1036 const FormatToken *OpeningBrace = FormatTok; 1037 nextToken(); 1038 { 1039 bool SkipIndent = (Style.isJavaScript() && 1040 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 1041 ScopedLineState LineState(*this); 1042 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 1043 /*MustBeDeclaration=*/false); 1044 Line->Level += SkipIndent ? 0 : 1; 1045 parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType); 1046 flushComments(isOnNewLine(*FormatTok)); 1047 Line->Level -= SkipIndent ? 
0 : 1; 1048 } 1049 nextToken(); 1050 } 1051 1052 void UnwrappedLineParser::parsePPDirective() { 1053 assert(FormatTok->is(tok::hash) && "'#' expected"); 1054 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 1055 1056 nextToken(); 1057 1058 if (!FormatTok->Tok.getIdentifierInfo()) { 1059 parsePPUnknown(); 1060 return; 1061 } 1062 1063 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 1064 case tok::pp_define: 1065 parsePPDefine(); 1066 return; 1067 case tok::pp_if: 1068 parsePPIf(/*IfDef=*/false); 1069 break; 1070 case tok::pp_ifdef: 1071 case tok::pp_ifndef: 1072 parsePPIf(/*IfDef=*/true); 1073 break; 1074 case tok::pp_else: 1075 parsePPElse(); 1076 break; 1077 case tok::pp_elifdef: 1078 case tok::pp_elifndef: 1079 case tok::pp_elif: 1080 parsePPElIf(); 1081 break; 1082 case tok::pp_endif: 1083 parsePPEndIf(); 1084 break; 1085 default: 1086 parsePPUnknown(); 1087 break; 1088 } 1089 } 1090 1091 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 1092 size_t Line = CurrentLines->size(); 1093 if (CurrentLines == &PreprocessorDirectives) 1094 Line += Lines.size(); 1095 1096 if (Unreachable || 1097 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) { 1098 PPStack.push_back({PP_Unreachable, Line}); 1099 } else { 1100 PPStack.push_back({PP_Conditional, Line}); 1101 } 1102 } 1103 1104 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 1105 ++PPBranchLevel; 1106 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 1107 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 1108 PPLevelBranchIndex.push_back(0); 1109 PPLevelBranchCount.push_back(0); 1110 } 1111 PPChainBranchIndex.push(0); 1112 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 1113 conditionalCompilationCondition(Unreachable || Skip); 1114 } 1115 1116 void UnwrappedLineParser::conditionalCompilationAlternative() { 1117 if (!PPStack.empty()) 1118 PPStack.pop_back(); 1119 assert(PPBranchLevel < 
(int)PPLevelBranchIndex.size()); 1120 if (!PPChainBranchIndex.empty()) 1121 ++PPChainBranchIndex.top(); 1122 conditionalCompilationCondition( 1123 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 1124 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 1125 } 1126 1127 void UnwrappedLineParser::conditionalCompilationEnd() { 1128 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1129 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 1130 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) 1131 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 1132 } 1133 // Guard against #endif's without #if. 1134 if (PPBranchLevel > -1) 1135 --PPBranchLevel; 1136 if (!PPChainBranchIndex.empty()) 1137 PPChainBranchIndex.pop(); 1138 if (!PPStack.empty()) 1139 PPStack.pop_back(); 1140 } 1141 1142 void UnwrappedLineParser::parsePPIf(bool IfDef) { 1143 bool IfNDef = FormatTok->is(tok::pp_ifndef); 1144 nextToken(); 1145 bool Unreachable = false; 1146 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 1147 Unreachable = true; 1148 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 1149 Unreachable = true; 1150 conditionalCompilationStart(Unreachable); 1151 FormatToken *IfCondition = FormatTok; 1152 // If there's a #ifndef on the first line, and the only lines before it are 1153 // comments, it could be an include guard. 
1154 bool MaybeIncludeGuard = IfNDef; 1155 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1156 for (auto &Line : Lines) { 1157 if (!Line.Tokens.front().Tok->is(tok::comment)) { 1158 MaybeIncludeGuard = false; 1159 IncludeGuard = IG_Rejected; 1160 break; 1161 } 1162 } 1163 } 1164 --PPBranchLevel; 1165 parsePPUnknown(); 1166 ++PPBranchLevel; 1167 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1168 IncludeGuard = IG_IfNdefed; 1169 IncludeGuardToken = IfCondition; 1170 } 1171 } 1172 1173 void UnwrappedLineParser::parsePPElse() { 1174 // If a potential include guard has an #else, it's not an include guard. 1175 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1176 IncludeGuard = IG_Rejected; 1177 conditionalCompilationAlternative(); 1178 if (PPBranchLevel > -1) 1179 --PPBranchLevel; 1180 parsePPUnknown(); 1181 ++PPBranchLevel; 1182 } 1183 1184 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 1185 1186 void UnwrappedLineParser::parsePPEndIf() { 1187 conditionalCompilationEnd(); 1188 parsePPUnknown(); 1189 // If the #endif of a potential include guard is the last thing in the file, 1190 // then we found an include guard. 
1191 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1192 Style.IndentPPDirectives != FormatStyle::PPDIS_None) { 1193 IncludeGuard = IG_Found; 1194 } 1195 } 1196 1197 void UnwrappedLineParser::parsePPDefine() { 1198 nextToken(); 1199 1200 if (!FormatTok->Tok.getIdentifierInfo()) { 1201 IncludeGuard = IG_Rejected; 1202 IncludeGuardToken = nullptr; 1203 parsePPUnknown(); 1204 return; 1205 } 1206 1207 if (IncludeGuard == IG_IfNdefed && 1208 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1209 IncludeGuard = IG_Defined; 1210 IncludeGuardToken = nullptr; 1211 for (auto &Line : Lines) { 1212 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1213 IncludeGuard = IG_Rejected; 1214 break; 1215 } 1216 } 1217 } 1218 1219 // In the context of a define, even keywords should be treated as normal 1220 // identifiers. Setting the kind to identifier is not enough, because we need 1221 // to treat additional keywords like __except as well, which are already 1222 // identifiers. Setting the identifier info to null interferes with include 1223 // guard processing above, and changes preprocessing nesting. 1224 FormatTok->Tok.setKind(tok::identifier); 1225 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); 1226 nextToken(); 1227 if (FormatTok->Tok.getKind() == tok::l_paren && 1228 !FormatTok->hasWhitespaceBefore()) { 1229 parseParens(); 1230 } 1231 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1232 Line->Level += PPBranchLevel + 1; 1233 addUnwrappedLine(); 1234 ++Line->Level; 1235 1236 // Errors during a preprocessor directive can only affect the layout of the 1237 // preprocessor directive, and thus we ignore them. An alternative approach 1238 // would be to use the same approach we use on the file level (no 1239 // re-indentation if there was a structural error) within the macro 1240 // definition. 
1241 parseFile(); 1242 } 1243 1244 void UnwrappedLineParser::parsePPUnknown() { 1245 do { 1246 nextToken(); 1247 } while (!eof()); 1248 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1249 Line->Level += PPBranchLevel + 1; 1250 addUnwrappedLine(); 1251 } 1252 1253 // Here we exclude certain tokens that are not usually the first token in an 1254 // unwrapped line. This is used in attempt to distinguish macro calls without 1255 // trailing semicolons from other constructs split to several lines. 1256 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1257 // Semicolon can be a null-statement, l_square can be a start of a macro or 1258 // a C++11 attribute, but this doesn't seem to be common. 1259 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 1260 Tok.isNot(TT_AttributeSquare) && 1261 // Tokens that can only be used as binary operators and a part of 1262 // overloaded operator names. 1263 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 1264 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 1265 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 1266 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 1267 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 1268 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 1269 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 1270 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 1271 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 1272 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 1273 Tok.isNot(tok::lesslessequal) && 1274 // Colon is used in labels, base class lists, initializer lists, 1275 // range-based for loops, ternary operator, but should never be the 1276 // first token in an unwrapped line. 1277 Tok.isNot(tok::colon) && 1278 // 'noexcept' is a trailing annotation. 
1279 Tok.isNot(tok::kw_noexcept); 1280 } 1281 1282 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1283 const FormatToken *FormatTok) { 1284 // FIXME: This returns true for C/C++ keywords like 'struct'. 1285 return FormatTok->is(tok::identifier) && 1286 (FormatTok->Tok.getIdentifierInfo() == nullptr || 1287 !FormatTok->isOneOf( 1288 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1289 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1290 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1291 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1292 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1293 Keywords.kw_instanceof, Keywords.kw_interface, 1294 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1295 } 1296 1297 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1298 const FormatToken *FormatTok) { 1299 return FormatTok->Tok.isLiteral() || 1300 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1301 mustBeJSIdent(Keywords, FormatTok); 1302 } 1303 1304 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1305 // when encountered after a value (see mustBeJSIdentOrValue). 1306 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1307 const FormatToken *FormatTok) { 1308 return FormatTok->isOneOf( 1309 tok::kw_return, Keywords.kw_yield, 1310 // conditionals 1311 tok::kw_if, tok::kw_else, 1312 // loops 1313 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1314 // switch/case 1315 tok::kw_switch, tok::kw_case, 1316 // exceptions 1317 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1318 // declaration 1319 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1320 Keywords.kw_async, Keywords.kw_function, 1321 // import/export 1322 Keywords.kw_import, tok::kw_export); 1323 } 1324 1325 // Checks whether a token is a type in K&R C (aka C78). 
1326 static bool isC78Type(const FormatToken &Tok) { 1327 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1328 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1329 tok::identifier); 1330 } 1331 1332 // This function checks whether a token starts the first parameter declaration 1333 // in a K&R C (aka C78) function definition, e.g.: 1334 // int f(a, b) 1335 // short a, b; 1336 // { 1337 // return a + b; 1338 // } 1339 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1340 const FormatToken *FuncName) { 1341 assert(Tok); 1342 assert(Next); 1343 assert(FuncName); 1344 1345 if (FuncName->isNot(tok::identifier)) 1346 return false; 1347 1348 const FormatToken *Prev = FuncName->Previous; 1349 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1350 return false; 1351 1352 if (!isC78Type(*Tok) && 1353 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) { 1354 return false; 1355 } 1356 1357 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1358 return false; 1359 1360 Tok = Tok->Previous; 1361 if (!Tok || Tok->isNot(tok::r_paren)) 1362 return false; 1363 1364 Tok = Tok->Previous; 1365 if (!Tok || Tok->isNot(tok::identifier)) 1366 return false; 1367 1368 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1369 } 1370 1371 void UnwrappedLineParser::parseModuleImport() { 1372 nextToken(); 1373 while (!eof()) { 1374 if (FormatTok->is(tok::colon)) { 1375 FormatTok->setFinalizedType(TT_ModulePartitionColon); 1376 } 1377 // Handle import <foo/bar.h> as we would an include statement. 1378 else if (FormatTok->is(tok::less)) { 1379 nextToken(); 1380 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1381 // Mark tokens up to the trailing line comments as implicit string 1382 // literals. 
1383 if (FormatTok->isNot(tok::comment) && 1384 !FormatTok->TokenText.startswith("//")) { 1385 FormatTok->setFinalizedType(TT_ImplicitStringLiteral); 1386 } 1387 nextToken(); 1388 } 1389 } 1390 if (FormatTok->is(tok::semi)) { 1391 nextToken(); 1392 break; 1393 } 1394 nextToken(); 1395 } 1396 1397 addUnwrappedLine(); 1398 } 1399 1400 // readTokenWithJavaScriptASI reads the next token and terminates the current 1401 // line if JavaScript Automatic Semicolon Insertion must 1402 // happen between the current token and the next token. 1403 // 1404 // This method is conservative - it cannot cover all edge cases of JavaScript, 1405 // but only aims to correctly handle certain well known cases. It *must not* 1406 // return true in speculative cases. 1407 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1408 FormatToken *Previous = FormatTok; 1409 readToken(); 1410 FormatToken *Next = FormatTok; 1411 1412 bool IsOnSameLine = 1413 CommentsBeforeNextToken.empty() 1414 ? Next->NewlinesBefore == 0 1415 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 1416 if (IsOnSameLine) 1417 return; 1418 1419 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 1420 bool PreviousStartsTemplateExpr = 1421 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 1422 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 1423 // If the line contains an '@' sign, the previous token might be an 1424 // annotation, which can precede another identifier/value. 
bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
      return LineNode.Tok->is(tok::at);
    });
    if (HasAt)
      return;
  }
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus))) {
    return addUnwrappedLine();
  }
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next)) {
    return addUnwrappedLine();
  }
}

// Parses one structural element (statement, declaration, label, ...) starting
// at the current token.
//
// The first switch handles constructs that are recognized from their leading
// token alone and delegates to the dedicated parse* routine. Everything else
// falls through to the loop below, which consumes tokens one construct at a
// time until something ends the element or the input is exhausted.
//
// \param IsTopLevel      Enables the K&R C parameter-declaration check after
//                        a parenthesized list (C++ only).
// \param NextLBracesType If not TT_Unknown, the type assigned to an l_brace
//                        met in the loop.
// \param IfKind          Forwarded to parseIfThenElse().
// \param IfLeftBrace     If non-null, receives the token returned by
//                        parseIfThenElse().
// \param HasDoWhile      If non-null, set to true after a do-while was parsed.
// \param HasLabel        If non-null, set to true after a label was parsed.
void UnwrappedLineParser::parseStructuralElement(
    bool IsTopLevel, TokenType NextLBracesType, IfStmtKind *IfKind,
    FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Dispatch on the leading token. A `break` leaves the switch and continues
  // with the generic token loop below; a `return` means the element is done.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setFinalizedType(TT_InlineASMBrace);
      nextToken();
      // Everything up to the matching '}' is marked finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setFinalizedType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp()) {
      nextToken();
    } else {
      parseAccessSpecifier();
    }
    return;
  case tok::kw_if: {
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    FormatToken *Tok = parseIfThenElse(IfKind);
    if (IfLeftBrace)
      *IfLeftBrace = Tok;
    return;
  }
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseDoWhile();
    if (HasDoWhile)
      *HasDoWhile = true;
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'switch: string' field declaration.
      break;
    }
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'default: string' field declaration.
      break;
    }
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'case: string' field declaration.
      nextToken();
      break;
    }
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only `extern "..." {` is handled here; other uses of extern continue
    // with the generic loop.
    if (FormatTok->is(tok::string_literal)) {
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp()) {
        parseModuleImport();
        return;
      }
    }
    // Qt-style `signals:` / `slots:` sections.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic token loop: each iteration handles the construct that starts at
  // the current token. `Previous` is the token before the current one as seen
  // at the start of the iteration.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Objective-C '@' keywords.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->is(tok::l_paren)) {
          // Skip synchronization object
          parseParens();
        }
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      return;
    case tok::kw_requires: {
      if (Style.isCpp()) {
        bool ParsedClause = parseRequires();
        if (ParsedClause)
          return;
      } else {
        nextToken();
      }
      break;
    }
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM)) {
        parseEnum();
      }
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike())
        return;
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class)) {
        nextToken();
      }
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo()) {
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      }
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier()) {
        nextToken();
      }
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown)
        FormatTok->setFinalizedType(NextLBracesType);
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        if (!Line->InPPDirective)
          FormatTok->setFinalizedType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very like to be an actual
          // interface declaration.
          // Peek at the next token, then rewind the token source.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike())
          return;
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;

      FormatToken *PreviousToken = FormatTok;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // True if, ignoring leading comments (and a leading hash in Verilog),
      // the current line consists of exactly one token.
      auto OneTokenSoFar = [&]() {
        const UnwrappedLineNode *Tok = &Line->Tokens.front(),
                                *End = Tok + Line->Tokens.size();
        while (Tok != End && Tok->Tok->is(tok::comment))
          ++Tok;
        // In Verilog, macro invocations start with a backtick which the code
        // treats as a hash.  Skip it.
        if (Style.isVerilog() && Tok != End && Tok->Tok->is(tok::hash))
          ++Tok;
        return End - Tok == 1;
      };
      if (OneTokenSoFar()) {
        if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          if (HasLabel)
            *HasLabel = true;
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase name (at least five characters, or
        // followed by parentheses) that ends its line is taken to be a macro.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
1961 // C# needs this change to ensure that array initialisers and object 1962 // initialisers are indented the same way. 1963 if (Style.isCSharp()) 1964 FormatTok->setBlockKind(BK_BracedInit); 1965 nextToken(); 1966 parseBracedList(); 1967 } else if (Style.Language == FormatStyle::LK_Proto && 1968 FormatTok->is(tok::less)) { 1969 nextToken(); 1970 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 1971 /*ClosingBraceKind=*/tok::greater); 1972 } 1973 break; 1974 case tok::l_square: 1975 parseSquare(); 1976 break; 1977 case tok::kw_new: 1978 parseNew(); 1979 break; 1980 case tok::kw_case: 1981 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1982 // 'case: string' field declaration. 1983 nextToken(); 1984 break; 1985 } 1986 parseCaseLabel(); 1987 break; 1988 default: 1989 nextToken(); 1990 break; 1991 } 1992 } while (!eof()); 1993 } 1994 1995 bool UnwrappedLineParser::tryToParsePropertyAccessor() { 1996 assert(FormatTok->is(tok::l_brace)); 1997 if (!Style.isCSharp()) 1998 return false; 1999 // See if it's a property accessor. 2000 if (FormatTok->Previous->isNot(tok::identifier)) 2001 return false; 2002 2003 // See if we are inside a property accessor. 2004 // 2005 // Record the current tokenPosition so that we can advance and 2006 // reset the current token. `Next` is not set yet so we need 2007 // another way to advance along the token stream. 2008 unsigned int StoredPosition = Tokens->getPosition(); 2009 FormatToken *Tok = Tokens->getNextToken(); 2010 2011 // A trivial property accessor is of the form: 2012 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] } 2013 // Track these as they do not require line breaks to be introduced. 
2014 bool HasSpecialAccessor = false; 2015 bool IsTrivialPropertyAccessor = true; 2016 while (!eof()) { 2017 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private, 2018 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get, 2019 Keywords.kw_init, Keywords.kw_set)) { 2020 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set)) 2021 HasSpecialAccessor = true; 2022 Tok = Tokens->getNextToken(); 2023 continue; 2024 } 2025 if (Tok->isNot(tok::r_brace)) 2026 IsTrivialPropertyAccessor = false; 2027 break; 2028 } 2029 2030 if (!HasSpecialAccessor) { 2031 Tokens->setPosition(StoredPosition); 2032 return false; 2033 } 2034 2035 // Try to parse the property accessor: 2036 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties 2037 Tokens->setPosition(StoredPosition); 2038 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction) 2039 addUnwrappedLine(); 2040 nextToken(); 2041 do { 2042 switch (FormatTok->Tok.getKind()) { 2043 case tok::r_brace: 2044 nextToken(); 2045 if (FormatTok->is(tok::equal)) { 2046 while (!eof() && FormatTok->isNot(tok::semi)) 2047 nextToken(); 2048 nextToken(); 2049 } 2050 addUnwrappedLine(); 2051 return true; 2052 case tok::l_brace: 2053 ++Line->Level; 2054 parseBlock(/*MustBeDeclaration=*/true); 2055 addUnwrappedLine(); 2056 --Line->Level; 2057 break; 2058 case tok::equal: 2059 if (FormatTok->is(TT_FatArrow)) { 2060 ++Line->Level; 2061 do { 2062 nextToken(); 2063 } while (!eof() && FormatTok->isNot(tok::semi)); 2064 nextToken(); 2065 addUnwrappedLine(); 2066 --Line->Level; 2067 break; 2068 } 2069 nextToken(); 2070 break; 2071 default: 2072 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init, 2073 Keywords.kw_set) && 2074 !IsTrivialPropertyAccessor) { 2075 // Non-trivial get/set needs to be on its own line. 2076 addUnwrappedLine(); 2077 } 2078 nextToken(); 2079 } 2080 } while (!eof()); 2081 2082 // Unreachable for well-formed code (paired '{' and '}'). 
2083 return true; 2084 } 2085 2086 bool UnwrappedLineParser::tryToParseLambda() { 2087 assert(FormatTok->is(tok::l_square)); 2088 if (!Style.isCpp()) { 2089 nextToken(); 2090 return false; 2091 } 2092 FormatToken &LSquare = *FormatTok; 2093 if (!tryToParseLambdaIntroducer()) 2094 return false; 2095 2096 bool SeenArrow = false; 2097 bool InTemplateParameterList = false; 2098 2099 while (FormatTok->isNot(tok::l_brace)) { 2100 if (FormatTok->isSimpleTypeSpecifier()) { 2101 nextToken(); 2102 continue; 2103 } 2104 switch (FormatTok->Tok.getKind()) { 2105 case tok::l_brace: 2106 break; 2107 case tok::l_paren: 2108 parseParens(); 2109 break; 2110 case tok::l_square: 2111 parseSquare(); 2112 break; 2113 case tok::kw_class: 2114 case tok::kw_template: 2115 case tok::kw_typename: 2116 assert(FormatTok->Previous); 2117 if (FormatTok->Previous->is(tok::less)) 2118 InTemplateParameterList = true; 2119 nextToken(); 2120 break; 2121 case tok::amp: 2122 case tok::star: 2123 case tok::kw_const: 2124 case tok::comma: 2125 case tok::less: 2126 case tok::greater: 2127 case tok::identifier: 2128 case tok::numeric_constant: 2129 case tok::coloncolon: 2130 case tok::kw_mutable: 2131 case tok::kw_noexcept: 2132 nextToken(); 2133 break; 2134 // Specialization of a template with an integer parameter can contain 2135 // arithmetic, logical, comparison and ternary operators. 2136 // 2137 // FIXME: This also accepts sequences of operators that are not in the scope 2138 // of a template argument list. 2139 // 2140 // In a C++ lambda a template type can only occur after an arrow. We use 2141 // this as an heuristic to distinguish between Objective-C expressions 2142 // followed by an `a->b` expression, such as: 2143 // ([obj func:arg] + a->b) 2144 // Otherwise the code below would parse as a lambda. 
2145 // 2146 // FIXME: This heuristic is incorrect for C++20 generic lambdas with 2147 // explicit template lists: []<bool b = true && false>(U &&u){} 2148 case tok::plus: 2149 case tok::minus: 2150 case tok::exclaim: 2151 case tok::tilde: 2152 case tok::slash: 2153 case tok::percent: 2154 case tok::lessless: 2155 case tok::pipe: 2156 case tok::pipepipe: 2157 case tok::ampamp: 2158 case tok::caret: 2159 case tok::equalequal: 2160 case tok::exclaimequal: 2161 case tok::greaterequal: 2162 case tok::lessequal: 2163 case tok::question: 2164 case tok::colon: 2165 case tok::ellipsis: 2166 case tok::kw_true: 2167 case tok::kw_false: 2168 if (SeenArrow || InTemplateParameterList) { 2169 nextToken(); 2170 break; 2171 } 2172 return true; 2173 case tok::arrow: 2174 // This might or might not actually be a lambda arrow (this could be an 2175 // ObjC method invocation followed by a dereferencing arrow). We might 2176 // reset this back to TT_Unknown in TokenAnnotator. 2177 FormatTok->setFinalizedType(TT_LambdaArrow); 2178 SeenArrow = true; 2179 nextToken(); 2180 break; 2181 default: 2182 return true; 2183 } 2184 } 2185 FormatTok->setFinalizedType(TT_LambdaLBrace); 2186 LSquare.setFinalizedType(TT_LambdaLSquare); 2187 parseChildBlock(); 2188 return true; 2189 } 2190 2191 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 2192 const FormatToken *Previous = FormatTok->Previous; 2193 const FormatToken *LeftSquare = FormatTok; 2194 nextToken(); 2195 if (Previous && 2196 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 2197 tok::kw_delete, tok::l_square) || 2198 LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() || 2199 Previous->isSimpleTypeSpecifier())) { 2200 return false; 2201 } 2202 if (FormatTok->is(tok::l_square)) 2203 return false; 2204 if (FormatTok->is(tok::r_square)) { 2205 const FormatToken *Next = Tokens->peekNextToken(); 2206 if (Next->is(tok::greater)) 2207 return false; 2208 } 2209 parseSquare(/*LambdaIntroducer=*/true); 2210 
return true; 2211 } 2212 2213 void UnwrappedLineParser::tryToParseJSFunction() { 2214 assert(FormatTok->is(Keywords.kw_function) || 2215 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 2216 if (FormatTok->is(Keywords.kw_async)) 2217 nextToken(); 2218 // Consume "function". 2219 nextToken(); 2220 2221 // Consume * (generator function). Treat it like C++'s overloaded operators. 2222 if (FormatTok->is(tok::star)) { 2223 FormatTok->setFinalizedType(TT_OverloadedOperator); 2224 nextToken(); 2225 } 2226 2227 // Consume function name. 2228 if (FormatTok->is(tok::identifier)) 2229 nextToken(); 2230 2231 if (FormatTok->isNot(tok::l_paren)) 2232 return; 2233 2234 // Parse formal parameter list. 2235 parseParens(); 2236 2237 if (FormatTok->is(tok::colon)) { 2238 // Parse a type definition. 2239 nextToken(); 2240 2241 // Eat the type declaration. For braced inline object types, balance braces, 2242 // otherwise just parse until finding an l_brace for the function body. 2243 if (FormatTok->is(tok::l_brace)) 2244 tryToParseBracedList(); 2245 else 2246 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 2247 nextToken(); 2248 } 2249 2250 if (FormatTok->is(tok::semi)) 2251 return; 2252 2253 parseChildBlock(); 2254 } 2255 2256 bool UnwrappedLineParser::tryToParseBracedList() { 2257 if (FormatTok->is(BK_Unknown)) 2258 calculateBraceTypes(); 2259 assert(FormatTok->isNot(BK_Unknown)); 2260 if (FormatTok->is(BK_Block)) 2261 return false; 2262 nextToken(); 2263 parseBracedList(); 2264 return true; 2265 } 2266 2267 bool UnwrappedLineParser::tryToParseChildBlock() { 2268 assert(Style.isJavaScript() || Style.isCSharp()); 2269 assert(FormatTok->is(TT_FatArrow)); 2270 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 2271 // They always start an expression or a child block if followed by a curly 2272 // brace. 
nextToken();
  if (FormatTok->isNot(tok::l_brace))
    return false;
  parseChildBlock();
  return true;
}

/// \brief Parses the contents of a braced (or angle-bracketed) list up to and
/// including its closing token.
/// \pre The opening token has already been consumed.
/// \param ContinueOnSemicolons If false, parsing stops at the first semicolon
/// (outside JavaScript) without consuming it.
/// \param IsEnum If true and short enums are not allowed on a single line, an
/// unwrapped line is added after every comma and before the closing token.
/// \param ClosingBraceKind The token kind that terminates the list.
/// \returns true if the closing token was found and no semicolon was seen on
/// the way (semicolons never count as errors in JavaScript); false otherwise,
/// including when the end of input is reached first.
bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
                                          bool IsEnum,
                                          tok::TokenKind ClosingBraceKind) {
  bool HasError = false;

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssignmentExpression() inside.
  do {
    if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
        tryToParseChildBlock()) {
      continue;
    }
    if (Style.isJavaScript()) {
      if (FormatTok->is(Keywords.kw_function) ||
          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
        tryToParseJSFunction();
        continue;
      }
      if (FormatTok->is(tok::l_brace)) {
        // Could be a method inside of a braced list `{a() { return 1; }}`.
        if (tryToParseBracedList())
          continue;
        parseChildBlock();
      }
    }
    // Checked before the switch so that a custom closing kind such as
    // tok::greater takes precedence over the generic token handling below.
    if (FormatTok->Tok.getKind() == ClosingBraceKind) {
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      nextToken();
      return !HasError;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_square:
      if (Style.isCSharp())
        parseSquare();
      else
        tryToParseLambda();
      break;
    case tok::l_paren:
      parseParens();
      // JavaScript can just have free standing methods and getters/setters in
      // object literals. Detect them by a "{" following ")".
      if (Style.isJavaScript()) {
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      break;
    case tok::l_brace:
      // Assume there are no blocks inside a braced init list apart
      // from the ones we explicitly parse out (like lambdas).
      FormatTok->setBlockKind(BK_BracedInit);
      nextToken();
      parseBracedList();
      break;
    case tok::less:
      // Only treat '<' as an opening bracket in protos or when we are already
      // inside an angle-bracketed list.
      if (Style.Language == FormatStyle::LK_Proto ||
          ClosingBraceKind == tok::greater) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      } else {
        nextToken();
      }
      break;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
      if (Style.isJavaScript()) {
        nextToken();
        break;
      }
      HasError = true;
      if (!ContinueOnSemicolons)
        return !HasError;
      nextToken();
      break;
    case tok::comma:
      nextToken();
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
  return false;
}

/// \brief Parses a pair of parentheses (and everything between them).
/// \pre The current token has to be the opening parenthesis.
/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
/// double ampersands. This only counts for the current parens scope.
///
/// Returns after consuming the matching ")", or, without consuming it, when an
/// unmatched "}" is found. Also stops at the end of input.
void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
  assert(FormatTok->is(tok::l_paren) && "'(' expected.");
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      // Nested parentheses do not inherit AmpAmpTokenType.
      parseParens();
      if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::r_paren:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
      return;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    case tok::at:
      // "@{" is consumed as a braced list.
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::equal:
      if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
        tryToParseChildBlock();
      else
        nextToken();
      break;
    case tok::kw_class:
      if (Style.isJavaScript())
        parseRecord(/*ParseAsExpr=*/true);
      else
        nextToken();
      break;
    case tok::identifier:
      if (Style.isJavaScript() &&
          (FormatTok->is(Keywords.kw_function) ||
           FormatTok->startsSequence(Keywords.kw_async,
                                     Keywords.kw_function))) {
        tryToParseJSFunction();
      } else {
        nextToken();
      }
      break;
    case tok::kw_requires: {
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }
    case tok::ampamp:
      if (AmpAmpTokenType != TT_Unknown)
        FormatTok->setFinalizedType(AmpAmpTokenType);
      LLVM_FALLTHROUGH;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

/// \brief Parses a pair of square brackets (and everything between them).
/// \param LambdaIntroducer If true, the opening "[" has already been consumed
/// by the caller and no lambda detection is attempted. If false, the current
/// token has to be "[" and is first handed to tryToParseLambda(); nothing more
/// is parsed here when that returns true.
///
/// Returns after consuming the matching "]", or, without consuming it, when an
/// unmatched "}" is found. Also stops at the end of input.
void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
  if (!LambdaIntroducer) {
    assert(FormatTok->is(tok::l_square) && "'[' expected.");
    if (tryToParseLambda())
      return;
  }
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      parseParens();
      break;
    case tok::r_square:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside square brackets is an error if there wasn't a matching
      // "{".
      return;
    case tok::l_square:
      parseSquare();
      break;
    case tok::l_brace: {
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    }
    case tok::at:
      // "@{" is consumed as a braced list.
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

/// \brief Records a new nesting level of a braced control statement for the
/// RemoveBracesLLVM option.
///
/// Does nothing unless Style.RemoveBracesLLVM is set. Otherwise flags the
/// entry MaxNestingLevels below the new top of NestedTooDeep (if there is
/// one) and pushes a fresh `false` entry, which the caller is expected to pop
/// again.
void UnwrappedLineParser::keepAncestorBraces() {
  if (!Style.RemoveBracesLLVM)
    return;

  const int MaxNestingLevels = 2;
  const int Size = NestedTooDeep.size();
  if (Size >= MaxNestingLevels)
    NestedTooDeep[Size - MaxNestingLevels] = true;
  NestedTooDeep.push_back(false);
}

/// \brief Returns the last token of \p Line that is not a comment, or nullptr
/// if the line is empty or consists of comments only.
static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
  for (const auto &Token : llvm::reverse(Line.Tokens))
    if (Token.Tok->isNot(tok::comment))
      return Token.Tok;

  return nullptr;
}

/// \brief Parses the single statement that forms the body of a control
/// statement written without braces.
/// \param CheckEOF If true, an additional unwrapped line is added when the
/// body is followed directly by the end of input.
///
/// The body is parsed one level deeper than the current line. With
/// Style.InsertBraces set (and outside preprocessor directives), BraceCount is
/// updated on the token before the body and on the final token of the body.
/// NOTE(review): BraceCount appears to encode the braces to insert (-1 for an
/// opening brace after the token, positive for closing braces) - confirm
/// against the declaration in FormatToken.h.
void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
  FormatToken *Tok = nullptr;

  if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
      PreprocessorDirectives.empty()) {
    Tok = getLastNonComment(*Line);
    assert(Tok);
    if (Tok->BraceCount < 0) {
      // The token is already marked; leave it alone and skip the matching
      // update after the body.
      assert(Tok->BraceCount == -1);
      Tok = nullptr;
    } else {
      Tok->BraceCount = -1;
    }
  }

  addUnwrappedLine();
  ++Line->Level;
  parseStructuralElement();

  if (Tok) {
    assert(!Line->InPPDirective);
    Tok = nullptr;
    // Find the most recent line outside a preprocessor directive that has a
    // non-comment token, and use that line's final token (which may itself be
    // a trailing comment).
    for (const auto &L : llvm::reverse(*CurrentLines)) {
      if (!L.InPPDirective && getLastNonComment(L)) {
        Tok = L.Tokens.back().Tok;
        break;
      }
    }
    assert(Tok);
    ++Tok->BraceCount;
  }

  if (CheckEOF && FormatTok->is(tok::eof))
    addUnwrappedLine();

  --Line->Level;
}

static void markOptionalBraces(FormatToken *LeftBrace) {
  if (!LeftBrace)
    return;

assert(LeftBrace->is(tok::l_brace)); 2545 2546 FormatToken *RightBrace = LeftBrace->MatchingParen; 2547 if (!RightBrace) { 2548 assert(!LeftBrace->Optional); 2549 return; 2550 } 2551 2552 assert(RightBrace->is(tok::r_brace)); 2553 assert(RightBrace->MatchingParen == LeftBrace); 2554 assert(LeftBrace->Optional == RightBrace->Optional); 2555 2556 LeftBrace->Optional = true; 2557 RightBrace->Optional = true; 2558 } 2559 2560 void UnwrappedLineParser::handleAttributes() { 2561 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2562 if (FormatTok->is(TT_AttributeMacro)) 2563 nextToken(); 2564 handleCppAttributes(); 2565 } 2566 2567 bool UnwrappedLineParser::handleCppAttributes() { 2568 // Handle [[likely]] / [[unlikely]] attributes. 2569 if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) { 2570 parseSquare(); 2571 return true; 2572 } 2573 return false; 2574 } 2575 2576 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, 2577 bool KeepBraces) { 2578 assert(FormatTok->is(tok::kw_if) && "'if' expected"); 2579 nextToken(); 2580 if (FormatTok->is(tok::exclaim)) 2581 nextToken(); 2582 2583 bool KeepIfBraces = true; 2584 if (FormatTok->is(tok::kw_consteval)) { 2585 nextToken(); 2586 } else { 2587 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces; 2588 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier)) 2589 nextToken(); 2590 if (FormatTok->is(tok::l_paren)) 2591 parseParens(); 2592 } 2593 handleAttributes(); 2594 2595 bool NeedsUnwrappedLine = false; 2596 keepAncestorBraces(); 2597 2598 FormatToken *IfLeftBrace = nullptr; 2599 IfStmtKind IfBlockKind = IfStmtKind::NotIf; 2600 2601 if (Keywords.isBlockBegin(*FormatTok, Style)) { 2602 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 2603 IfLeftBrace = FormatTok; 2604 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2605 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2606 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind); 2607 if 
(Style.BraceWrapping.BeforeElse) 2608 addUnwrappedLine(); 2609 else 2610 NeedsUnwrappedLine = true; 2611 } else { 2612 parseUnbracedBody(); 2613 } 2614 2615 if (Style.RemoveBracesLLVM) { 2616 assert(!NestedTooDeep.empty()); 2617 KeepIfBraces = KeepIfBraces || 2618 (IfLeftBrace && !IfLeftBrace->MatchingParen) || 2619 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || 2620 IfBlockKind == IfStmtKind::IfElseIf; 2621 } 2622 2623 bool KeepElseBraces = KeepIfBraces; 2624 FormatToken *ElseLeftBrace = nullptr; 2625 IfStmtKind Kind = IfStmtKind::IfOnly; 2626 2627 if (FormatTok->is(tok::kw_else)) { 2628 if (Style.RemoveBracesLLVM) { 2629 NestedTooDeep.back() = false; 2630 Kind = IfStmtKind::IfElse; 2631 } 2632 nextToken(); 2633 handleAttributes(); 2634 if (Keywords.isBlockBegin(*FormatTok, Style)) { 2635 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if); 2636 FormatTok->setFinalizedType(TT_ElseLBrace); 2637 ElseLeftBrace = FormatTok; 2638 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2639 IfStmtKind ElseBlockKind = IfStmtKind::NotIf; 2640 FormatToken *IfLBrace = 2641 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2642 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind); 2643 if (FormatTok->is(tok::kw_else)) { 2644 KeepElseBraces = KeepElseBraces || 2645 ElseBlockKind == IfStmtKind::IfOnly || 2646 ElseBlockKind == IfStmtKind::IfElseIf; 2647 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) { 2648 KeepElseBraces = true; 2649 assert(ElseLeftBrace->MatchingParen); 2650 markOptionalBraces(ElseLeftBrace); 2651 } 2652 addUnwrappedLine(); 2653 } else if (FormatTok->is(tok::kw_if)) { 2654 const FormatToken *Previous = Tokens->getPreviousToken(); 2655 assert(Previous); 2656 const bool IsPrecededByComment = Previous->is(tok::comment); 2657 if (IsPrecededByComment) { 2658 addUnwrappedLine(); 2659 ++Line->Level; 2660 } 2661 bool TooDeep = true; 2662 if (Style.RemoveBracesLLVM) { 2663 Kind = IfStmtKind::IfElseIf; 2664 TooDeep = 
NestedTooDeep.pop_back_val(); 2665 } 2666 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces); 2667 if (Style.RemoveBracesLLVM) 2668 NestedTooDeep.push_back(TooDeep); 2669 if (IsPrecededByComment) 2670 --Line->Level; 2671 } else { 2672 parseUnbracedBody(/*CheckEOF=*/true); 2673 } 2674 } else { 2675 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; 2676 if (NeedsUnwrappedLine) 2677 addUnwrappedLine(); 2678 } 2679 2680 if (!Style.RemoveBracesLLVM) 2681 return nullptr; 2682 2683 assert(!NestedTooDeep.empty()); 2684 KeepElseBraces = KeepElseBraces || 2685 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || 2686 NestedTooDeep.back(); 2687 2688 NestedTooDeep.pop_back(); 2689 2690 if (!KeepIfBraces && !KeepElseBraces) { 2691 markOptionalBraces(IfLeftBrace); 2692 markOptionalBraces(ElseLeftBrace); 2693 } else if (IfLeftBrace) { 2694 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; 2695 if (IfRightBrace) { 2696 assert(IfRightBrace->MatchingParen == IfLeftBrace); 2697 assert(!IfLeftBrace->Optional); 2698 assert(!IfRightBrace->Optional); 2699 IfLeftBrace->MatchingParen = nullptr; 2700 IfRightBrace->MatchingParen = nullptr; 2701 } 2702 } 2703 2704 if (IfKind) 2705 *IfKind = Kind; 2706 2707 return IfLeftBrace; 2708 } 2709 2710 void UnwrappedLineParser::parseTryCatch() { 2711 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2712 nextToken(); 2713 bool NeedsUnwrappedLine = false; 2714 if (FormatTok->is(tok::colon)) { 2715 // We are in a function try block, what comes is an initializer list. 2716 nextToken(); 2717 2718 // In case identifiers were removed by clang-tidy, what might follow is 2719 // multiple commas in sequence - before the first identifier. 
2720 while (FormatTok->is(tok::comma)) 2721 nextToken(); 2722 2723 while (FormatTok->is(tok::identifier)) { 2724 nextToken(); 2725 if (FormatTok->is(tok::l_paren)) 2726 parseParens(); 2727 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2728 FormatTok->is(tok::l_brace)) { 2729 do { 2730 nextToken(); 2731 } while (!FormatTok->is(tok::r_brace)); 2732 nextToken(); 2733 } 2734 2735 // In case identifiers were removed by clang-tidy, what might follow is 2736 // multiple commas in sequence - after the first identifier. 2737 while (FormatTok->is(tok::comma)) 2738 nextToken(); 2739 } 2740 } 2741 // Parse try with resource. 2742 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) 2743 parseParens(); 2744 2745 keepAncestorBraces(); 2746 2747 if (FormatTok->is(tok::l_brace)) { 2748 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2749 parseBlock(); 2750 if (Style.BraceWrapping.BeforeCatch) 2751 addUnwrappedLine(); 2752 else 2753 NeedsUnwrappedLine = true; 2754 } else if (!FormatTok->is(tok::kw_catch)) { 2755 // The C++ standard requires a compound-statement after a try. 2756 // If there's none, we try to assume there's a structuralElement 2757 // and try to continue. 
2758 addUnwrappedLine(); 2759 ++Line->Level; 2760 parseStructuralElement(); 2761 --Line->Level; 2762 } 2763 while (true) { 2764 if (FormatTok->is(tok::at)) 2765 nextToken(); 2766 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 2767 tok::kw___finally) || 2768 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 2769 FormatTok->is(Keywords.kw_finally)) || 2770 (FormatTok->isObjCAtKeyword(tok::objc_catch) || 2771 FormatTok->isObjCAtKeyword(tok::objc_finally)))) { 2772 break; 2773 } 2774 nextToken(); 2775 while (FormatTok->isNot(tok::l_brace)) { 2776 if (FormatTok->is(tok::l_paren)) { 2777 parseParens(); 2778 continue; 2779 } 2780 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) { 2781 if (Style.RemoveBracesLLVM) 2782 NestedTooDeep.pop_back(); 2783 return; 2784 } 2785 nextToken(); 2786 } 2787 NeedsUnwrappedLine = false; 2788 Line->MustBeDeclaration = false; 2789 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2790 parseBlock(); 2791 if (Style.BraceWrapping.BeforeCatch) 2792 addUnwrappedLine(); 2793 else 2794 NeedsUnwrappedLine = true; 2795 } 2796 2797 if (Style.RemoveBracesLLVM) 2798 NestedTooDeep.pop_back(); 2799 2800 if (NeedsUnwrappedLine) 2801 addUnwrappedLine(); 2802 } 2803 2804 void UnwrappedLineParser::parseNamespace() { 2805 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 2806 "'namespace' expected"); 2807 2808 const FormatToken &InitialToken = *FormatTok; 2809 nextToken(); 2810 if (InitialToken.is(TT_NamespaceMacro)) { 2811 parseParens(); 2812 } else { 2813 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, 2814 tok::l_square, tok::period, tok::l_paren) || 2815 (Style.isCSharp() && FormatTok->is(tok::kw_union))) { 2816 if (FormatTok->is(tok::l_square)) 2817 parseSquare(); 2818 else if (FormatTok->is(tok::l_paren)) 2819 parseParens(); 2820 else 2821 nextToken(); 2822 } 2823 } 2824 if (FormatTok->is(tok::l_brace)) { 2825 if (ShouldBreakBeforeBrace(Style, 
InitialToken)) 2826 addUnwrappedLine(); 2827 2828 unsigned AddLevels = 2829 Style.NamespaceIndentation == FormatStyle::NI_All || 2830 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 2831 DeclarationScopeStack.size() > 1) 2832 ? 1u 2833 : 0u; 2834 bool ManageWhitesmithsBraces = 2835 AddLevels == 0u && 2836 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 2837 2838 // If we're in Whitesmiths mode, indent the brace if we're not indenting 2839 // the whole block. 2840 if (ManageWhitesmithsBraces) 2841 ++Line->Level; 2842 2843 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true, 2844 /*KeepBraces=*/true, /*IfKind=*/nullptr, 2845 ManageWhitesmithsBraces); 2846 2847 // Munch the semicolon after a namespace. This is more common than one would 2848 // think. Putting the semicolon into its own line is very ugly. 2849 if (FormatTok->is(tok::semi)) 2850 nextToken(); 2851 2852 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); 2853 2854 if (ManageWhitesmithsBraces) 2855 --Line->Level; 2856 } 2857 // FIXME: Add error handling. 2858 } 2859 2860 void UnwrappedLineParser::parseNew() { 2861 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 2862 nextToken(); 2863 2864 if (Style.isCSharp()) { 2865 do { 2866 if (FormatTok->is(tok::l_brace)) 2867 parseBracedList(); 2868 2869 if (FormatTok->isOneOf(tok::semi, tok::comma)) 2870 return; 2871 2872 nextToken(); 2873 } while (!eof()); 2874 } 2875 2876 if (Style.Language != FormatStyle::LK_Java) 2877 return; 2878 2879 // In Java, we can parse everything up to the parens, which aren't optional. 2880 do { 2881 // There should not be a ;, { or } before the new's open paren. 2882 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 2883 return; 2884 2885 // Consume the parens. 2886 if (FormatTok->is(tok::l_paren)) { 2887 parseParens(); 2888 2889 // If there is a class body of an anonymous class, consume that as child. 
2890 if (FormatTok->is(tok::l_brace)) 2891 parseChildBlock(); 2892 return; 2893 } 2894 nextToken(); 2895 } while (!eof()); 2896 } 2897 2898 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) { 2899 keepAncestorBraces(); 2900 2901 if (Keywords.isBlockBegin(*FormatTok, Style)) { 2902 if (!KeepBraces) 2903 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 2904 FormatToken *LeftBrace = FormatTok; 2905 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2906 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2907 /*MunchSemi=*/true, KeepBraces); 2908 if (!KeepBraces) { 2909 assert(!NestedTooDeep.empty()); 2910 if (!NestedTooDeep.back()) 2911 markOptionalBraces(LeftBrace); 2912 } 2913 if (WrapRightBrace) 2914 addUnwrappedLine(); 2915 } else { 2916 parseUnbracedBody(); 2917 } 2918 2919 if (!KeepBraces) 2920 NestedTooDeep.pop_back(); 2921 } 2922 2923 void UnwrappedLineParser::parseForOrWhileLoop() { 2924 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 2925 "'for', 'while' or foreach macro expected"); 2926 const bool KeepBraces = !Style.RemoveBracesLLVM || 2927 !FormatTok->isOneOf(tok::kw_for, tok::kw_while); 2928 2929 nextToken(); 2930 // JS' for await ( ... 2931 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 2932 nextToken(); 2933 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 2934 nextToken(); 2935 if (FormatTok->is(tok::l_paren)) 2936 parseParens(); 2937 2938 handleAttributes(); 2939 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true); 2940 } 2941 2942 void UnwrappedLineParser::parseDoWhile() { 2943 assert(FormatTok->is(tok::kw_do) && "'do' expected"); 2944 nextToken(); 2945 2946 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile); 2947 2948 // FIXME: Add error handling. 
2949 if (!FormatTok->is(tok::kw_while)) { 2950 addUnwrappedLine(); 2951 return; 2952 } 2953 2954 // If in Whitesmiths mode, the line with the while() needs to be indented 2955 // to the same level as the block. 2956 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 2957 ++Line->Level; 2958 2959 nextToken(); 2960 parseStructuralElement(); 2961 } 2962 2963 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 2964 nextToken(); 2965 unsigned OldLineLevel = Line->Level; 2966 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 2967 --Line->Level; 2968 if (LeftAlignLabel) 2969 Line->Level = 0; 2970 2971 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 2972 FormatTok->is(tok::l_brace)) { 2973 2974 CompoundStatementIndenter Indenter(this, Line->Level, 2975 Style.BraceWrapping.AfterCaseLabel, 2976 Style.BraceWrapping.IndentBraces); 2977 parseBlock(); 2978 if (FormatTok->is(tok::kw_break)) { 2979 if (Style.BraceWrapping.AfterControlStatement == 2980 FormatStyle::BWACS_Always) { 2981 addUnwrappedLine(); 2982 if (!Style.IndentCaseBlocks && 2983 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 2984 ++Line->Level; 2985 } 2986 } 2987 parseStructuralElement(); 2988 } 2989 addUnwrappedLine(); 2990 } else { 2991 if (FormatTok->is(tok::semi)) 2992 nextToken(); 2993 addUnwrappedLine(); 2994 } 2995 Line->Level = OldLineLevel; 2996 if (FormatTok->isNot(tok::l_brace)) { 2997 parseStructuralElement(); 2998 addUnwrappedLine(); 2999 } 3000 } 3001 3002 void UnwrappedLineParser::parseCaseLabel() { 3003 assert(FormatTok->is(tok::kw_case) && "'case' expected"); 3004 3005 // FIXME: fix handling of complex expressions here. 
3006 do { 3007 nextToken(); 3008 } while (!eof() && !FormatTok->is(tok::colon)); 3009 parseLabel(); 3010 } 3011 3012 void UnwrappedLineParser::parseSwitch() { 3013 assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); 3014 nextToken(); 3015 if (FormatTok->is(tok::l_paren)) 3016 parseParens(); 3017 3018 keepAncestorBraces(); 3019 3020 if (FormatTok->is(tok::l_brace)) { 3021 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3022 parseBlock(); 3023 addUnwrappedLine(); 3024 } else { 3025 addUnwrappedLine(); 3026 ++Line->Level; 3027 parseStructuralElement(); 3028 --Line->Level; 3029 } 3030 3031 if (Style.RemoveBracesLLVM) 3032 NestedTooDeep.pop_back(); 3033 } 3034 3035 // Operators that can follow a C variable. 3036 static bool isCOperatorFollowingVar(tok::TokenKind kind) { 3037 switch (kind) { 3038 case tok::ampamp: 3039 case tok::ampequal: 3040 case tok::arrow: 3041 case tok::caret: 3042 case tok::caretequal: 3043 case tok::comma: 3044 case tok::ellipsis: 3045 case tok::equal: 3046 case tok::equalequal: 3047 case tok::exclaim: 3048 case tok::exclaimequal: 3049 case tok::greater: 3050 case tok::greaterequal: 3051 case tok::greatergreater: 3052 case tok::greatergreaterequal: 3053 case tok::l_paren: 3054 case tok::l_square: 3055 case tok::less: 3056 case tok::lessequal: 3057 case tok::lessless: 3058 case tok::lesslessequal: 3059 case tok::minus: 3060 case tok::minusequal: 3061 case tok::minusminus: 3062 case tok::percent: 3063 case tok::percentequal: 3064 case tok::period: 3065 case tok::pipe: 3066 case tok::pipeequal: 3067 case tok::pipepipe: 3068 case tok::plus: 3069 case tok::plusequal: 3070 case tok::plusplus: 3071 case tok::question: 3072 case tok::r_brace: 3073 case tok::r_paren: 3074 case tok::r_square: 3075 case tok::semi: 3076 case tok::slash: 3077 case tok::slashequal: 3078 case tok::star: 3079 case tok::starequal: 3080 return true; 3081 default: 3082 return false; 3083 } 3084 } 3085 3086 void UnwrappedLineParser::parseAccessSpecifier() { 
3087 FormatToken *AccessSpecifierCandidate = FormatTok; 3088 nextToken(); 3089 // Understand Qt's slots. 3090 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 3091 nextToken(); 3092 // Otherwise, we don't know what it is, and we'd better keep the next token. 3093 if (FormatTok->is(tok::colon)) { 3094 nextToken(); 3095 addUnwrappedLine(); 3096 } else if (!FormatTok->is(tok::coloncolon) && 3097 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { 3098 // Not a variable name nor namespace name. 3099 addUnwrappedLine(); 3100 } else if (AccessSpecifierCandidate) { 3101 // Consider the access specifier to be a C identifier. 3102 AccessSpecifierCandidate->Tok.setKind(tok::identifier); 3103 } 3104 } 3105 3106 /// \brief Parses a concept definition. 3107 /// \pre The current token has to be the concept keyword. 3108 /// 3109 /// Returns if either the concept has been completely parsed, or if it detects 3110 /// that the concept definition is incorrect. 3111 void UnwrappedLineParser::parseConcept() { 3112 assert(FormatTok->is(tok::kw_concept) && "'concept' expected"); 3113 nextToken(); 3114 if (!FormatTok->is(tok::identifier)) 3115 return; 3116 nextToken(); 3117 if (!FormatTok->is(tok::equal)) 3118 return; 3119 nextToken(); 3120 parseConstraintExpression(); 3121 if (FormatTok->is(tok::semi)) 3122 nextToken(); 3123 addUnwrappedLine(); 3124 } 3125 3126 /// \brief Parses a requires, decides if it is a clause or an expression. 3127 /// \pre The current token has to be the requires keyword. 3128 /// \returns true if it parsed a clause. 3129 bool clang::format::UnwrappedLineParser::parseRequires() { 3130 assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); 3131 auto RequiresToken = FormatTok; 3132 3133 // We try to guess if it is a requires clause, or a requires expression. For 3134 // that we first consume the keyword and check the next token. 
3135 nextToken(); 3136 3137 switch (FormatTok->Tok.getKind()) { 3138 case tok::l_brace: 3139 // This can only be an expression, never a clause. 3140 parseRequiresExpression(RequiresToken); 3141 return false; 3142 case tok::l_paren: 3143 // Clauses and expression can start with a paren, it's unclear what we have. 3144 break; 3145 default: 3146 // All other tokens can only be a clause. 3147 parseRequiresClause(RequiresToken); 3148 return true; 3149 } 3150 3151 // Looking forward we would have to decide if there are function declaration 3152 // like arguments to the requires expression: 3153 // requires (T t) { 3154 // Or there is a constraint expression for the requires clause: 3155 // requires (C<T> && ... 3156 3157 // But first let's look behind. 3158 auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); 3159 3160 if (!PreviousNonComment || 3161 PreviousNonComment->is(TT_RequiresExpressionLBrace)) { 3162 // If there is no token, or an expression left brace, we are a requires 3163 // clause within a requires expression. 3164 parseRequiresClause(RequiresToken); 3165 return true; 3166 } 3167 3168 switch (PreviousNonComment->Tok.getKind()) { 3169 case tok::greater: 3170 case tok::r_paren: 3171 case tok::kw_noexcept: 3172 case tok::kw_const: 3173 // This is a requires clause. 3174 parseRequiresClause(RequiresToken); 3175 return true; 3176 case tok::amp: 3177 case tok::ampamp: { 3178 // This can be either: 3179 // if (... && requires (T t) ...) 3180 // Or 3181 // void member(...) && requires (C<T> ... 3182 // We check the one token before that for a const: 3183 // void member(...) const && requires (C<T> ... 3184 auto PrevPrev = PreviousNonComment->getPreviousNonComment(); 3185 if (PrevPrev && PrevPrev->is(tok::kw_const)) { 3186 parseRequiresClause(RequiresToken); 3187 return true; 3188 } 3189 break; 3190 } 3191 default: 3192 if (PreviousNonComment->isTypeOrIdentifier()) { 3193 // This is a requires clause. 
3194 parseRequiresClause(RequiresToken); 3195 return true; 3196 } 3197 // It's an expression. 3198 parseRequiresExpression(RequiresToken); 3199 return false; 3200 } 3201 3202 // Now we look forward and try to check if the paren content is a parameter 3203 // list. The parameters can be cv-qualified and contain references or 3204 // pointers. 3205 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds 3206 // of stuff: typename, const, *, &, &&, ::, identifiers. 3207 3208 int NextTokenOffset = 1; 3209 auto NextToken = Tokens->peekNextToken(NextTokenOffset); 3210 auto PeekNext = [&NextTokenOffset, &NextToken, this] { 3211 ++NextTokenOffset; 3212 NextToken = Tokens->peekNextToken(NextTokenOffset); 3213 }; 3214 3215 bool FoundType = false; 3216 bool LastWasColonColon = false; 3217 int OpenAngles = 0; 3218 3219 for (; NextTokenOffset < 50; PeekNext()) { 3220 switch (NextToken->Tok.getKind()) { 3221 case tok::kw_volatile: 3222 case tok::kw_const: 3223 case tok::comma: 3224 parseRequiresExpression(RequiresToken); 3225 return false; 3226 case tok::r_paren: 3227 case tok::pipepipe: 3228 parseRequiresClause(RequiresToken); 3229 return true; 3230 case tok::eof: 3231 // Break out of the loop. 3232 NextTokenOffset = 50; 3233 break; 3234 case tok::coloncolon: 3235 LastWasColonColon = true; 3236 break; 3237 case tok::identifier: 3238 if (FoundType && !LastWasColonColon && OpenAngles == 0) { 3239 parseRequiresExpression(RequiresToken); 3240 return false; 3241 } 3242 FoundType = true; 3243 LastWasColonColon = false; 3244 break; 3245 case tok::less: 3246 ++OpenAngles; 3247 break; 3248 case tok::greater: 3249 --OpenAngles; 3250 break; 3251 default: 3252 if (NextToken->isSimpleTypeSpecifier()) { 3253 parseRequiresExpression(RequiresToken); 3254 return false; 3255 } 3256 break; 3257 } 3258 } 3259 3260 // This seems to be a complicated expression, just assume it's a clause. 
3261 parseRequiresClause(RequiresToken); 3262 return true; 3263 } 3264 3265 /// \brief Parses a requires clause. 3266 /// \param RequiresToken The requires keyword token, which starts this clause. 3267 /// \pre We need to be on the next token after the requires keyword. 3268 /// \sa parseRequiresExpression 3269 /// 3270 /// Returns if it either has finished parsing the clause, or it detects, that 3271 /// the clause is incorrect. 3272 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { 3273 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3274 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3275 3276 // If there is no previous token, we are within a requires expression, 3277 // otherwise we will always have the template or function declaration in front 3278 // of it. 3279 bool InRequiresExpression = 3280 !RequiresToken->Previous || 3281 RequiresToken->Previous->is(TT_RequiresExpressionLBrace); 3282 3283 RequiresToken->setFinalizedType(InRequiresExpression 3284 ? TT_RequiresClauseInARequiresExpression 3285 : TT_RequiresClause); 3286 3287 parseConstraintExpression(); 3288 3289 if (!InRequiresExpression) 3290 FormatTok->Previous->ClosesRequiresClause = true; 3291 } 3292 3293 /// \brief Parses a requires expression. 3294 /// \param RequiresToken The requires keyword token, which starts this clause. 3295 /// \pre We need to be on the next token after the requires keyword. 3296 /// \sa parseRequiresClause 3297 /// 3298 /// Returns if it either has finished parsing the expression, or it detects, 3299 /// that the expression is incorrect. 
3300 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) { 3301 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3302 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3303 3304 RequiresToken->setFinalizedType(TT_RequiresExpression); 3305 3306 if (FormatTok->is(tok::l_paren)) { 3307 FormatTok->setFinalizedType(TT_RequiresExpressionLParen); 3308 parseParens(); 3309 } 3310 3311 if (FormatTok->is(tok::l_brace)) { 3312 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); 3313 parseChildBlock(/*CanContainBracedList=*/false, 3314 /*NextLBracesType=*/TT_CompoundRequirementLBrace); 3315 } 3316 } 3317 3318 /// \brief Parses a constraint expression. 3319 /// 3320 /// This is either the definition of a concept, or the body of a requires 3321 /// clause. It returns, when the parsing is complete, or the expression is 3322 /// incorrect. 3323 void UnwrappedLineParser::parseConstraintExpression() { 3324 // The special handling for lambdas is needed since tryToParseLambda() eats a 3325 // token and if a requires expression is the last part of a requires clause 3326 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is 3327 // not set on the correct token. Thus we need to be aware if we even expect a 3328 // lambda to be possible. 3329 // template <typename T> requires requires { ... 
} [[nodiscard]] ...; 3330 bool LambdaNextTimeAllowed = true; 3331 do { 3332 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false); 3333 3334 switch (FormatTok->Tok.getKind()) { 3335 case tok::kw_requires: { 3336 auto RequiresToken = FormatTok; 3337 nextToken(); 3338 parseRequiresExpression(RequiresToken); 3339 break; 3340 } 3341 3342 case tok::l_paren: 3343 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator); 3344 break; 3345 3346 case tok::l_square: 3347 if (!LambdaThisTimeAllowed || !tryToParseLambda()) 3348 return; 3349 break; 3350 3351 case tok::kw_const: 3352 case tok::semi: 3353 case tok::kw_class: 3354 case tok::kw_struct: 3355 case tok::kw_union: 3356 return; 3357 3358 case tok::l_brace: 3359 // Potential function body. 3360 return; 3361 3362 case tok::ampamp: 3363 case tok::pipepipe: 3364 FormatTok->setFinalizedType(TT_BinaryOperator); 3365 nextToken(); 3366 LambdaNextTimeAllowed = true; 3367 break; 3368 3369 case tok::comma: 3370 case tok::comment: 3371 LambdaNextTimeAllowed = LambdaThisTimeAllowed; 3372 nextToken(); 3373 break; 3374 3375 case tok::kw_sizeof: 3376 case tok::greater: 3377 case tok::greaterequal: 3378 case tok::greatergreater: 3379 case tok::less: 3380 case tok::lessequal: 3381 case tok::lessless: 3382 case tok::equalequal: 3383 case tok::exclaim: 3384 case tok::exclaimequal: 3385 case tok::plus: 3386 case tok::minus: 3387 case tok::star: 3388 case tok::slash: 3389 case tok::kw_decltype: 3390 LambdaNextTimeAllowed = true; 3391 // Just eat them. 3392 nextToken(); 3393 break; 3394 3395 case tok::numeric_constant: 3396 case tok::coloncolon: 3397 case tok::kw_true: 3398 case tok::kw_false: 3399 // Just eat them. 
3400 nextToken(); 3401 break; 3402 3403 case tok::kw_static_cast: 3404 case tok::kw_const_cast: 3405 case tok::kw_reinterpret_cast: 3406 case tok::kw_dynamic_cast: 3407 nextToken(); 3408 if (!FormatTok->is(tok::less)) 3409 return; 3410 3411 nextToken(); 3412 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 3413 /*ClosingBraceKind=*/tok::greater); 3414 break; 3415 3416 case tok::kw_bool: 3417 // bool is only allowed if it is directly followed by a paren for a cast: 3418 // concept C = bool(...); 3419 // and bool is the only type, all other types as cast must be inside a 3420 // cast to bool an thus are handled by the other cases. 3421 nextToken(); 3422 if (FormatTok->isNot(tok::l_paren)) 3423 return; 3424 parseParens(); 3425 break; 3426 3427 default: 3428 if (!FormatTok->Tok.getIdentifierInfo()) { 3429 // Identifiers are part of the default case, we check for more then 3430 // tok::identifier to handle builtin type traits. 3431 return; 3432 } 3433 3434 // We need to differentiate identifiers for a template deduction guide, 3435 // variables, or function return types (the constraint expression has 3436 // ended before that), and basically all other cases. But it's easier to 3437 // check the other way around. 3438 assert(FormatTok->Previous); 3439 switch (FormatTok->Previous->Tok.getKind()) { 3440 case tok::coloncolon: // Nested identifier. 3441 case tok::ampamp: // Start of a function or variable for the 3442 case tok::pipepipe: // constraint expression. 3443 case tok::kw_requires: // Initial identifier of a requires clause. 3444 case tok::equal: // Initial identifier of a concept declaration. 3445 break; 3446 default: 3447 return; 3448 } 3449 3450 // Read identifier with optional template declaration. 
3451 nextToken(); 3452 if (FormatTok->is(tok::less)) { 3453 nextToken(); 3454 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 3455 /*ClosingBraceKind=*/tok::greater); 3456 } 3457 break; 3458 } 3459 } while (!eof()); 3460 } 3461 3462 bool UnwrappedLineParser::parseEnum() { 3463 const FormatToken &InitialToken = *FormatTok; 3464 3465 // Won't be 'enum' for NS_ENUMs. 3466 if (FormatTok->is(tok::kw_enum)) 3467 nextToken(); 3468 3469 // In TypeScript, "enum" can also be used as property name, e.g. in interface 3470 // declarations. An "enum" keyword followed by a colon would be a syntax 3471 // error and thus assume it is just an identifier. 3472 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 3473 return false; 3474 3475 // In protobuf, "enum" can be used as a field name. 3476 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 3477 return false; 3478 3479 // Eat up enum class ... 3480 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct)) 3481 nextToken(); 3482 3483 while (FormatTok->Tok.getIdentifierInfo() || 3484 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 3485 tok::greater, tok::comma, tok::question, 3486 tok::l_square, tok::r_square)) { 3487 nextToken(); 3488 // We can have macros or attributes in between 'enum' and the enum name. 3489 if (FormatTok->is(tok::l_paren)) 3490 parseParens(); 3491 if (FormatTok->is(TT_AttributeSquare)) { 3492 parseSquare(); 3493 // Consume the closing TT_AttributeSquare. 3494 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare)) 3495 nextToken(); 3496 } 3497 if (FormatTok->is(tok::identifier)) { 3498 nextToken(); 3499 // If there are two identifiers in a row, this is likely an elaborate 3500 // return type. In Java, this can be "implements", etc. 3501 if (Style.isCpp() && FormatTok->is(tok::identifier)) 3502 return false; 3503 } 3504 } 3505 3506 // Just a declaration or something is wrong. 
3507 if (FormatTok->isNot(tok::l_brace)) 3508 return true; 3509 FormatTok->setFinalizedType(TT_EnumLBrace); 3510 FormatTok->setBlockKind(BK_Block); 3511 3512 if (Style.Language == FormatStyle::LK_Java) { 3513 // Java enums are different. 3514 parseJavaEnumBody(); 3515 return true; 3516 } 3517 if (Style.Language == FormatStyle::LK_Proto) { 3518 parseBlock(/*MustBeDeclaration=*/true); 3519 return true; 3520 } 3521 3522 if (!Style.AllowShortEnumsOnASingleLine && 3523 ShouldBreakBeforeBrace(Style, InitialToken)) { 3524 addUnwrappedLine(); 3525 } 3526 // Parse enum body. 3527 nextToken(); 3528 if (!Style.AllowShortEnumsOnASingleLine) { 3529 addUnwrappedLine(); 3530 Line->Level += 1; 3531 } 3532 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true, 3533 /*IsEnum=*/true); 3534 if (!Style.AllowShortEnumsOnASingleLine) 3535 Line->Level -= 1; 3536 if (HasError) { 3537 if (FormatTok->is(tok::semi)) 3538 nextToken(); 3539 addUnwrappedLine(); 3540 } 3541 return true; 3542 3543 // There is no addUnwrappedLine() here so that we fall through to parsing a 3544 // structural element afterwards. Thus, in "enum A {} n, m;", 3545 // "} n, m;" will end up in one unwrapped line. 3546 } 3547 3548 bool UnwrappedLineParser::parseStructLike() { 3549 // parseRecord falls through and does not yet add an unwrapped line as a 3550 // record declaration or definition can start a structural element. 3551 parseRecord(); 3552 // This does not apply to Java, JavaScript and C#. 3553 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 3554 Style.isCSharp()) { 3555 if (FormatTok->is(tok::semi)) 3556 nextToken(); 3557 addUnwrappedLine(); 3558 return true; 3559 } 3560 return false; 3561 } 3562 3563 namespace { 3564 // A class used to set and restore the Token position when peeking 3565 // ahead in the token source. 
3566 class ScopedTokenPosition { 3567 unsigned StoredPosition; 3568 FormatTokenSource *Tokens; 3569 3570 public: 3571 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 3572 assert(Tokens && "Tokens expected to not be null"); 3573 StoredPosition = Tokens->getPosition(); 3574 } 3575 3576 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 3577 }; 3578 } // namespace 3579 3580 // Look to see if we have [[ by looking ahead, if 3581 // its not then rewind to the original position. 3582 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 3583 ScopedTokenPosition AutoPosition(Tokens); 3584 FormatToken *Tok = Tokens->getNextToken(); 3585 // We already read the first [ check for the second. 3586 if (!Tok->is(tok::l_square)) 3587 return false; 3588 // Double check that the attribute is just something 3589 // fairly simple. 3590 while (Tok->isNot(tok::eof)) { 3591 if (Tok->is(tok::r_square)) 3592 break; 3593 Tok = Tokens->getNextToken(); 3594 } 3595 if (Tok->is(tok::eof)) 3596 return false; 3597 Tok = Tokens->getNextToken(); 3598 if (!Tok->is(tok::r_square)) 3599 return false; 3600 Tok = Tokens->getNextToken(); 3601 if (Tok->is(tok::semi)) 3602 return false; 3603 return true; 3604 } 3605 3606 void UnwrappedLineParser::parseJavaEnumBody() { 3607 assert(FormatTok->is(tok::l_brace)); 3608 const FormatToken *OpeningBrace = FormatTok; 3609 3610 // Determine whether the enum is simple, i.e. does not have a semicolon or 3611 // constants with class bodies. Simple enums can be formatted like braced 3612 // lists, contracted to a single line, etc. 3613 unsigned StoredPosition = Tokens->getPosition(); 3614 bool IsSimple = true; 3615 FormatToken *Tok = Tokens->getNextToken(); 3616 while (!Tok->is(tok::eof)) { 3617 if (Tok->is(tok::r_brace)) 3618 break; 3619 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 3620 IsSimple = false; 3621 break; 3622 } 3623 // FIXME: This will also mark enums with braces in the arguments to enum 3624 // constants as "not simple". 
This is probably fine in practice, though. 3625 Tok = Tokens->getNextToken(); 3626 } 3627 FormatTok = Tokens->setPosition(StoredPosition); 3628 3629 if (IsSimple) { 3630 nextToken(); 3631 parseBracedList(); 3632 addUnwrappedLine(); 3633 return; 3634 } 3635 3636 // Parse the body of a more complex enum. 3637 // First add a line for everything up to the "{". 3638 nextToken(); 3639 addUnwrappedLine(); 3640 ++Line->Level; 3641 3642 // Parse the enum constants. 3643 while (FormatTok->isNot(tok::eof)) { 3644 if (FormatTok->is(tok::l_brace)) { 3645 // Parse the constant's class body. 3646 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 3647 /*MunchSemi=*/false); 3648 } else if (FormatTok->is(tok::l_paren)) { 3649 parseParens(); 3650 } else if (FormatTok->is(tok::comma)) { 3651 nextToken(); 3652 addUnwrappedLine(); 3653 } else if (FormatTok->is(tok::semi)) { 3654 nextToken(); 3655 addUnwrappedLine(); 3656 break; 3657 } else if (FormatTok->is(tok::r_brace)) { 3658 addUnwrappedLine(); 3659 break; 3660 } else { 3661 nextToken(); 3662 } 3663 } 3664 3665 // Parse the class body after the enum's ";" if any. 3666 parseLevel(OpeningBrace); 3667 nextToken(); 3668 --Line->Level; 3669 addUnwrappedLine(); 3670 } 3671 3672 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3673 const FormatToken &InitialToken = *FormatTok; 3674 nextToken(); 3675 3676 // The actual identifier can be a nested name specifier, and in macros 3677 // it is often token-pasted. 3678 // An [[attribute]] can be before the identifier. 
3679 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 3680 tok::kw___attribute, tok::kw___declspec, 3681 tok::kw_alignas, tok::l_square, tok::r_square) || 3682 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 3683 FormatTok->isOneOf(tok::period, tok::comma))) { 3684 if (Style.isJavaScript() && 3685 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 3686 // JavaScript/TypeScript supports inline object types in 3687 // extends/implements positions: 3688 // class Foo implements {bar: number} { } 3689 nextToken(); 3690 if (FormatTok->is(tok::l_brace)) { 3691 tryToParseBracedList(); 3692 continue; 3693 } 3694 } 3695 bool IsNonMacroIdentifier = 3696 FormatTok->is(tok::identifier) && 3697 FormatTok->TokenText != FormatTok->TokenText.upper(); 3698 nextToken(); 3699 // We can have macros or attributes in between 'class' and the class name. 3700 if (!IsNonMacroIdentifier) { 3701 if (FormatTok->is(tok::l_paren)) { 3702 parseParens(); 3703 } else if (FormatTok->is(TT_AttributeSquare)) { 3704 parseSquare(); 3705 // Consume the closing TT_AttributeSquare. 3706 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare)) 3707 nextToken(); 3708 } 3709 } 3710 } 3711 3712 // Note that parsing away template declarations here leads to incorrectly 3713 // accepting function declarations as record declarations. 3714 // In general, we cannot solve this problem. Consider: 3715 // class A<int> B() {} 3716 // which can be a function definition or a class definition when B() is a 3717 // macro. If we find enough real-world cases where this is a problem, we 3718 // can parse for the 'template' keyword in the beginning of the statement, 3719 // and thus rule out the record production in case there is no template 3720 // (this would still leave us with an ambiguity between template function 3721 // and class declarations). 
3722 if (FormatTok->isOneOf(tok::colon, tok::less)) { 3723 do { 3724 if (FormatTok->is(tok::l_brace)) { 3725 calculateBraceTypes(/*ExpectClassBody=*/true); 3726 if (!tryToParseBracedList()) 3727 break; 3728 } 3729 if (FormatTok->is(tok::l_square)) { 3730 FormatToken *Previous = FormatTok->Previous; 3731 if (!Previous || 3732 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) { 3733 // Don't try parsing a lambda if we had a closing parenthesis before, 3734 // it was probably a pointer to an array: int (*)[]. 3735 if (!tryToParseLambda()) 3736 break; 3737 } else { 3738 parseSquare(); 3739 continue; 3740 } 3741 } 3742 if (FormatTok->is(tok::semi)) 3743 return; 3744 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 3745 addUnwrappedLine(); 3746 nextToken(); 3747 parseCSharpGenericTypeConstraint(); 3748 break; 3749 } 3750 nextToken(); 3751 } while (!eof()); 3752 } 3753 3754 auto GetBraceType = [](const FormatToken &RecordTok) { 3755 switch (RecordTok.Tok.getKind()) { 3756 case tok::kw_class: 3757 return TT_ClassLBrace; 3758 case tok::kw_struct: 3759 return TT_StructLBrace; 3760 case tok::kw_union: 3761 return TT_UnionLBrace; 3762 default: 3763 // Useful for e.g. interface. 3764 return TT_RecordLBrace; 3765 } 3766 }; 3767 if (FormatTok->is(tok::l_brace)) { 3768 FormatTok->setFinalizedType(GetBraceType(InitialToken)); 3769 if (ParseAsExpr) { 3770 parseChildBlock(); 3771 } else { 3772 if (ShouldBreakBeforeBrace(Style, InitialToken)) 3773 addUnwrappedLine(); 3774 3775 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 3776 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 3777 } 3778 } 3779 // There is no addUnwrappedLine() here so that we fall through to parsing a 3780 // structural element afterwards. Thus, in "class A {} n, m;", 3781 // "} n, m;" will end up in one unwrapped line. 
3782 } 3783 3784 void UnwrappedLineParser::parseObjCMethod() { 3785 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) && 3786 "'(' or identifier expected."); 3787 do { 3788 if (FormatTok->is(tok::semi)) { 3789 nextToken(); 3790 addUnwrappedLine(); 3791 return; 3792 } else if (FormatTok->is(tok::l_brace)) { 3793 if (Style.BraceWrapping.AfterFunction) 3794 addUnwrappedLine(); 3795 parseBlock(); 3796 addUnwrappedLine(); 3797 return; 3798 } else { 3799 nextToken(); 3800 } 3801 } while (!eof()); 3802 } 3803 3804 void UnwrappedLineParser::parseObjCProtocolList() { 3805 assert(FormatTok->is(tok::less) && "'<' expected."); 3806 do { 3807 nextToken(); 3808 // Early exit in case someone forgot a close angle. 3809 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3810 FormatTok->isObjCAtKeyword(tok::objc_end)) { 3811 return; 3812 } 3813 } while (!eof() && FormatTok->isNot(tok::greater)); 3814 nextToken(); // Skip '>'. 3815 } 3816 3817 void UnwrappedLineParser::parseObjCUntilAtEnd() { 3818 do { 3819 if (FormatTok->isObjCAtKeyword(tok::objc_end)) { 3820 nextToken(); 3821 addUnwrappedLine(); 3822 break; 3823 } 3824 if (FormatTok->is(tok::l_brace)) { 3825 parseBlock(); 3826 // In ObjC interfaces, nothing should be following the "}". 3827 addUnwrappedLine(); 3828 } else if (FormatTok->is(tok::r_brace)) { 3829 // Ignore stray "}". parseStructuralElement doesn't consume them. 
3830 nextToken(); 3831 addUnwrappedLine(); 3832 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 3833 nextToken(); 3834 parseObjCMethod(); 3835 } else { 3836 parseStructuralElement(); 3837 } 3838 } while (!eof()); 3839 } 3840 3841 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 3842 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 3843 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 3844 nextToken(); 3845 nextToken(); // interface name 3846 3847 // @interface can be followed by a lightweight generic 3848 // specialization list, then either a base class or a category. 3849 if (FormatTok->is(tok::less)) 3850 parseObjCLightweightGenerics(); 3851 if (FormatTok->is(tok::colon)) { 3852 nextToken(); 3853 nextToken(); // base class name 3854 // The base class can also have lightweight generics applied to it. 3855 if (FormatTok->is(tok::less)) 3856 parseObjCLightweightGenerics(); 3857 } else if (FormatTok->is(tok::l_paren)) { 3858 // Skip category, if present. 3859 parseParens(); 3860 } 3861 3862 if (FormatTok->is(tok::less)) 3863 parseObjCProtocolList(); 3864 3865 if (FormatTok->is(tok::l_brace)) { 3866 if (Style.BraceWrapping.AfterObjCDeclaration) 3867 addUnwrappedLine(); 3868 parseBlock(/*MustBeDeclaration=*/true); 3869 } 3870 3871 // With instance variables, this puts '}' on its own line. Without instance 3872 // variables, this ends the @interface line. 3873 addUnwrappedLine(); 3874 3875 parseObjCUntilAtEnd(); 3876 } 3877 3878 void UnwrappedLineParser::parseObjCLightweightGenerics() { 3879 assert(FormatTok->is(tok::less)); 3880 // Unlike protocol lists, generic parameterizations support 3881 // nested angles: 3882 // 3883 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 3884 // NSObject <NSCopying, NSSecureCoding> 3885 // 3886 // so we need to count how many open angles we have left. 
3887 unsigned NumOpenAngles = 1; 3888 do { 3889 nextToken(); 3890 // Early exit in case someone forgot a close angle. 3891 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3892 FormatTok->isObjCAtKeyword(tok::objc_end)) { 3893 break; 3894 } 3895 if (FormatTok->is(tok::less)) { 3896 ++NumOpenAngles; 3897 } else if (FormatTok->is(tok::greater)) { 3898 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 3899 --NumOpenAngles; 3900 } 3901 } while (!eof() && NumOpenAngles != 0); 3902 nextToken(); // Skip '>'. 3903 } 3904 3905 // Returns true for the declaration/definition form of @protocol, 3906 // false for the expression form. 3907 bool UnwrappedLineParser::parseObjCProtocol() { 3908 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 3909 nextToken(); 3910 3911 if (FormatTok->is(tok::l_paren)) { 3912 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 3913 return false; 3914 } 3915 3916 // The definition/declaration form, 3917 // @protocol Foo 3918 // - (int)someMethod; 3919 // @end 3920 3921 nextToken(); // protocol name 3922 3923 if (FormatTok->is(tok::less)) 3924 parseObjCProtocolList(); 3925 3926 // Check for protocol declaration. 3927 if (FormatTok->is(tok::semi)) { 3928 nextToken(); 3929 addUnwrappedLine(); 3930 return true; 3931 } 3932 3933 addUnwrappedLine(); 3934 parseObjCUntilAtEnd(); 3935 return true; 3936 } 3937 3938 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 3939 bool IsImport = FormatTok->is(Keywords.kw_import); 3940 assert(IsImport || FormatTok->is(tok::kw_export)); 3941 nextToken(); 3942 3943 // Consume the "default" in "export default class/function". 3944 if (FormatTok->is(tok::kw_default)) 3945 nextToken(); 3946 3947 // Consume "async function", "function" and "default function", so that these 3948 // get parsed as free-standing JS functions, i.e. do not require a trailing 3949 // semicolon. 
3950 if (FormatTok->is(Keywords.kw_async)) 3951 nextToken(); 3952 if (FormatTok->is(Keywords.kw_function)) { 3953 nextToken(); 3954 return; 3955 } 3956 3957 // For imports, `export *`, `export {...}`, consume the rest of the line up 3958 // to the terminating `;`. For everything else, just return and continue 3959 // parsing the structural element, i.e. the declaration or expression for 3960 // `export default`. 3961 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 3962 !FormatTok->isStringLiteral()) { 3963 return; 3964 } 3965 3966 while (!eof()) { 3967 if (FormatTok->is(tok::semi)) 3968 return; 3969 if (Line->Tokens.empty()) { 3970 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 3971 // import statement should terminate. 3972 return; 3973 } 3974 if (FormatTok->is(tok::l_brace)) { 3975 FormatTok->setBlockKind(BK_Block); 3976 nextToken(); 3977 parseBracedList(); 3978 } else { 3979 nextToken(); 3980 } 3981 } 3982 } 3983 3984 void UnwrappedLineParser::parseStatementMacro() { 3985 nextToken(); 3986 if (FormatTok->is(tok::l_paren)) 3987 parseParens(); 3988 if (FormatTok->is(tok::semi)) 3989 nextToken(); 3990 addUnwrappedLine(); 3991 } 3992 3993 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 3994 StringRef Prefix = "") { 3995 llvm::dbgs() << Prefix << "Line(" << Line.Level 3996 << ", FSC=" << Line.FirstStartColumn << ")" 3997 << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 3998 for (const auto &Node : Line.Tokens) { 3999 llvm::dbgs() << Node.Tok->Tok.getName() << "[" 4000 << "T=" << static_cast<unsigned>(Node.Tok->getType()) 4001 << ", OC=" << Node.Tok->OriginalColumn << "] "; 4002 } 4003 for (const auto &Node : Line.Tokens) 4004 for (const auto &ChildNode : Node.Children) 4005 printDebugInfo(ChildNode, "\nChild: "); 4006 4007 llvm::dbgs() << "\n"; 4008 } 4009 4010 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 4011 if (Line->Tokens.empty()) 4012 return; 4013 LLVM_DEBUG({ 4014 if (CurrentLines == &Lines) 4015 printDebugInfo(*Line); 4016 }); 4017 4018 // If this line closes a block when in Whitesmiths mode, remember that 4019 // information so that the level can be decreased after the line is added. 4020 // This has to happen after the addition of the line since the line itself 4021 // needs to be indented. 4022 bool ClosesWhitesmithsBlock = 4023 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 4024 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 4025 4026 CurrentLines->push_back(std::move(*Line)); 4027 Line->Tokens.clear(); 4028 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 4029 Line->FirstStartColumn = 0; 4030 4031 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 4032 --Line->Level; 4033 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 4034 CurrentLines->append( 4035 std::make_move_iterator(PreprocessorDirectives.begin()), 4036 std::make_move_iterator(PreprocessorDirectives.end())); 4037 PreprocessorDirectives.clear(); 4038 } 4039 // Disconnect the current token from the last token on the previous line. 
4040 FormatTok->Previous = nullptr; 4041 } 4042 4043 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 4044 4045 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 4046 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 4047 FormatTok.NewlinesBefore > 0; 4048 } 4049 4050 // Checks if \p FormatTok is a line comment that continues the line comment 4051 // section on \p Line. 4052 static bool 4053 continuesLineCommentSection(const FormatToken &FormatTok, 4054 const UnwrappedLine &Line, 4055 const llvm::Regex &CommentPragmasRegex) { 4056 if (Line.Tokens.empty()) 4057 return false; 4058 4059 StringRef IndentContent = FormatTok.TokenText; 4060 if (FormatTok.TokenText.startswith("//") || 4061 FormatTok.TokenText.startswith("/*")) { 4062 IndentContent = FormatTok.TokenText.substr(2); 4063 } 4064 if (CommentPragmasRegex.match(IndentContent)) 4065 return false; 4066 4067 // If Line starts with a line comment, then FormatTok continues the comment 4068 // section if its original column is greater or equal to the original start 4069 // column of the line. 4070 // 4071 // Define the min column token of a line as follows: if a line ends in '{' or 4072 // contains a '{' followed by a line comment, then the min column token is 4073 // that '{'. Otherwise, the min column token of the line is the first token of 4074 // the line. 4075 // 4076 // If Line starts with a token other than a line comment, then FormatTok 4077 // continues the comment section if its original column is greater than the 4078 // original start column of the min column token of the line. 
4079 // 4080 // For example, the second line comment continues the first in these cases: 4081 // 4082 // // first line 4083 // // second line 4084 // 4085 // and: 4086 // 4087 // // first line 4088 // // second line 4089 // 4090 // and: 4091 // 4092 // int i; // first line 4093 // // second line 4094 // 4095 // and: 4096 // 4097 // do { // first line 4098 // // second line 4099 // int i; 4100 // } while (true); 4101 // 4102 // and: 4103 // 4104 // enum { 4105 // a, // first line 4106 // // second line 4107 // b 4108 // }; 4109 // 4110 // The second line comment doesn't continue the first in these cases: 4111 // 4112 // // first line 4113 // // second line 4114 // 4115 // and: 4116 // 4117 // int i; // first line 4118 // // second line 4119 // 4120 // and: 4121 // 4122 // do { // first line 4123 // // second line 4124 // int i; 4125 // } while (true); 4126 // 4127 // and: 4128 // 4129 // enum { 4130 // a, // first line 4131 // // second line 4132 // }; 4133 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 4134 4135 // Scan for '{//'. If found, use the column of '{' as a min column for line 4136 // comment section continuation. 4137 const FormatToken *PreviousToken = nullptr; 4138 for (const UnwrappedLineNode &Node : Line.Tokens) { 4139 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4140 isLineComment(*Node.Tok)) { 4141 MinColumnToken = PreviousToken; 4142 break; 4143 } 4144 PreviousToken = Node.Tok; 4145 4146 // Grab the last newline preceding a token in this unwrapped line. 
4147 if (Node.Tok->NewlinesBefore > 0) 4148 MinColumnToken = Node.Tok; 4149 } 4150 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4151 MinColumnToken = PreviousToken; 4152 4153 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4154 MinColumnToken); 4155 } 4156 4157 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4158 bool JustComments = Line->Tokens.empty(); 4159 for (FormatToken *Tok : CommentsBeforeNextToken) { 4160 // Line comments that belong to the same line comment section are put on the 4161 // same line since later we might want to reflow content between them. 4162 // Additional fine-grained breaking of line comment sections is controlled 4163 // by the class BreakableLineCommentSection in case it is desirable to keep 4164 // several line comment sections in the same unwrapped line. 4165 // 4166 // FIXME: Consider putting separate line comment sections as children to the 4167 // unwrapped line instead. 4168 Tok->ContinuesLineCommentSection = 4169 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 4170 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4171 addUnwrappedLine(); 4172 pushToken(Tok); 4173 } 4174 if (NewlineBeforeNext && JustComments) 4175 addUnwrappedLine(); 4176 CommentsBeforeNextToken.clear(); 4177 } 4178 4179 void UnwrappedLineParser::nextToken(int LevelDifference) { 4180 if (eof()) 4181 return; 4182 flushComments(isOnNewLine(*FormatTok)); 4183 pushToken(FormatTok); 4184 FormatToken *Previous = FormatTok; 4185 if (!Style.isJavaScript()) 4186 readToken(LevelDifference); 4187 else 4188 readTokenWithJavaScriptASI(); 4189 FormatTok->Previous = Previous; 4190 if (Style.isVerilog()) { 4191 // Blocks in Verilog can have `begin` and `end` instead of braces. For 4192 // keywords like `begin`, we can't treat them the same as left braces 4193 // because some contexts require one of them. 
For example structs use 4194 // braces and if blocks use keywords, and a left brace can occur in an if 4195 // statement, but it is not a block. For keywords like `end`, we simply 4196 // treat them the same as right braces. 4197 if (Keywords.isVerilogEnd(*FormatTok)) 4198 FormatTok->Tok.setKind(tok::r_brace); 4199 } 4200 } 4201 4202 void UnwrappedLineParser::distributeComments( 4203 const SmallVectorImpl<FormatToken *> &Comments, 4204 const FormatToken *NextTok) { 4205 // Whether or not a line comment token continues a line is controlled by 4206 // the method continuesLineCommentSection, with the following caveat: 4207 // 4208 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4209 // that each comment line from the trail is aligned with the next token, if 4210 // the next token exists. If a trail exists, the beginning of the maximal 4211 // trail is marked as a start of a new comment section. 4212 // 4213 // For example in this code: 4214 // 4215 // int a; // line about a 4216 // // line 1 about b 4217 // // line 2 about b 4218 // int b; 4219 // 4220 // the two lines about b form a maximal trail, so there are two sections, the 4221 // first one consisting of the single comment "// line about a" and the 4222 // second one consisting of the next two comments. 4223 if (Comments.empty()) 4224 return; 4225 bool ShouldPushCommentsInCurrentLine = true; 4226 bool HasTrailAlignedWithNextToken = false; 4227 unsigned StartOfTrailAlignedWithNextToken = 0; 4228 if (NextTok) { 4229 // We are skipping the first element intentionally. 
4230 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4231 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4232 HasTrailAlignedWithNextToken = true; 4233 StartOfTrailAlignedWithNextToken = i; 4234 } 4235 } 4236 } 4237 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4238 FormatToken *FormatTok = Comments[i]; 4239 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4240 FormatTok->ContinuesLineCommentSection = false; 4241 } else { 4242 FormatTok->ContinuesLineCommentSection = 4243 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 4244 } 4245 if (!FormatTok->ContinuesLineCommentSection && 4246 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 4247 ShouldPushCommentsInCurrentLine = false; 4248 } 4249 if (ShouldPushCommentsInCurrentLine) 4250 pushToken(FormatTok); 4251 else 4252 CommentsBeforeNextToken.push_back(FormatTok); 4253 } 4254 } 4255 4256 void UnwrappedLineParser::readToken(int LevelDifference) { 4257 SmallVector<FormatToken *, 1> Comments; 4258 bool PreviousWasComment = false; 4259 bool FirstNonCommentOnLine = false; 4260 do { 4261 FormatTok = Tokens->getNextToken(); 4262 assert(FormatTok); 4263 while (FormatTok->getType() == TT_ConflictStart || 4264 FormatTok->getType() == TT_ConflictEnd || 4265 FormatTok->getType() == TT_ConflictAlternative) { 4266 if (FormatTok->getType() == TT_ConflictStart) 4267 conditionalCompilationStart(/*Unreachable=*/false); 4268 else if (FormatTok->getType() == TT_ConflictAlternative) 4269 conditionalCompilationAlternative(); 4270 else if (FormatTok->getType() == TT_ConflictEnd) 4271 conditionalCompilationEnd(); 4272 FormatTok = Tokens->getNextToken(); 4273 FormatTok->MustBreakBefore = true; 4274 } 4275 4276 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4277 const FormatToken &Tok, 4278 bool PreviousWasComment) { 4279 auto IsFirstOnLine = [](const FormatToken &Tok) { 4280 return Tok.HasUnescapedNewline || Tok.IsFirst; 4281 }; 4282 4283 // Consider 
preprocessor directives preceded by block comments as first 4284 // on line. 4285 if (PreviousWasComment) 4286 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4287 return IsFirstOnLine(Tok); 4288 }; 4289 4290 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4291 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4292 PreviousWasComment = FormatTok->is(tok::comment); 4293 4294 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4295 (!Style.isVerilog() || 4296 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && 4297 FirstNonCommentOnLine) { 4298 distributeComments(Comments, FormatTok); 4299 Comments.clear(); 4300 // If there is an unfinished unwrapped line, we flush the preprocessor 4301 // directives only after that unwrapped line was finished later. 4302 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4303 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4304 assert((LevelDifference >= 0 || 4305 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4306 "LevelDifference makes Line->Level negative"); 4307 Line->Level += LevelDifference; 4308 // Comments stored before the preprocessor directive need to be output 4309 // before the preprocessor directive, at the same level as the 4310 // preprocessor directive, as we consider them to apply to the directive. 
4311 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4312 PPBranchLevel > 0) { 4313 Line->Level += PPBranchLevel; 4314 } 4315 flushComments(isOnNewLine(*FormatTok)); 4316 parsePPDirective(); 4317 PreviousWasComment = FormatTok->is(tok::comment); 4318 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4319 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4320 } 4321 4322 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4323 !Line->InPPDirective) { 4324 continue; 4325 } 4326 4327 if (!FormatTok->is(tok::comment)) { 4328 distributeComments(Comments, FormatTok); 4329 Comments.clear(); 4330 return; 4331 } 4332 4333 Comments.push_back(FormatTok); 4334 } while (!eof()); 4335 4336 distributeComments(Comments, nullptr); 4337 Comments.clear(); 4338 } 4339 4340 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 4341 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 4342 if (MustBreakBeforeNextToken) { 4343 Line->Tokens.back().Tok->MustBreakBefore = true; 4344 MustBreakBeforeNextToken = false; 4345 } 4346 } 4347 4348 } // end namespace format 4349 } // end namespace clang 4350