1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "TokenAnnotator.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/Support/Debug.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 #include <algorithm> 23 #include <utility> 24 25 #define DEBUG_TYPE "format-parser" 26 27 namespace clang { 28 namespace format { 29 30 class FormatTokenSource { 31 public: 32 virtual ~FormatTokenSource() {} 33 34 // Returns the next token in the token stream. 35 virtual FormatToken *getNextToken() = 0; 36 37 // Returns the token preceding the token returned by the last call to 38 // getNextToken() in the token stream, or nullptr if no such token exists. 39 virtual FormatToken *getPreviousToken() = 0; 40 41 // Returns the token that would be returned by the next call to 42 // getNextToken(). 43 virtual FormatToken *peekNextToken() = 0; 44 45 // Returns the token that would be returned after the next N calls to 46 // getNextToken(). N needs to be greater than zero, and small enough that 47 // there are still tokens. Check for tok::eof with N-1 before calling it with 48 // N. 49 virtual FormatToken *peekNextToken(int N) = 0; 50 51 // Returns whether we are at the end of the file. 52 // This can be different from whether getNextToken() returned an eof token 53 // when the FormatTokenSource is a view on a part of the token stream. 
54 virtual bool isEOF() = 0; 55 56 // Gets the current position in the token stream, to be used by setPosition(). 57 virtual unsigned getPosition() = 0; 58 59 // Resets the token stream to the state it was in when getPosition() returned 60 // Position, and return the token at that position in the stream. 61 virtual FormatToken *setPosition(unsigned Position) = 0; 62 }; 63 64 namespace { 65 66 class ScopedDeclarationState { 67 public: 68 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, 69 bool MustBeDeclaration) 70 : Line(Line), Stack(Stack) { 71 Line.MustBeDeclaration = MustBeDeclaration; 72 Stack.push_back(MustBeDeclaration); 73 } 74 ~ScopedDeclarationState() { 75 Stack.pop_back(); 76 if (!Stack.empty()) 77 Line.MustBeDeclaration = Stack.back(); 78 else 79 Line.MustBeDeclaration = true; 80 } 81 82 private: 83 UnwrappedLine &Line; 84 llvm::BitVector &Stack; 85 }; 86 87 static bool isLineComment(const FormatToken &FormatTok) { 88 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 89 } 90 91 // Checks if \p FormatTok is a line comment that continues the line comment 92 // \p Previous. The original column of \p MinColumnToken is used to determine 93 // whether \p FormatTok is indented enough to the right to continue \p Previous. 94 static bool continuesLineComment(const FormatToken &FormatTok, 95 const FormatToken *Previous, 96 const FormatToken *MinColumnToken) { 97 if (!Previous || !MinColumnToken) 98 return false; 99 unsigned MinContinueColumn = 100 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 
0 : 1); 101 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 102 isLineComment(*Previous) && 103 FormatTok.OriginalColumn >= MinContinueColumn; 104 } 105 106 class ScopedMacroState : public FormatTokenSource { 107 public: 108 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 109 FormatToken *&ResetToken) 110 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 111 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 112 Token(nullptr), PreviousToken(nullptr) { 113 FakeEOF.Tok.startToken(); 114 FakeEOF.Tok.setKind(tok::eof); 115 TokenSource = this; 116 Line.Level = 0; 117 Line.InPPDirective = true; 118 } 119 120 ~ScopedMacroState() override { 121 TokenSource = PreviousTokenSource; 122 ResetToken = Token; 123 Line.InPPDirective = false; 124 Line.Level = PreviousLineLevel; 125 } 126 127 FormatToken *getNextToken() override { 128 // The \c UnwrappedLineParser guards against this by never calling 129 // \c getNextToken() after it has encountered the first eof token. 
130 assert(!eof()); 131 PreviousToken = Token; 132 Token = PreviousTokenSource->getNextToken(); 133 if (eof()) 134 return &FakeEOF; 135 return Token; 136 } 137 138 FormatToken *getPreviousToken() override { 139 return PreviousTokenSource->getPreviousToken(); 140 } 141 142 FormatToken *peekNextToken() override { 143 if (eof()) 144 return &FakeEOF; 145 return PreviousTokenSource->peekNextToken(); 146 } 147 148 FormatToken *peekNextToken(int N) override { 149 assert(N > 0); 150 if (eof()) 151 return &FakeEOF; 152 return PreviousTokenSource->peekNextToken(N); 153 } 154 155 bool isEOF() override { return PreviousTokenSource->isEOF(); } 156 157 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 158 159 FormatToken *setPosition(unsigned Position) override { 160 PreviousToken = nullptr; 161 Token = PreviousTokenSource->setPosition(Position); 162 return Token; 163 } 164 165 private: 166 bool eof() { 167 return Token && Token->HasUnescapedNewline && 168 !continuesLineComment(*Token, PreviousToken, 169 /*MinColumnToken=*/PreviousToken); 170 } 171 172 FormatToken FakeEOF; 173 UnwrappedLine &Line; 174 FormatTokenSource *&TokenSource; 175 FormatToken *&ResetToken; 176 unsigned PreviousLineLevel; 177 FormatTokenSource *PreviousTokenSource; 178 179 FormatToken *Token; 180 FormatToken *PreviousToken; 181 }; 182 183 } // end anonymous namespace 184 185 class ScopedLineState { 186 public: 187 ScopedLineState(UnwrappedLineParser &Parser, 188 bool SwitchToPreprocessorLines = false) 189 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 190 if (SwitchToPreprocessorLines) 191 Parser.CurrentLines = &Parser.PreprocessorDirectives; 192 else if (!Parser.Line->Tokens.empty()) 193 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 194 PreBlockLine = std::move(Parser.Line); 195 Parser.Line = std::make_unique<UnwrappedLine>(); 196 Parser.Line->Level = PreBlockLine->Level; 197 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 198 } 199 200 
~ScopedLineState() { 201 if (!Parser.Line->Tokens.empty()) 202 Parser.addUnwrappedLine(); 203 assert(Parser.Line->Tokens.empty()); 204 Parser.Line = std::move(PreBlockLine); 205 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 206 Parser.MustBreakBeforeNextToken = true; 207 Parser.CurrentLines = OriginalLines; 208 } 209 210 private: 211 UnwrappedLineParser &Parser; 212 213 std::unique_ptr<UnwrappedLine> PreBlockLine; 214 SmallVectorImpl<UnwrappedLine> *OriginalLines; 215 }; 216 217 class CompoundStatementIndenter { 218 public: 219 CompoundStatementIndenter(UnwrappedLineParser *Parser, 220 const FormatStyle &Style, unsigned &LineLevel) 221 : CompoundStatementIndenter(Parser, LineLevel, 222 Style.BraceWrapping.AfterControlStatement, 223 Style.BraceWrapping.IndentBraces) {} 224 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 225 bool WrapBrace, bool IndentBrace) 226 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 227 if (WrapBrace) 228 Parser->addUnwrappedLine(); 229 if (IndentBrace) 230 ++LineLevel; 231 } 232 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 233 234 private: 235 unsigned &LineLevel; 236 unsigned OldLineLevel; 237 }; 238 239 namespace { 240 241 class IndexedTokenSource : public FormatTokenSource { 242 public: 243 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 244 : Tokens(Tokens), Position(-1) {} 245 246 FormatToken *getNextToken() override { 247 if (Position >= 0 && Tokens[Position]->is(tok::eof)) { 248 LLVM_DEBUG({ 249 llvm::dbgs() << "Next "; 250 dbgToken(Position); 251 }); 252 return Tokens[Position]; 253 } 254 ++Position; 255 LLVM_DEBUG({ 256 llvm::dbgs() << "Next "; 257 dbgToken(Position); 258 }); 259 return Tokens[Position]; 260 } 261 262 FormatToken *getPreviousToken() override { 263 return Position > 0 ? 
Tokens[Position - 1] : nullptr; 264 } 265 266 FormatToken *peekNextToken() override { 267 int Next = Position + 1; 268 LLVM_DEBUG({ 269 llvm::dbgs() << "Peeking "; 270 dbgToken(Next); 271 }); 272 return Tokens[Next]; 273 } 274 275 FormatToken *peekNextToken(int N) override { 276 assert(N > 0); 277 int Next = Position + N; 278 LLVM_DEBUG({ 279 llvm::dbgs() << "Peeking (+" << (N - 1) << ") "; 280 dbgToken(Next); 281 }); 282 return Tokens[Next]; 283 } 284 285 bool isEOF() override { return Tokens[Position]->is(tok::eof); } 286 287 unsigned getPosition() override { 288 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 289 assert(Position >= 0); 290 return Position; 291 } 292 293 FormatToken *setPosition(unsigned P) override { 294 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 295 Position = P; 296 return Tokens[Position]; 297 } 298 299 void reset() { Position = -1; } 300 301 private: 302 void dbgToken(int Position, llvm::StringRef Indent = "") { 303 FormatToken *Tok = Tokens[Position]; 304 llvm::dbgs() << Indent << "[" << Position 305 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 306 << ", Macro: " << !!Tok->MacroCtx << "\n"; 307 } 308 309 ArrayRef<FormatToken *> Tokens; 310 int Position; 311 }; 312 313 } // end anonymous namespace 314 315 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 316 const AdditionalKeywords &Keywords, 317 unsigned FirstStartColumn, 318 ArrayRef<FormatToken *> Tokens, 319 UnwrappedLineConsumer &Callback) 320 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 321 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 322 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 323 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 324 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 325 ? 
IG_Rejected 326 : IG_Inited), 327 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 328 329 void UnwrappedLineParser::reset() { 330 PPBranchLevel = -1; 331 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 332 ? IG_Rejected 333 : IG_Inited; 334 IncludeGuardToken = nullptr; 335 Line.reset(new UnwrappedLine); 336 CommentsBeforeNextToken.clear(); 337 FormatTok = nullptr; 338 MustBreakBeforeNextToken = false; 339 PreprocessorDirectives.clear(); 340 CurrentLines = &Lines; 341 DeclarationScopeStack.clear(); 342 NestedTooDeep.clear(); 343 PPStack.clear(); 344 Line->FirstStartColumn = FirstStartColumn; 345 } 346 347 void UnwrappedLineParser::parse() { 348 IndexedTokenSource TokenSource(AllTokens); 349 Line->FirstStartColumn = FirstStartColumn; 350 do { 351 LLVM_DEBUG(llvm::dbgs() << "----\n"); 352 reset(); 353 Tokens = &TokenSource; 354 TokenSource.reset(); 355 356 readToken(); 357 parseFile(); 358 359 // If we found an include guard then all preprocessor directives (other than 360 // the guard) are over-indented by one. 361 if (IncludeGuard == IG_Found) 362 for (auto &Line : Lines) 363 if (Line.InPPDirective && Line.Level > 0) 364 --Line.Level; 365 366 // Create line with eof token. 
367 pushToken(FormatTok); 368 addUnwrappedLine(); 369 370 for (const UnwrappedLine &Line : Lines) 371 Callback.consumeUnwrappedLine(Line); 372 373 Callback.finishRun(); 374 Lines.clear(); 375 while (!PPLevelBranchIndex.empty() && 376 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 377 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 378 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 379 } 380 if (!PPLevelBranchIndex.empty()) { 381 ++PPLevelBranchIndex.back(); 382 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 383 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 384 } 385 } while (!PPLevelBranchIndex.empty()); 386 } 387 388 void UnwrappedLineParser::parseFile() { 389 // The top-level context in a file always has declarations, except for pre- 390 // processor directives and JavaScript files. 391 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 392 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 393 MustBeDeclaration); 394 if (Style.Language == FormatStyle::LK_TextProto) 395 parseBracedList(); 396 else 397 parseLevel(/*OpeningBrace=*/nullptr, /*CanContainBracedList=*/true); 398 // Make sure to format the remaining tokens. 399 // 400 // LK_TextProto is special since its top-level is parsed as the body of a 401 // braced list, which does not necessarily have natural line separators such 402 // as a semicolon. Comments after the last entry that have been determined to 403 // not belong to that line, as in: 404 // key: value 405 // // endfile comment 406 // do not have a chance to be put on a line of their own until this point. 407 // Here we add this newline before end-of-file comments. 
408 if (Style.Language == FormatStyle::LK_TextProto && 409 !CommentsBeforeNextToken.empty()) 410 addUnwrappedLine(); 411 flushComments(true); 412 addUnwrappedLine(); 413 } 414 415 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 416 do { 417 switch (FormatTok->Tok.getKind()) { 418 case tok::l_brace: 419 return; 420 default: 421 if (FormatTok->is(Keywords.kw_where)) { 422 addUnwrappedLine(); 423 nextToken(); 424 parseCSharpGenericTypeConstraint(); 425 break; 426 } 427 nextToken(); 428 break; 429 } 430 } while (!eof()); 431 } 432 433 void UnwrappedLineParser::parseCSharpAttribute() { 434 int UnpairedSquareBrackets = 1; 435 do { 436 switch (FormatTok->Tok.getKind()) { 437 case tok::r_square: 438 nextToken(); 439 --UnpairedSquareBrackets; 440 if (UnpairedSquareBrackets == 0) { 441 addUnwrappedLine(); 442 return; 443 } 444 break; 445 case tok::l_square: 446 ++UnpairedSquareBrackets; 447 nextToken(); 448 break; 449 default: 450 nextToken(); 451 break; 452 } 453 } while (!eof()); 454 } 455 456 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 457 if (!Lines.empty() && Lines.back().InPPDirective) 458 return true; 459 460 const FormatToken *Previous = Tokens->getPreviousToken(); 461 return Previous && Previous->is(tok::comment) && 462 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 463 } 464 465 /// \brief Parses a level, that is ???. 466 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level 467 /// \param CanContainBracedList If the content can contain (at any level) a 468 /// braced list. 469 /// \param NextLBracesType The type for left brace found in this level. 470 /// \returns true if a simple block of if/else/for/while, or false otherwise. 471 /// (A simple block has a single statement.) 
472 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, 473 bool CanContainBracedList, 474 IfStmtKind *IfKind, 475 TokenType NextLBracesType) { 476 auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace 477 ? TT_BracedListLBrace 478 : TT_Unknown; 479 const bool IsPrecededByCommentOrPPDirective = 480 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective(); 481 bool HasLabel = false; 482 unsigned StatementCount = 0; 483 bool SwitchLabelEncountered = false; 484 do { 485 if (FormatTok->getType() == TT_AttributeMacro) { 486 nextToken(); 487 continue; 488 } 489 tok::TokenKind kind = FormatTok->Tok.getKind(); 490 if (FormatTok->getType() == TT_MacroBlockBegin) 491 kind = tok::l_brace; 492 else if (FormatTok->getType() == TT_MacroBlockEnd) 493 kind = tok::r_brace; 494 495 auto ParseDefault = [this, OpeningBrace, IfKind, NextLevelLBracesType, 496 &HasLabel, &StatementCount] { 497 parseStructuralElement(IfKind, !OpeningBrace, NextLevelLBracesType, 498 HasLabel ? nullptr : &HasLabel); 499 ++StatementCount; 500 assert(StatementCount > 0 && "StatementCount overflow!"); 501 }; 502 503 switch (kind) { 504 case tok::comment: 505 nextToken(); 506 addUnwrappedLine(); 507 break; 508 case tok::l_brace: 509 if (NextLBracesType != TT_Unknown) 510 FormatTok->setFinalizedType(NextLBracesType); 511 else if (FormatTok->Previous && 512 FormatTok->Previous->ClosesRequiresClause) { 513 // We need the 'default' case here to correctly parse a function 514 // l_brace. 
515 ParseDefault(); 516 continue; 517 } 518 if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) && 519 tryToParseBracedList()) 520 continue; 521 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 522 /*MunchSemi=*/true, /*KeepBraces=*/true, 523 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList, 524 NextLBracesType); 525 ++StatementCount; 526 assert(StatementCount > 0 && "StatementCount overflow!"); 527 addUnwrappedLine(); 528 break; 529 case tok::r_brace: 530 if (OpeningBrace) { 531 if (!Style.RemoveBracesLLVM || 532 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) 533 return false; 534 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel || 535 IsPrecededByCommentOrPPDirective || 536 precededByCommentOrPPDirective()) 537 return false; 538 const FormatToken *Next = Tokens->peekNextToken(); 539 return Next->isNot(tok::comment) || Next->NewlinesBefore > 0; 540 } 541 nextToken(); 542 addUnwrappedLine(); 543 break; 544 case tok::kw_default: { 545 unsigned StoredPosition = Tokens->getPosition(); 546 FormatToken *Next; 547 do { 548 Next = Tokens->getNextToken(); 549 assert(Next); 550 } while (Next->is(tok::comment)); 551 FormatTok = Tokens->setPosition(StoredPosition); 552 if (Next->isNot(tok::colon)) { 553 // default not followed by ':' is not a case label; treat it like 554 // an identifier. 555 parseStructuralElement(); 556 break; 557 } 558 // Else, if it is 'default:', fall through to the case handling. 559 LLVM_FALLTHROUGH; 560 } 561 case tok::kw_case: 562 if (Style.isJavaScript() && Line->MustBeDeclaration) { 563 // A 'case: string' style field declaration. 
564 parseStructuralElement(); 565 break; 566 } 567 if (!SwitchLabelEncountered && 568 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 569 ++Line->Level; 570 SwitchLabelEncountered = true; 571 parseStructuralElement(); 572 break; 573 case tok::l_square: 574 if (Style.isCSharp()) { 575 nextToken(); 576 parseCSharpAttribute(); 577 break; 578 } 579 if (handleCppAttributes()) 580 break; 581 LLVM_FALLTHROUGH; 582 default: 583 ParseDefault(); 584 break; 585 } 586 } while (!eof()); 587 return false; 588 } 589 590 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 591 // We'll parse forward through the tokens until we hit 592 // a closing brace or eof - note that getNextToken() will 593 // parse macros, so this will magically work inside macro 594 // definitions, too. 595 unsigned StoredPosition = Tokens->getPosition(); 596 FormatToken *Tok = FormatTok; 597 const FormatToken *PrevTok = Tok->Previous; 598 // Keep a stack of positions of lbrace tokens. We will 599 // update information about whether an lbrace starts a 600 // braced init list or a different block during the loop. 601 SmallVector<FormatToken *, 8> LBraceStack; 602 assert(Tok->is(tok::l_brace)); 603 do { 604 // Get next non-comment token. 605 FormatToken *NextTok; 606 do { 607 NextTok = Tokens->getNextToken(); 608 } while (NextTok->is(tok::comment)); 609 610 switch (Tok->Tok.getKind()) { 611 case tok::l_brace: 612 if (Style.isJavaScript() && PrevTok) { 613 if (PrevTok->isOneOf(tok::colon, tok::less)) 614 // A ':' indicates this code is in a type, or a braced list 615 // following a label in an object literal ({a: {b: 1}}). 616 // A '<' could be an object used in a comparison, but that is nonsense 617 // code (can never return true), so more likely it is a generic type 618 // argument (`X<{a: string; b: number}>`). 
619 // The code below could be confused by semicolons between the 620 // individual members in a type member list, which would normally 621 // trigger BK_Block. In both cases, this must be parsed as an inline 622 // braced init. 623 Tok->setBlockKind(BK_BracedInit); 624 else if (PrevTok->is(tok::r_paren)) 625 // `) { }` can only occur in function or method declarations in JS. 626 Tok->setBlockKind(BK_Block); 627 } else { 628 Tok->setBlockKind(BK_Unknown); 629 } 630 LBraceStack.push_back(Tok); 631 break; 632 case tok::r_brace: 633 if (LBraceStack.empty()) 634 break; 635 if (LBraceStack.back()->is(BK_Unknown)) { 636 bool ProbablyBracedList = false; 637 if (Style.Language == FormatStyle::LK_Proto) { 638 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 639 } else { 640 // Skip NextTok over preprocessor lines, otherwise we may not 641 // properly diagnose the block as a braced intializer 642 // if the comma separator appears after the pp directive. 643 while (NextTok->is(tok::hash)) { 644 ScopedMacroState MacroState(*Line, Tokens, NextTok); 645 do { 646 NextTok = Tokens->getNextToken(); 647 } while (NextTok->isNot(tok::eof)); 648 } 649 650 // Using OriginalColumn to distinguish between ObjC methods and 651 // binary operators is a bit hacky. 652 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 653 NextTok->OriginalColumn == 0; 654 655 // Try to detect a braced list. Note that regardless how we mark inner 656 // braces here, we will overwrite the BlockKind later if we parse a 657 // braced list (where all blocks inside are by default braced lists), 658 // or when we explicitly detect blocks (for example while parsing 659 // lambdas). 660 661 // If we already marked the opening brace as braced list, the closing 662 // must also be part of it. 
663 ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace); 664 665 ProbablyBracedList = ProbablyBracedList || 666 (Style.isJavaScript() && 667 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 668 Keywords.kw_as)); 669 ProbablyBracedList = ProbablyBracedList || 670 (Style.isCpp() && NextTok->is(tok::l_paren)); 671 672 // If there is a comma, semicolon or right paren after the closing 673 // brace, we assume this is a braced initializer list. 674 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 675 // braced list in JS. 676 ProbablyBracedList = 677 ProbablyBracedList || 678 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 679 tok::r_paren, tok::r_square, tok::l_brace, 680 tok::ellipsis); 681 682 ProbablyBracedList = 683 ProbablyBracedList || 684 (NextTok->is(tok::identifier) && 685 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)); 686 687 ProbablyBracedList = ProbablyBracedList || 688 (NextTok->is(tok::semi) && 689 (!ExpectClassBody || LBraceStack.size() != 1)); 690 691 ProbablyBracedList = 692 ProbablyBracedList || 693 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 694 695 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 696 // We can have an array subscript after a braced init 697 // list, but C++11 attributes are expected after blocks. 
698 NextTok = Tokens->getNextToken(); 699 ProbablyBracedList = NextTok->isNot(tok::l_square); 700 } 701 } 702 if (ProbablyBracedList) { 703 Tok->setBlockKind(BK_BracedInit); 704 LBraceStack.back()->setBlockKind(BK_BracedInit); 705 } else { 706 Tok->setBlockKind(BK_Block); 707 LBraceStack.back()->setBlockKind(BK_Block); 708 } 709 } 710 LBraceStack.pop_back(); 711 break; 712 case tok::identifier: 713 if (!Tok->is(TT_StatementMacro)) 714 break; 715 LLVM_FALLTHROUGH; 716 case tok::at: 717 case tok::semi: 718 case tok::kw_if: 719 case tok::kw_while: 720 case tok::kw_for: 721 case tok::kw_switch: 722 case tok::kw_try: 723 case tok::kw___try: 724 if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown)) 725 LBraceStack.back()->setBlockKind(BK_Block); 726 break; 727 default: 728 break; 729 } 730 PrevTok = Tok; 731 Tok = NextTok; 732 } while (Tok->isNot(tok::eof) && !LBraceStack.empty()); 733 734 // Assume other blocks for all unclosed opening braces. 735 for (FormatToken *LBrace : LBraceStack) 736 if (LBrace->is(BK_Unknown)) 737 LBrace->setBlockKind(BK_Block); 738 739 FormatTok = Tokens->setPosition(StoredPosition); 740 } 741 742 template <class T> 743 static inline void hash_combine(std::size_t &seed, const T &v) { 744 std::hash<T> hasher; 745 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 746 } 747 748 size_t UnwrappedLineParser::computePPHash() const { 749 size_t h = 0; 750 for (const auto &i : PPStack) { 751 hash_combine(h, size_t(i.Kind)); 752 hash_combine(h, i.Line); 753 } 754 return h; 755 } 756 757 // Checks whether \p ParsedLine might fit on a single line. We must clone the 758 // tokens of \p ParsedLine before running the token annotator on it so that we 759 // can restore them afterward. 
760 bool UnwrappedLineParser::mightFitOnOneLine(UnwrappedLine &ParsedLine) const { 761 const auto ColumnLimit = Style.ColumnLimit; 762 if (ColumnLimit == 0) 763 return true; 764 765 auto &Tokens = ParsedLine.Tokens; 766 assert(!Tokens.empty()); 767 const auto *LastToken = Tokens.back().Tok; 768 assert(LastToken); 769 770 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size()); 771 772 int Index = 0; 773 for (const auto &Token : Tokens) { 774 assert(Token.Tok); 775 auto &SavedToken = SavedTokens[Index++]; 776 SavedToken.Tok = new FormatToken; 777 SavedToken.Tok->copyFrom(*Token.Tok); 778 SavedToken.Children = std::move(Token.Children); 779 } 780 781 AnnotatedLine Line(ParsedLine); 782 assert(Line.Last == LastToken); 783 784 TokenAnnotator Annotator(Style, Keywords); 785 Annotator.annotate(Line); 786 Annotator.calculateFormattingInformation(Line); 787 788 const int Length = LastToken->TotalLength; 789 790 Index = 0; 791 for (auto &Token : Tokens) { 792 const auto &SavedToken = SavedTokens[Index++]; 793 Token.Tok->copyFrom(*SavedToken.Tok); 794 Token.Children = std::move(SavedToken.Children); 795 delete SavedToken.Tok; 796 } 797 798 return Line.Level * Style.IndentWidth + Length <= ColumnLimit; 799 } 800 801 UnwrappedLineParser::IfStmtKind UnwrappedLineParser::parseBlock( 802 bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces, 803 bool UnindentWhitesmithsBraces, bool CanContainBracedList, 804 TokenType NextLBracesType) { 805 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 806 "'{' or macro block token expected"); 807 FormatToken *Tok = FormatTok; 808 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 809 FormatTok->setBlockKind(BK_Block); 810 811 // For Whitesmiths mode, jump to the next level prior to skipping over the 812 // braces. 
813 if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 814 ++Line->Level; 815 816 size_t PPStartHash = computePPHash(); 817 818 unsigned InitialLevel = Line->Level; 819 nextToken(/*LevelDifference=*/AddLevels); 820 821 if (MacroBlock && FormatTok->is(tok::l_paren)) 822 parseParens(); 823 824 size_t NbPreprocessorDirectives = 825 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 826 addUnwrappedLine(); 827 size_t OpeningLineIndex = 828 CurrentLines->empty() 829 ? (UnwrappedLine::kInvalidIndex) 830 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 831 832 // Whitesmiths is weird here. The brace needs to be indented for the namespace 833 // block, but the block itself may not be indented depending on the style 834 // settings. This allows the format to back up one level in those cases. 835 if (UnindentWhitesmithsBraces) 836 --Line->Level; 837 838 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 839 MustBeDeclaration); 840 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 841 Line->Level += AddLevels; 842 843 IfStmtKind IfKind = IfStmtKind::NotIf; 844 const bool SimpleBlock = 845 parseLevel(Tok, CanContainBracedList, &IfKind, NextLBracesType); 846 847 if (eof()) 848 return IfKind; 849 850 if (MacroBlock ? 
!FormatTok->is(TT_MacroBlockEnd) 851 : !FormatTok->is(tok::r_brace)) { 852 Line->Level = InitialLevel; 853 FormatTok->setBlockKind(BK_Block); 854 return IfKind; 855 } 856 857 if (SimpleBlock && !KeepBraces && 858 Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) { 859 assert(FormatTok->is(tok::r_brace)); 860 const FormatToken *Previous = Tokens->getPreviousToken(); 861 assert(Previous); 862 if (Previous->isNot(tok::r_brace) || Previous->Optional) { 863 assert(!CurrentLines->empty()); 864 if (mightFitOnOneLine(CurrentLines->back())) { 865 Tok->MatchingParen = FormatTok; 866 FormatTok->MatchingParen = Tok; 867 } 868 } 869 } 870 871 size_t PPEndHash = computePPHash(); 872 873 // Munch the closing brace. 874 nextToken(/*LevelDifference=*/-AddLevels); 875 876 if (MacroBlock && FormatTok->is(tok::l_paren)) 877 parseParens(); 878 879 if (FormatTok->is(tok::kw_noexcept)) { 880 // A noexcept in a requires expression. 881 nextToken(); 882 } 883 884 if (FormatTok->is(tok::arrow)) { 885 // Following the } or noexcept we can find a trailing return type arrow 886 // as part of an implicit conversion constraint. 887 nextToken(); 888 parseStructuralElement(); 889 } 890 891 if (MunchSemi && FormatTok->is(tok::semi)) 892 nextToken(); 893 894 Line->Level = InitialLevel; 895 896 if (PPStartHash == PPEndHash) { 897 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 898 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 899 // Update the opening line to add the forward reference as well 900 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 901 CurrentLines->size() - 1; 902 } 903 } 904 905 return IfKind; 906 } 907 908 static bool isGoogScope(const UnwrappedLine &Line) { 909 // FIXME: Closure-library specific stuff should not be hard-coded but be 910 // configurable. 
911 if (Line.Tokens.size() < 4) 912 return false; 913 auto I = Line.Tokens.begin(); 914 if (I->Tok->TokenText != "goog") 915 return false; 916 ++I; 917 if (I->Tok->isNot(tok::period)) 918 return false; 919 ++I; 920 if (I->Tok->TokenText != "scope") 921 return false; 922 ++I; 923 return I->Tok->is(tok::l_paren); 924 } 925 926 static bool isIIFE(const UnwrappedLine &Line, 927 const AdditionalKeywords &Keywords) { 928 // Look for the start of an immediately invoked anonymous function. 929 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 930 // This is commonly done in JavaScript to create a new, anonymous scope. 931 // Example: (function() { ... })() 932 if (Line.Tokens.size() < 3) 933 return false; 934 auto I = Line.Tokens.begin(); 935 if (I->Tok->isNot(tok::l_paren)) 936 return false; 937 ++I; 938 if (I->Tok->isNot(Keywords.kw_function)) 939 return false; 940 ++I; 941 return I->Tok->is(tok::l_paren); 942 } 943 944 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 945 const FormatToken &InitialToken) { 946 tok::TokenKind Kind = InitialToken.Tok.getKind(); 947 if (InitialToken.is(TT_NamespaceMacro)) 948 Kind = tok::kw_namespace; 949 950 switch (Kind) { 951 case tok::kw_namespace: 952 return Style.BraceWrapping.AfterNamespace; 953 case tok::kw_class: 954 return Style.BraceWrapping.AfterClass; 955 case tok::kw_union: 956 return Style.BraceWrapping.AfterUnion; 957 case tok::kw_struct: 958 return Style.BraceWrapping.AfterStruct; 959 case tok::kw_enum: 960 return Style.BraceWrapping.AfterEnum; 961 default: 962 return false; 963 } 964 } 965 966 void UnwrappedLineParser::parseChildBlock( 967 bool CanContainBracedList, clang::format::TokenType NextLBracesType) { 968 assert(FormatTok->is(tok::l_brace)); 969 FormatTok->setBlockKind(BK_Block); 970 const FormatToken *OpeningBrace = FormatTok; 971 nextToken(); 972 { 973 bool SkipIndent = (Style.isJavaScript() && 974 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 975 ScopedLineState 
LineState(*this); 976 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 977 /*MustBeDeclaration=*/false); 978 Line->Level += SkipIndent ? 0 : 1; 979 parseLevel(OpeningBrace, CanContainBracedList, /*IfKind=*/nullptr, 980 NextLBracesType); 981 flushComments(isOnNewLine(*FormatTok)); 982 Line->Level -= SkipIndent ? 0 : 1; 983 } 984 nextToken(); 985 } 986 987 void UnwrappedLineParser::parsePPDirective() { 988 assert(FormatTok->is(tok::hash) && "'#' expected"); 989 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 990 991 nextToken(); 992 993 if (!FormatTok->Tok.getIdentifierInfo()) { 994 parsePPUnknown(); 995 return; 996 } 997 998 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 999 case tok::pp_define: 1000 parsePPDefine(); 1001 return; 1002 case tok::pp_if: 1003 parsePPIf(/*IfDef=*/false); 1004 break; 1005 case tok::pp_ifdef: 1006 case tok::pp_ifndef: 1007 parsePPIf(/*IfDef=*/true); 1008 break; 1009 case tok::pp_else: 1010 parsePPElse(); 1011 break; 1012 case tok::pp_elifdef: 1013 case tok::pp_elifndef: 1014 case tok::pp_elif: 1015 parsePPElIf(); 1016 break; 1017 case tok::pp_endif: 1018 parsePPEndIf(); 1019 break; 1020 default: 1021 parsePPUnknown(); 1022 break; 1023 } 1024 } 1025 1026 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 1027 size_t Line = CurrentLines->size(); 1028 if (CurrentLines == &PreprocessorDirectives) 1029 Line += Lines.size(); 1030 1031 if (Unreachable || 1032 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 1033 PPStack.push_back({PP_Unreachable, Line}); 1034 else 1035 PPStack.push_back({PP_Conditional, Line}); 1036 } 1037 1038 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 1039 ++PPBranchLevel; 1040 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 1041 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 1042 PPLevelBranchIndex.push_back(0); 1043 PPLevelBranchCount.push_back(0); 1044 } 1045 
PPChainBranchIndex.push(0); 1046 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 1047 conditionalCompilationCondition(Unreachable || Skip); 1048 } 1049 1050 void UnwrappedLineParser::conditionalCompilationAlternative() { 1051 if (!PPStack.empty()) 1052 PPStack.pop_back(); 1053 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1054 if (!PPChainBranchIndex.empty()) 1055 ++PPChainBranchIndex.top(); 1056 conditionalCompilationCondition( 1057 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 1058 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 1059 } 1060 1061 void UnwrappedLineParser::conditionalCompilationEnd() { 1062 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1063 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 1064 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) 1065 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 1066 } 1067 // Guard against #endif's without #if. 1068 if (PPBranchLevel > -1) 1069 --PPBranchLevel; 1070 if (!PPChainBranchIndex.empty()) 1071 PPChainBranchIndex.pop(); 1072 if (!PPStack.empty()) 1073 PPStack.pop_back(); 1074 } 1075 1076 void UnwrappedLineParser::parsePPIf(bool IfDef) { 1077 bool IfNDef = FormatTok->is(tok::pp_ifndef); 1078 nextToken(); 1079 bool Unreachable = false; 1080 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 1081 Unreachable = true; 1082 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 1083 Unreachable = true; 1084 conditionalCompilationStart(Unreachable); 1085 FormatToken *IfCondition = FormatTok; 1086 // If there's a #ifndef on the first line, and the only lines before it are 1087 // comments, it could be an include guard. 
1088 bool MaybeIncludeGuard = IfNDef; 1089 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 1090 for (auto &Line : Lines) { 1091 if (!Line.Tokens.front().Tok->is(tok::comment)) { 1092 MaybeIncludeGuard = false; 1093 IncludeGuard = IG_Rejected; 1094 break; 1095 } 1096 } 1097 --PPBranchLevel; 1098 parsePPUnknown(); 1099 ++PPBranchLevel; 1100 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1101 IncludeGuard = IG_IfNdefed; 1102 IncludeGuardToken = IfCondition; 1103 } 1104 } 1105 1106 void UnwrappedLineParser::parsePPElse() { 1107 // If a potential include guard has an #else, it's not an include guard. 1108 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1109 IncludeGuard = IG_Rejected; 1110 conditionalCompilationAlternative(); 1111 if (PPBranchLevel > -1) 1112 --PPBranchLevel; 1113 parsePPUnknown(); 1114 ++PPBranchLevel; 1115 } 1116 1117 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 1118 1119 void UnwrappedLineParser::parsePPEndIf() { 1120 conditionalCompilationEnd(); 1121 parsePPUnknown(); 1122 // If the #endif of a potential include guard is the last thing in the file, 1123 // then we found an include guard. 
1124 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1125 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1126 IncludeGuard = IG_Found; 1127 } 1128 1129 void UnwrappedLineParser::parsePPDefine() { 1130 nextToken(); 1131 1132 if (!FormatTok->Tok.getIdentifierInfo()) { 1133 IncludeGuard = IG_Rejected; 1134 IncludeGuardToken = nullptr; 1135 parsePPUnknown(); 1136 return; 1137 } 1138 1139 if (IncludeGuard == IG_IfNdefed && 1140 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1141 IncludeGuard = IG_Defined; 1142 IncludeGuardToken = nullptr; 1143 for (auto &Line : Lines) { 1144 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1145 IncludeGuard = IG_Rejected; 1146 break; 1147 } 1148 } 1149 } 1150 1151 // In the context of a define, even keywords should be treated as normal 1152 // identifiers. Setting the kind to identifier is not enough, because we need 1153 // to treat additional keywords like __except as well, which are already 1154 // identifiers. Setting the identifier info to null interferes with include 1155 // guard processing above, and changes preprocessing nesting. 1156 FormatTok->Tok.setKind(tok::identifier); 1157 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); 1158 nextToken(); 1159 if (FormatTok->Tok.getKind() == tok::l_paren && 1160 !FormatTok->hasWhitespaceBefore()) 1161 parseParens(); 1162 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1163 Line->Level += PPBranchLevel + 1; 1164 addUnwrappedLine(); 1165 ++Line->Level; 1166 1167 // Errors during a preprocessor directive can only affect the layout of the 1168 // preprocessor directive, and thus we ignore them. An alternative approach 1169 // would be to use the same approach we use on the file level (no 1170 // re-indentation if there was a structural error) within the macro 1171 // definition. 
1172 parseFile(); 1173 } 1174 1175 void UnwrappedLineParser::parsePPUnknown() { 1176 do { 1177 nextToken(); 1178 } while (!eof()); 1179 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1180 Line->Level += PPBranchLevel + 1; 1181 addUnwrappedLine(); 1182 } 1183 1184 // Here we exclude certain tokens that are not usually the first token in an 1185 // unwrapped line. This is used in attempt to distinguish macro calls without 1186 // trailing semicolons from other constructs split to several lines. 1187 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1188 // Semicolon can be a null-statement, l_square can be a start of a macro or 1189 // a C++11 attribute, but this doesn't seem to be common. 1190 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 1191 Tok.isNot(TT_AttributeSquare) && 1192 // Tokens that can only be used as binary operators and a part of 1193 // overloaded operator names. 1194 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 1195 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 1196 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 1197 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 1198 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 1199 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 1200 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 1201 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 1202 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 1203 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 1204 Tok.isNot(tok::lesslessequal) && 1205 // Colon is used in labels, base class lists, initializer lists, 1206 // range-based for loops, ternary operator, but should never be the 1207 // first token in an unwrapped line. 1208 Tok.isNot(tok::colon) && 1209 // 'noexcept' is a trailing annotation. 
1210 Tok.isNot(tok::kw_noexcept); 1211 } 1212 1213 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1214 const FormatToken *FormatTok) { 1215 // FIXME: This returns true for C/C++ keywords like 'struct'. 1216 return FormatTok->is(tok::identifier) && 1217 (FormatTok->Tok.getIdentifierInfo() == nullptr || 1218 !FormatTok->isOneOf( 1219 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1220 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1221 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1222 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1223 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1224 Keywords.kw_instanceof, Keywords.kw_interface, 1225 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1226 } 1227 1228 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1229 const FormatToken *FormatTok) { 1230 return FormatTok->Tok.isLiteral() || 1231 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1232 mustBeJSIdent(Keywords, FormatTok); 1233 } 1234 1235 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1236 // when encountered after a value (see mustBeJSIdentOrValue). 1237 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1238 const FormatToken *FormatTok) { 1239 return FormatTok->isOneOf( 1240 tok::kw_return, Keywords.kw_yield, 1241 // conditionals 1242 tok::kw_if, tok::kw_else, 1243 // loops 1244 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1245 // switch/case 1246 tok::kw_switch, tok::kw_case, 1247 // exceptions 1248 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1249 // declaration 1250 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1251 Keywords.kw_async, Keywords.kw_function, 1252 // import/export 1253 Keywords.kw_import, tok::kw_export); 1254 } 1255 1256 // Checks whether a token is a type in K&R C (aka C78). 
static bool isC78Type(const FormatToken &Tok) {
  // Note that any identifier is accepted as a potential type name.
  return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
                     tok::kw_unsigned, tok::kw_float, tok::kw_double,
                     tok::identifier);
}

// This function checks whether a token starts the first parameter declaration
// in a K&R C (aka C78) function definition, e.g.:
//   int f(a, b)
//   short a, b;
//   {
//     return a + b;
//   }
//
// \p Tok is the token following the closing parenthesis of the parameter
// list, \p Next is the token after \p Tok, and \p FuncName is the candidate
// function name. All three must be non-null.
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
                               const FormatToken *FuncName) {
  assert(Tok);
  assert(Next);
  assert(FuncName);

  if (FuncName->isNot(tok::identifier))
    return false;

  // The function name must be preceded by a '*' or a C78 type.
  const FormatToken *Prev = FuncName->Previous;
  if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
    return false;

  // The declaration must start with a C78 type, 'register', 'struct' or
  // 'union'.
  if (!isC78Type(*Tok) &&
      !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
    return false;

  // The next token must be a '*' or a token carrying identifier info.
  if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
    return false;

  // Walk backwards: the declaration must directly follow ')' ...
  Tok = Tok->Previous;
  if (!Tok || Tok->isNot(tok::r_paren))
    return false;

  // ... which must directly follow an identifier (the last parameter name) ...
  Tok = Tok->Previous;
  if (!Tok || Tok->isNot(tok::identifier))
    return false;

  // ... which in turn must directly follow '(' or ','.
  return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
}

// Parses the remainder of a C++ module import (called by
// parseStructuralElement() for C++ when the current token is 'import'):
// consumes tokens up to and including the terminating ';' (or up to eof) and
// emits them as one unwrapped line.
void UnwrappedLineParser::parseModuleImport() {
  nextToken();
  while (!eof()) {
    if (FormatTok->is(tok::colon)) {
      FormatTok->setFinalizedType(TT_ModulePartitionColon);
    }
    // Handle import <foo/bar.h> as we would an include statement.
    else if (FormatTok->is(tok::less)) {
      nextToken();
      while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
        // Mark tokens up to the trailing line comments as implicit string
        // literals.
        if (FormatTok->isNot(tok::comment) &&
            !FormatTok->TokenText.startswith("//"))
          FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
        nextToken();
      }
    }
    if (FormatTok->is(tok::semi)) {
      nextToken();
      break;
    }
    nextToken();
  }

  addUnwrappedLine();
}

// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// terminate the line in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // ASI is only considered across a line break; if comments precede the next
  // token, the first comment decides whether there is one.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
    // If the line contains an '@' sign, the previous token might be an
    // annotation, which can precede another identifier/value.
    bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
      return LineNode.Tok->is(tok::at);
    });
    if (HasAt)
      return;
  }
  // A value followed by '!' on the next line: end the line after the value.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // A value on the next line directly after a value, ']', ')', '++' or '--'
  // (and not across a template string expression boundary).
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus)))
    return addUnwrappedLine();
  // A declaration or statement keyword after a value or ')'.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next))
    return addUnwrappedLine();
}

// Parses one structural element starting at the current token.
//
// The first switch dispatches on the leading token to the dedicated parsers
// (namespaces, control flow, access specifiers, extern blocks, imports, ...).
// Cases that `return` have fully handled the element; cases that `break` fall
// into the generic loop below, which consumes tokens until one of its cases
// finishes the element or eof is reached.
//
// \p IfKind is forwarded to parseIfThenElse().
// \p IsTopLevel enables the K&R C parameter declaration check after a
//    parenthesized list.
// \p NextLBracesType, if not TT_Unknown, is set as the finalized type of an
//    l_brace encountered in the generic loop.
// \p HasLabel, if non-null, is set to true when a label is parsed.
void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
                                                 bool IsTopLevel,
                                                 TokenType NextLBracesType,
                                                 bool *HasLabel) {
  // TableGen 'include "file"' forms a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setFinalizedType(TT_InlineASMBrace);
      nextToken();
      // Mark every token up to the closing brace as finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setFinalizedType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# the keyword is simply consumed here; in the
    // other languages it is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse(IfKind);
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'case: string' field declaration.
      nextToken();
      break;
    }
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only 'extern "..." {' is handled here; anything else falls through to
    // the generic loop.
    if (FormatTok->is(tok::string_literal)) {
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // import [public] "file" [;]
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp()) {
        parseModuleImport();
        return;
      }
    }
    // 'signals:', 'Q_SIGNALS:', 'slots:' and 'Q_SLOTS:' sections in C++.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic loop: consume the rest of the structural element token by token.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Objective-C '@' keywords.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      return;
    case tok::kw_requires: {
      if (Style.isCpp()) {
        bool ParsedClause = parseRequires();
        if (ParsedClause)
          return;
      } else {
        nextToken();
      }
      break;
    }
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // 'typedef NS_ENUM(...)' and the related Apple enum macros.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike())
        return;
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      // Consume the operator so that it is not handled by another case.
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' followed by an optional type, an optional parenthesized list and
      // an optional braced body.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown)
        FormatTok->setFinalizedType(NextLBracesType);
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        if (!Line->InPPDirective)
          FormatTok->setFinalizedType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          //
          // Peek at the next token, then rewind the token source.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike())
          return;
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;

      FormatToken *PreviousToken = FormatTok;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // Only consider the identifier if it is the first token of the line
      // (optionally preceded by a single comment token).
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          if (HasLabel)
            *HasLabel = true;
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase name that is either function-like or at
        // least five characters long, followed by a line break and a token
        // that can start a new line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper() &&
            !PreviousToken->isTypeFinalized()) {
          PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->is(tok::less)) {
        // Proto: '= <' starts a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // 'case: string' field declaration.
        nextToken();
        break;
      }
      parseCaseLabel();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

// Tries to parse a C# property accessor block starting at the current '{'.
//
// Returns false without consuming anything when the language is not C#, when
// the token before the '{' is not an identifier, or when the lookahead finds
// no 'get', 'init' or 'set' keyword (the token source position is restored in
// that case). Otherwise the accessor is consumed and true is returned.
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  if (FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
  // Track these as they do not require line breaks to be introduced.
  bool HasSpecialAccessor = false;
  bool IsTrivialPropertyAccessor = true;
  // Note: eof() inspects FormatTok, which does not advance in this loop; the
  // loop ends at the first lookahead token that is not part of the trivial
  // form.
  while (!eof()) {
    if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
                     tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
                     Keywords.kw_init, Keywords.kw_set)) {
      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
        HasSpecialAccessor = true;
      Tok = Tokens->getNextToken();
      continue;
    }
    // Anything other than the closing brace means there is more than the
    // trivial form.
    if (Tok->isNot(tok::r_brace))
      IsTrivialPropertyAccessor = false;
    break;
  }

  if (!HasSpecialAccessor) {
    Tokens->setPosition(StoredPosition);
    return false;
  }

  // Try to parse the property accessor:
  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  Tokens->setPosition(StoredPosition);
  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
    addUnwrappedLine();
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_brace:
      // End of the accessor list; an '=' after it starts an initializer that
      // is consumed up to and including the ';'.
      nextToken();
      if (FormatTok->is(tok::equal)) {
        while (!eof() && FormatTok->isNot(tok::semi))
          nextToken();
        nextToken();
      }
      addUnwrappedLine();
      return true;
    case tok::l_brace:
      // Accessor with a block body.
      ++Line->Level;
      parseBlock(/*MustBeDeclaration=*/true);
      addUnwrappedLine();
      --Line->Level;
      break;
    case tok::equal:
      if (FormatTok->is(TT_FatArrow)) {
        // Expression-bodied accessor: consume up to and including the ';'.
        ++Line->Level;
        do {
          nextToken();
        } while (!eof() && FormatTok->isNot(tok::semi));
        nextToken();
        addUnwrappedLine();
        --Line->Level;
        break;
      }
      nextToken();
      break;
    default:
      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
                             Keywords.kw_set) &&
          !IsTrivialPropertyAccessor) {
        // Non-trivial get/set needs to be on its own line.
        addUnwrappedLine();
      }
      nextToken();
    }
  } while (!eof());

  // Unreachable for well-formed code (paired '{' and '}').
  return true;
}

// Tries to parse a C++ lambda starting at the current '['.
//
// Returns false if the language is not C++ (the '[' is consumed in that case)
// or if tryToParseLambdaIntroducer() fails. Otherwise returns true: either a
// lambda body was found, in which case the '[' and '{' are marked as lambda
// tokens and the body is parsed as a child block, or scanning stopped at a
// token that cannot be part of a lambda declarator, in which case no lambda
// token types are set for the '[' and no body is parsed.
bool UnwrappedLineParser::tryToParseLambda() {
  assert(FormatTok->is(tok::l_square));
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  bool SeenArrow = false;
  bool InTemplateParameterList = false;

  // Scan the tokens between the introducer and the body.
  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_class:
    case tok::kw_template:
    case tok::kw_typename:
      assert(FormatTok->Previous);
      if (FormatTok->Previous->is(tok::less))
        InTemplateParameterList = true;
      nextToken();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as a heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setFinalizedType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    default:
      return true;
    }
  }
  // The loop ended on '{': this is the lambda body.
  FormatTok->setFinalizedType(TT_LambdaLBrace);
  LSquare.setFinalizedType(TT_LambdaLSquare);
  parseChildBlock();
  return true;
}

bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
  const FormatToken *Previous = FormatTok->Previous;
  const FormatToken *LeftSquare = FormatTok;
  nextToken();
  if (Previous &&
      (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
                         tok::kw_delete, tok::l_square) ||
       LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() ||
       Previous->isSimpleTypeSpecifier())) {
    return false;
  }
  if (FormatTok->is(tok::l_square))
    return false;
  if (FormatTok->is(tok::r_square)) {
    const FormatToken *Next = Tokens->peekNextToken();
    if (Next->is(tok::greater))
      return false;
  }
  parseSquare(/*LambdaIntroducer=*/true);
return true; 2111 } 2112 2113 void UnwrappedLineParser::tryToParseJSFunction() { 2114 assert(FormatTok->is(Keywords.kw_function) || 2115 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 2116 if (FormatTok->is(Keywords.kw_async)) 2117 nextToken(); 2118 // Consume "function". 2119 nextToken(); 2120 2121 // Consume * (generator function). Treat it like C++'s overloaded operators. 2122 if (FormatTok->is(tok::star)) { 2123 FormatTok->setFinalizedType(TT_OverloadedOperator); 2124 nextToken(); 2125 } 2126 2127 // Consume function name. 2128 if (FormatTok->is(tok::identifier)) 2129 nextToken(); 2130 2131 if (FormatTok->isNot(tok::l_paren)) 2132 return; 2133 2134 // Parse formal parameter list. 2135 parseParens(); 2136 2137 if (FormatTok->is(tok::colon)) { 2138 // Parse a type definition. 2139 nextToken(); 2140 2141 // Eat the type declaration. For braced inline object types, balance braces, 2142 // otherwise just parse until finding an l_brace for the function body. 2143 if (FormatTok->is(tok::l_brace)) 2144 tryToParseBracedList(); 2145 else 2146 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 2147 nextToken(); 2148 } 2149 2150 if (FormatTok->is(tok::semi)) 2151 return; 2152 2153 parseChildBlock(); 2154 } 2155 2156 bool UnwrappedLineParser::tryToParseBracedList() { 2157 if (FormatTok->is(BK_Unknown)) 2158 calculateBraceTypes(); 2159 assert(FormatTok->isNot(BK_Unknown)); 2160 if (FormatTok->is(BK_Block)) 2161 return false; 2162 nextToken(); 2163 parseBracedList(); 2164 return true; 2165 } 2166 2167 bool UnwrappedLineParser::tryToParseChildBlock() { 2168 assert(Style.isJavaScript() || Style.isCSharp()); 2169 assert(FormatTok->is(TT_FatArrow)); 2170 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 2171 // They always start an expression or a child block if followed by a curly 2172 // brace. 
2173 nextToken(); 2174 if (FormatTok->isNot(tok::l_brace)) 2175 return false; 2176 parseChildBlock(); 2177 return true; 2178 } 2179 2180 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 2181 bool IsEnum, 2182 tok::TokenKind ClosingBraceKind) { 2183 bool HasError = false; 2184 2185 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 2186 // replace this by using parseAssignmentExpression() inside. 2187 do { 2188 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2189 tryToParseChildBlock()) 2190 continue; 2191 if (Style.isJavaScript()) { 2192 if (FormatTok->is(Keywords.kw_function) || 2193 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 2194 tryToParseJSFunction(); 2195 continue; 2196 } 2197 if (FormatTok->is(tok::l_brace)) { 2198 // Could be a method inside of a braced list `{a() { return 1; }}`. 2199 if (tryToParseBracedList()) 2200 continue; 2201 parseChildBlock(); 2202 } 2203 } 2204 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 2205 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2206 addUnwrappedLine(); 2207 nextToken(); 2208 return !HasError; 2209 } 2210 switch (FormatTok->Tok.getKind()) { 2211 case tok::l_square: 2212 if (Style.isCSharp()) 2213 parseSquare(); 2214 else 2215 tryToParseLambda(); 2216 break; 2217 case tok::l_paren: 2218 parseParens(); 2219 // JavaScript can just have free standing methods and getters/setters in 2220 // object literals. Detect them by a "{" following ")". 2221 if (Style.isJavaScript()) { 2222 if (FormatTok->is(tok::l_brace)) 2223 parseChildBlock(); 2224 break; 2225 } 2226 break; 2227 case tok::l_brace: 2228 // Assume there are no blocks inside a braced init list apart 2229 // from the ones we explicitly parse out (like lambdas). 
2230 FormatTok->setBlockKind(BK_BracedInit); 2231 nextToken(); 2232 parseBracedList(); 2233 break; 2234 case tok::less: 2235 if (Style.Language == FormatStyle::LK_Proto || 2236 ClosingBraceKind == tok::greater) { 2237 nextToken(); 2238 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2239 /*ClosingBraceKind=*/tok::greater); 2240 } else { 2241 nextToken(); 2242 } 2243 break; 2244 case tok::semi: 2245 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2246 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 2247 // used for error recovery if we have otherwise determined that this is 2248 // a braced list. 2249 if (Style.isJavaScript()) { 2250 nextToken(); 2251 break; 2252 } 2253 HasError = true; 2254 if (!ContinueOnSemicolons) 2255 return !HasError; 2256 nextToken(); 2257 break; 2258 case tok::comma: 2259 nextToken(); 2260 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2261 addUnwrappedLine(); 2262 break; 2263 default: 2264 nextToken(); 2265 break; 2266 } 2267 } while (!eof()); 2268 return false; 2269 } 2270 2271 /// \brief Parses a pair of parentheses (and everything between them). 2272 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all 2273 /// double ampersands. This only counts for the current parens scope. 2274 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { 2275 assert(FormatTok->is(tok::l_paren) && "'(' expected."); 2276 nextToken(); 2277 do { 2278 switch (FormatTok->Tok.getKind()) { 2279 case tok::l_paren: 2280 parseParens(); 2281 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2282 parseChildBlock(); 2283 break; 2284 case tok::r_paren: 2285 nextToken(); 2286 return; 2287 case tok::r_brace: 2288 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2289 return; 2290 case tok::l_square: 2291 tryToParseLambda(); 2292 break; 2293 case tok::l_brace: 2294 if (!tryToParseBracedList()) 2295 parseChildBlock(); 2296 break; 2297 case tok::at: 2298 nextToken(); 2299 if (FormatTok->is(tok::l_brace)) { 2300 nextToken(); 2301 parseBracedList(); 2302 } 2303 break; 2304 case tok::equal: 2305 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2306 tryToParseChildBlock(); 2307 else 2308 nextToken(); 2309 break; 2310 case tok::kw_class: 2311 if (Style.isJavaScript()) 2312 parseRecord(/*ParseAsExpr=*/true); 2313 else 2314 nextToken(); 2315 break; 2316 case tok::identifier: 2317 if (Style.isJavaScript() && 2318 (FormatTok->is(Keywords.kw_function) || 2319 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 2320 tryToParseJSFunction(); 2321 else 2322 nextToken(); 2323 break; 2324 case tok::kw_requires: { 2325 auto RequiresToken = FormatTok; 2326 nextToken(); 2327 parseRequiresExpression(RequiresToken); 2328 break; 2329 } 2330 case tok::ampamp: 2331 if (AmpAmpTokenType != TT_Unknown) 2332 FormatTok->setFinalizedType(AmpAmpTokenType); 2333 LLVM_FALLTHROUGH; 2334 default: 2335 nextToken(); 2336 break; 2337 } 2338 } while (!eof()); 2339 } 2340 2341 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2342 if (!LambdaIntroducer) { 2343 assert(FormatTok->is(tok::l_square) && "'[' expected."); 2344 if (tryToParseLambda()) 2345 return; 2346 } 2347 do { 2348 switch (FormatTok->Tok.getKind()) { 2349 case tok::l_paren: 2350 parseParens(); 2351 break; 2352 case tok::r_square: 2353 nextToken(); 2354 return; 2355 case tok::r_brace: 2356 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2357 return; 2358 case tok::l_square: 2359 parseSquare(); 2360 break; 2361 case tok::l_brace: { 2362 if (!tryToParseBracedList()) 2363 parseChildBlock(); 2364 break; 2365 } 2366 case tok::at: 2367 nextToken(); 2368 if (FormatTok->is(tok::l_brace)) { 2369 nextToken(); 2370 parseBracedList(); 2371 } 2372 break; 2373 default: 2374 nextToken(); 2375 break; 2376 } 2377 } while (!eof()); 2378 } 2379 2380 void UnwrappedLineParser::keepAncestorBraces() { 2381 if (!Style.RemoveBracesLLVM) 2382 return; 2383 2384 const int MaxNestingLevels = 2; 2385 const int Size = NestedTooDeep.size(); 2386 if (Size >= MaxNestingLevels) 2387 NestedTooDeep[Size - MaxNestingLevels] = true; 2388 NestedTooDeep.push_back(false); 2389 } 2390 2391 static FormatToken *getLastNonComment(const UnwrappedLine &Line) { 2392 for (const auto &Token : llvm::reverse(Line.Tokens)) 2393 if (Token.Tok->isNot(tok::comment)) 2394 return Token.Tok; 2395 2396 return nullptr; 2397 } 2398 2399 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) { 2400 FormatToken *Tok = nullptr; 2401 2402 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() && 2403 PreprocessorDirectives.empty()) { 2404 Tok = getLastNonComment(*Line); 2405 assert(Tok); 2406 if (Tok->BraceCount < 0) { 2407 assert(Tok->BraceCount == -1); 2408 Tok = nullptr; 2409 } else { 2410 Tok->BraceCount = -1; 2411 } 2412 } 2413 2414 addUnwrappedLine(); 2415 ++Line->Level; 2416 parseStructuralElement(); 2417 2418 if (Tok) { 2419 assert(!Line->InPPDirective); 2420 Tok = nullptr; 2421 for (const auto &L : llvm::reverse(*CurrentLines)) { 2422 if (!L.InPPDirective && getLastNonComment(L)) { 2423 Tok = L.Tokens.back().Tok; 2424 break; 2425 } 2426 } 2427 assert(Tok); 2428 ++Tok->BraceCount; 2429 } 2430 2431 if (CheckEOF && FormatTok->is(tok::eof)) 2432 addUnwrappedLine(); 2433 2434 --Line->Level; 2435 } 2436 2437 static void markOptionalBraces(FormatToken *LeftBrace) { 2438 if (!LeftBrace) 2439 return; 2440 2441 
assert(LeftBrace->is(tok::l_brace)); 2442 2443 FormatToken *RightBrace = LeftBrace->MatchingParen; 2444 if (!RightBrace) { 2445 assert(!LeftBrace->Optional); 2446 return; 2447 } 2448 2449 assert(RightBrace->is(tok::r_brace)); 2450 assert(RightBrace->MatchingParen == LeftBrace); 2451 assert(LeftBrace->Optional == RightBrace->Optional); 2452 2453 LeftBrace->Optional = true; 2454 RightBrace->Optional = true; 2455 } 2456 2457 void UnwrappedLineParser::handleAttributes() { 2458 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2459 if (FormatTok->is(TT_AttributeMacro)) 2460 nextToken(); 2461 handleCppAttributes(); 2462 } 2463 2464 bool UnwrappedLineParser::handleCppAttributes() { 2465 // Handle [[likely]] / [[unlikely]] attributes. 2466 if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) { 2467 parseSquare(); 2468 return true; 2469 } 2470 return false; 2471 } 2472 2473 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, 2474 bool KeepBraces) { 2475 assert(FormatTok->is(tok::kw_if) && "'if' expected"); 2476 nextToken(); 2477 if (FormatTok->is(tok::exclaim)) 2478 nextToken(); 2479 2480 bool KeepIfBraces = true; 2481 if (FormatTok->is(tok::kw_consteval)) { 2482 nextToken(); 2483 } else { 2484 if (Style.RemoveBracesLLVM) 2485 KeepIfBraces = KeepBraces; 2486 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier)) 2487 nextToken(); 2488 if (FormatTok->is(tok::l_paren)) 2489 parseParens(); 2490 } 2491 handleAttributes(); 2492 2493 bool NeedsUnwrappedLine = false; 2494 keepAncestorBraces(); 2495 2496 FormatToken *IfLeftBrace = nullptr; 2497 IfStmtKind IfBlockKind = IfStmtKind::NotIf; 2498 2499 if (FormatTok->is(tok::l_brace)) { 2500 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 2501 IfLeftBrace = FormatTok; 2502 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2503 IfBlockKind = parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2504 /*MunchSemi=*/true, KeepIfBraces); 2505 if (Style.BraceWrapping.BeforeElse) 2506 
addUnwrappedLine(); 2507 else 2508 NeedsUnwrappedLine = true; 2509 } else { 2510 parseUnbracedBody(); 2511 } 2512 2513 if (Style.RemoveBracesLLVM) { 2514 assert(!NestedTooDeep.empty()); 2515 KeepIfBraces = KeepIfBraces || 2516 (IfLeftBrace && !IfLeftBrace->MatchingParen) || 2517 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || 2518 IfBlockKind == IfStmtKind::IfElseIf; 2519 } 2520 2521 bool KeepElseBraces = KeepIfBraces; 2522 FormatToken *ElseLeftBrace = nullptr; 2523 IfStmtKind Kind = IfStmtKind::IfOnly; 2524 2525 if (FormatTok->is(tok::kw_else)) { 2526 if (Style.RemoveBracesLLVM) { 2527 NestedTooDeep.back() = false; 2528 Kind = IfStmtKind::IfElse; 2529 } 2530 nextToken(); 2531 handleAttributes(); 2532 if (FormatTok->is(tok::l_brace)) { 2533 FormatTok->setFinalizedType(TT_ElseLBrace); 2534 ElseLeftBrace = FormatTok; 2535 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2536 if (parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2537 /*MunchSemi=*/true, KeepElseBraces) == IfStmtKind::IfOnly) 2538 Kind = IfStmtKind::IfElseIf; 2539 addUnwrappedLine(); 2540 } else if (FormatTok->is(tok::kw_if)) { 2541 const FormatToken *Previous = Tokens->getPreviousToken(); 2542 assert(Previous); 2543 const bool IsPrecededByComment = Previous->is(tok::comment); 2544 if (IsPrecededByComment) { 2545 addUnwrappedLine(); 2546 ++Line->Level; 2547 } 2548 bool TooDeep = true; 2549 if (Style.RemoveBracesLLVM) { 2550 Kind = IfStmtKind::IfElseIf; 2551 TooDeep = NestedTooDeep.pop_back_val(); 2552 } 2553 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces); 2554 if (Style.RemoveBracesLLVM) 2555 NestedTooDeep.push_back(TooDeep); 2556 if (IsPrecededByComment) 2557 --Line->Level; 2558 } else { 2559 parseUnbracedBody(/*CheckEOF=*/true); 2560 } 2561 } else { 2562 if (Style.RemoveBracesLLVM) 2563 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; 2564 if (NeedsUnwrappedLine) 2565 addUnwrappedLine(); 2566 } 2567 2568 if 
(!Style.RemoveBracesLLVM) 2569 return nullptr; 2570 2571 assert(!NestedTooDeep.empty()); 2572 KeepElseBraces = KeepElseBraces || 2573 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || 2574 NestedTooDeep.back(); 2575 2576 NestedTooDeep.pop_back(); 2577 2578 if (!KeepIfBraces && !KeepElseBraces) { 2579 markOptionalBraces(IfLeftBrace); 2580 markOptionalBraces(ElseLeftBrace); 2581 } else if (IfLeftBrace) { 2582 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; 2583 if (IfRightBrace) { 2584 assert(IfRightBrace->MatchingParen == IfLeftBrace); 2585 assert(!IfLeftBrace->Optional); 2586 assert(!IfRightBrace->Optional); 2587 IfLeftBrace->MatchingParen = nullptr; 2588 IfRightBrace->MatchingParen = nullptr; 2589 } 2590 } 2591 2592 if (IfKind) 2593 *IfKind = Kind; 2594 2595 return IfLeftBrace; 2596 } 2597 2598 void UnwrappedLineParser::parseTryCatch() { 2599 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2600 nextToken(); 2601 bool NeedsUnwrappedLine = false; 2602 if (FormatTok->is(tok::colon)) { 2603 // We are in a function try block, what comes is an initializer list. 2604 nextToken(); 2605 2606 // In case identifiers were removed by clang-tidy, what might follow is 2607 // multiple commas in sequence - before the first identifier. 2608 while (FormatTok->is(tok::comma)) 2609 nextToken(); 2610 2611 while (FormatTok->is(tok::identifier)) { 2612 nextToken(); 2613 if (FormatTok->is(tok::l_paren)) 2614 parseParens(); 2615 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2616 FormatTok->is(tok::l_brace)) { 2617 do { 2618 nextToken(); 2619 } while (!FormatTok->is(tok::r_brace)); 2620 nextToken(); 2621 } 2622 2623 // In case identifiers were removed by clang-tidy, what might follow is 2624 // multiple commas in sequence - after the first identifier. 2625 while (FormatTok->is(tok::comma)) 2626 nextToken(); 2627 } 2628 } 2629 // Parse try with resource. 
2630 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) 2631 parseParens(); 2632 2633 keepAncestorBraces(); 2634 2635 if (FormatTok->is(tok::l_brace)) { 2636 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2637 parseBlock(); 2638 if (Style.BraceWrapping.BeforeCatch) 2639 addUnwrappedLine(); 2640 else 2641 NeedsUnwrappedLine = true; 2642 } else if (!FormatTok->is(tok::kw_catch)) { 2643 // The C++ standard requires a compound-statement after a try. 2644 // If there's none, we try to assume there's a structuralElement 2645 // and try to continue. 2646 addUnwrappedLine(); 2647 ++Line->Level; 2648 parseStructuralElement(); 2649 --Line->Level; 2650 } 2651 while (true) { 2652 if (FormatTok->is(tok::at)) 2653 nextToken(); 2654 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 2655 tok::kw___finally) || 2656 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 2657 FormatTok->is(Keywords.kw_finally)) || 2658 (FormatTok->isObjCAtKeyword(tok::objc_catch) || 2659 FormatTok->isObjCAtKeyword(tok::objc_finally)))) 2660 break; 2661 nextToken(); 2662 while (FormatTok->isNot(tok::l_brace)) { 2663 if (FormatTok->is(tok::l_paren)) { 2664 parseParens(); 2665 continue; 2666 } 2667 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) { 2668 if (Style.RemoveBracesLLVM) 2669 NestedTooDeep.pop_back(); 2670 return; 2671 } 2672 nextToken(); 2673 } 2674 NeedsUnwrappedLine = false; 2675 Line->MustBeDeclaration = false; 2676 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2677 parseBlock(); 2678 if (Style.BraceWrapping.BeforeCatch) 2679 addUnwrappedLine(); 2680 else 2681 NeedsUnwrappedLine = true; 2682 } 2683 2684 if (Style.RemoveBracesLLVM) 2685 NestedTooDeep.pop_back(); 2686 2687 if (NeedsUnwrappedLine) 2688 addUnwrappedLine(); 2689 } 2690 2691 void UnwrappedLineParser::parseNamespace() { 2692 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 2693 "'namespace' expected"); 2694 2695 const 
FormatToken &InitialToken = *FormatTok; 2696 nextToken(); 2697 if (InitialToken.is(TT_NamespaceMacro)) { 2698 parseParens(); 2699 } else { 2700 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, 2701 tok::l_square, tok::period, tok::l_paren) || 2702 (Style.isCSharp() && FormatTok->is(tok::kw_union))) 2703 if (FormatTok->is(tok::l_square)) 2704 parseSquare(); 2705 else if (FormatTok->is(tok::l_paren)) 2706 parseParens(); 2707 else 2708 nextToken(); 2709 } 2710 if (FormatTok->is(tok::l_brace)) { 2711 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2712 addUnwrappedLine(); 2713 2714 unsigned AddLevels = 2715 Style.NamespaceIndentation == FormatStyle::NI_All || 2716 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 2717 DeclarationScopeStack.size() > 1) 2718 ? 1u 2719 : 0u; 2720 bool ManageWhitesmithsBraces = 2721 AddLevels == 0u && 2722 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 2723 2724 // If we're in Whitesmiths mode, indent the brace if we're not indenting 2725 // the whole block. 2726 if (ManageWhitesmithsBraces) 2727 ++Line->Level; 2728 2729 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true, 2730 /*KeepBraces=*/true, ManageWhitesmithsBraces); 2731 2732 // Munch the semicolon after a namespace. This is more common than one would 2733 // think. Putting the semicolon into its own line is very ugly. 2734 if (FormatTok->is(tok::semi)) 2735 nextToken(); 2736 2737 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); 2738 2739 if (ManageWhitesmithsBraces) 2740 --Line->Level; 2741 } 2742 // FIXME: Add error handling. 
2743 } 2744 2745 void UnwrappedLineParser::parseNew() { 2746 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 2747 nextToken(); 2748 2749 if (Style.isCSharp()) { 2750 do { 2751 if (FormatTok->is(tok::l_brace)) 2752 parseBracedList(); 2753 2754 if (FormatTok->isOneOf(tok::semi, tok::comma)) 2755 return; 2756 2757 nextToken(); 2758 } while (!eof()); 2759 } 2760 2761 if (Style.Language != FormatStyle::LK_Java) 2762 return; 2763 2764 // In Java, we can parse everything up to the parens, which aren't optional. 2765 do { 2766 // There should not be a ;, { or } before the new's open paren. 2767 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 2768 return; 2769 2770 // Consume the parens. 2771 if (FormatTok->is(tok::l_paren)) { 2772 parseParens(); 2773 2774 // If there is a class body of an anonymous class, consume that as child. 2775 if (FormatTok->is(tok::l_brace)) 2776 parseChildBlock(); 2777 return; 2778 } 2779 nextToken(); 2780 } while (!eof()); 2781 } 2782 2783 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) { 2784 keepAncestorBraces(); 2785 2786 if (FormatTok->is(tok::l_brace)) { 2787 if (!KeepBraces) 2788 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 2789 FormatToken *LeftBrace = FormatTok; 2790 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2791 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2792 /*MunchSemi=*/true, KeepBraces); 2793 if (!KeepBraces) { 2794 assert(!NestedTooDeep.empty()); 2795 if (!NestedTooDeep.back()) 2796 markOptionalBraces(LeftBrace); 2797 } 2798 if (WrapRightBrace) 2799 addUnwrappedLine(); 2800 } else { 2801 parseUnbracedBody(); 2802 } 2803 2804 if (!KeepBraces) 2805 NestedTooDeep.pop_back(); 2806 } 2807 2808 void UnwrappedLineParser::parseForOrWhileLoop() { 2809 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 2810 "'for', 'while' or foreach macro expected"); 2811 const bool KeepBraces = !Style.RemoveBracesLLVM || 2812 
!FormatTok->isOneOf(tok::kw_for, tok::kw_while); 2813 2814 nextToken(); 2815 // JS' for await ( ... 2816 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 2817 nextToken(); 2818 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 2819 nextToken(); 2820 if (FormatTok->is(tok::l_paren)) 2821 parseParens(); 2822 2823 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true); 2824 } 2825 2826 void UnwrappedLineParser::parseDoWhile() { 2827 assert(FormatTok->is(tok::kw_do) && "'do' expected"); 2828 nextToken(); 2829 2830 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile); 2831 2832 // FIXME: Add error handling. 2833 if (!FormatTok->is(tok::kw_while)) { 2834 addUnwrappedLine(); 2835 return; 2836 } 2837 2838 // If in Whitesmiths mode, the line with the while() needs to be indented 2839 // to the same level as the block. 2840 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 2841 ++Line->Level; 2842 2843 nextToken(); 2844 parseStructuralElement(); 2845 } 2846 2847 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 2848 nextToken(); 2849 unsigned OldLineLevel = Line->Level; 2850 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 2851 --Line->Level; 2852 if (LeftAlignLabel) 2853 Line->Level = 0; 2854 2855 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 2856 FormatTok->is(tok::l_brace)) { 2857 2858 CompoundStatementIndenter Indenter(this, Line->Level, 2859 Style.BraceWrapping.AfterCaseLabel, 2860 Style.BraceWrapping.IndentBraces); 2861 parseBlock(); 2862 if (FormatTok->is(tok::kw_break)) { 2863 if (Style.BraceWrapping.AfterControlStatement == 2864 FormatStyle::BWACS_Always) { 2865 addUnwrappedLine(); 2866 if (!Style.IndentCaseBlocks && 2867 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 2868 ++Line->Level; 2869 } 2870 parseStructuralElement(); 2871 } 2872 addUnwrappedLine(); 2873 } else { 2874 if (FormatTok->is(tok::semi)) 2875 nextToken(); 2876 addUnwrappedLine(); 2877 } 2878 Line->Level = 
OldLineLevel; 2879 if (FormatTok->isNot(tok::l_brace)) { 2880 parseStructuralElement(); 2881 addUnwrappedLine(); 2882 } 2883 } 2884 2885 void UnwrappedLineParser::parseCaseLabel() { 2886 assert(FormatTok->is(tok::kw_case) && "'case' expected"); 2887 2888 // FIXME: fix handling of complex expressions here. 2889 do { 2890 nextToken(); 2891 } while (!eof() && !FormatTok->is(tok::colon)); 2892 parseLabel(); 2893 } 2894 2895 void UnwrappedLineParser::parseSwitch() { 2896 assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); 2897 nextToken(); 2898 if (FormatTok->is(tok::l_paren)) 2899 parseParens(); 2900 2901 keepAncestorBraces(); 2902 2903 if (FormatTok->is(tok::l_brace)) { 2904 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2905 parseBlock(); 2906 addUnwrappedLine(); 2907 } else { 2908 addUnwrappedLine(); 2909 ++Line->Level; 2910 parseStructuralElement(); 2911 --Line->Level; 2912 } 2913 2914 if (Style.RemoveBracesLLVM) 2915 NestedTooDeep.pop_back(); 2916 } 2917 2918 // Operators that can follow a C variable. 
2919 static bool isCOperatorFollowingVar(tok::TokenKind kind) { 2920 switch (kind) { 2921 case tok::ampamp: 2922 case tok::ampequal: 2923 case tok::arrow: 2924 case tok::caret: 2925 case tok::caretequal: 2926 case tok::comma: 2927 case tok::ellipsis: 2928 case tok::equal: 2929 case tok::equalequal: 2930 case tok::exclaim: 2931 case tok::exclaimequal: 2932 case tok::greater: 2933 case tok::greaterequal: 2934 case tok::greatergreater: 2935 case tok::greatergreaterequal: 2936 case tok::l_paren: 2937 case tok::l_square: 2938 case tok::less: 2939 case tok::lessequal: 2940 case tok::lessless: 2941 case tok::lesslessequal: 2942 case tok::minus: 2943 case tok::minusequal: 2944 case tok::minusminus: 2945 case tok::percent: 2946 case tok::percentequal: 2947 case tok::period: 2948 case tok::pipe: 2949 case tok::pipeequal: 2950 case tok::pipepipe: 2951 case tok::plus: 2952 case tok::plusequal: 2953 case tok::plusplus: 2954 case tok::question: 2955 case tok::r_brace: 2956 case tok::r_paren: 2957 case tok::r_square: 2958 case tok::semi: 2959 case tok::slash: 2960 case tok::slashequal: 2961 case tok::star: 2962 case tok::starequal: 2963 return true; 2964 default: 2965 return false; 2966 } 2967 } 2968 2969 void UnwrappedLineParser::parseAccessSpecifier() { 2970 FormatToken *AccessSpecifierCandidate = FormatTok; 2971 nextToken(); 2972 // Understand Qt's slots. 2973 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 2974 nextToken(); 2975 // Otherwise, we don't know what it is, and we'd better keep the next token. 2976 if (FormatTok->is(tok::colon)) { 2977 nextToken(); 2978 addUnwrappedLine(); 2979 } else if (!FormatTok->is(tok::coloncolon) && 2980 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { 2981 // Not a variable name nor namespace name. 2982 addUnwrappedLine(); 2983 } else if (AccessSpecifierCandidate) { 2984 // Consider the access specifier to be a C identifier. 
2985 AccessSpecifierCandidate->Tok.setKind(tok::identifier); 2986 } 2987 } 2988 2989 /// \brief Parses a concept definition. 2990 /// \pre The current token has to be the concept keyword. 2991 /// 2992 /// Returns if either the concept has been completely parsed, or if it detects 2993 /// that the concept definition is incorrect. 2994 void UnwrappedLineParser::parseConcept() { 2995 assert(FormatTok->is(tok::kw_concept) && "'concept' expected"); 2996 nextToken(); 2997 if (!FormatTok->is(tok::identifier)) 2998 return; 2999 nextToken(); 3000 if (!FormatTok->is(tok::equal)) 3001 return; 3002 nextToken(); 3003 parseConstraintExpression(); 3004 if (FormatTok->is(tok::semi)) 3005 nextToken(); 3006 addUnwrappedLine(); 3007 } 3008 3009 /// \brief Parses a requires, decides if it is a clause or an expression. 3010 /// \pre The current token has to be the requires keyword. 3011 /// \returns true if it parsed a clause. 3012 bool clang::format::UnwrappedLineParser::parseRequires() { 3013 assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); 3014 auto RequiresToken = FormatTok; 3015 3016 // We try to guess if it is a requires clause, or a requires expression. For 3017 // that we first consume the keyword and check the next token. 3018 nextToken(); 3019 3020 switch (FormatTok->Tok.getKind()) { 3021 case tok::l_brace: 3022 // This can only be an expression, never a clause. 3023 parseRequiresExpression(RequiresToken); 3024 return false; 3025 case tok::l_paren: 3026 // Clauses and expression can start with a paren, it's unclear what we have. 3027 break; 3028 default: 3029 // All other tokens can only be a clause. 3030 parseRequiresClause(RequiresToken); 3031 return true; 3032 } 3033 3034 // Looking forward we would have to decide if there are function declaration 3035 // like arguments to the requires expression: 3036 // requires (T t) { 3037 // Or there is a constraint expression for the requires clause: 3038 // requires (C<T> && ... 
3039 3040 // But first let's look behind. 3041 auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); 3042 3043 if (!PreviousNonComment || 3044 PreviousNonComment->is(TT_RequiresExpressionLBrace)) { 3045 // If there is no token, or an expression left brace, we are a requires 3046 // clause within a requires expression. 3047 parseRequiresClause(RequiresToken); 3048 return true; 3049 } 3050 3051 switch (PreviousNonComment->Tok.getKind()) { 3052 case tok::greater: 3053 case tok::r_paren: 3054 case tok::kw_noexcept: 3055 case tok::kw_const: 3056 // This is a requires clause. 3057 parseRequiresClause(RequiresToken); 3058 return true; 3059 case tok::amp: 3060 case tok::ampamp: { 3061 // This can be either: 3062 // if (... && requires (T t) ...) 3063 // Or 3064 // void member(...) && requires (C<T> ... 3065 // We check the one token before that for a const: 3066 // void member(...) const && requires (C<T> ... 3067 auto PrevPrev = PreviousNonComment->getPreviousNonComment(); 3068 if (PrevPrev && PrevPrev->is(tok::kw_const)) { 3069 parseRequiresClause(RequiresToken); 3070 return true; 3071 } 3072 break; 3073 } 3074 default: 3075 // It's an expression. 3076 parseRequiresExpression(RequiresToken); 3077 return false; 3078 } 3079 3080 // Now we look forward and try to check if the paren content is a parameter 3081 // list. The parameters can be cv-qualified and contain references or 3082 // pointers. 3083 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds 3084 // of stuff: typename, const, *, &, &&, ::, identifiers. 
3085 3086 int NextTokenOffset = 1; 3087 auto NextToken = Tokens->peekNextToken(NextTokenOffset); 3088 auto PeekNext = [&NextTokenOffset, &NextToken, this] { 3089 ++NextTokenOffset; 3090 NextToken = Tokens->peekNextToken(NextTokenOffset); 3091 }; 3092 3093 bool FoundType = false; 3094 bool LastWasColonColon = false; 3095 int OpenAngles = 0; 3096 3097 for (; NextTokenOffset < 50; PeekNext()) { 3098 switch (NextToken->Tok.getKind()) { 3099 case tok::kw_volatile: 3100 case tok::kw_const: 3101 case tok::comma: 3102 parseRequiresExpression(RequiresToken); 3103 return false; 3104 case tok::r_paren: 3105 case tok::pipepipe: 3106 parseRequiresClause(RequiresToken); 3107 return true; 3108 case tok::eof: 3109 // Break out of the loop. 3110 NextTokenOffset = 50; 3111 break; 3112 case tok::coloncolon: 3113 LastWasColonColon = true; 3114 break; 3115 case tok::identifier: 3116 if (FoundType && !LastWasColonColon && OpenAngles == 0) { 3117 parseRequiresExpression(RequiresToken); 3118 return false; 3119 } 3120 FoundType = true; 3121 LastWasColonColon = false; 3122 break; 3123 case tok::less: 3124 ++OpenAngles; 3125 break; 3126 case tok::greater: 3127 --OpenAngles; 3128 break; 3129 default: 3130 if (NextToken->isSimpleTypeSpecifier()) { 3131 parseRequiresExpression(RequiresToken); 3132 return false; 3133 } 3134 break; 3135 } 3136 } 3137 3138 // This seems to be a complicated expression, just assume it's a clause. 3139 parseRequiresClause(RequiresToken); 3140 return true; 3141 } 3142 3143 /// \brief Parses a requires clause. 3144 /// \param RequiresToken The requires keyword token, which starts this clause. 3145 /// \pre We need to be on the next token after the requires keyword. 3146 /// \sa parseRequiresExpression 3147 /// 3148 /// Returns if it either has finished parsing the clause, or it detects, that 3149 /// the clause is incorrect. 
3150 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { 3151 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3152 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3153 3154 // If there is no previous token, we are within a requires expression, 3155 // otherwise we will always have the template or function declaration in front 3156 // of it. 3157 bool InRequiresExpression = 3158 !RequiresToken->Previous || 3159 RequiresToken->Previous->is(TT_RequiresExpressionLBrace); 3160 3161 RequiresToken->setFinalizedType(InRequiresExpression 3162 ? TT_RequiresClauseInARequiresExpression 3163 : TT_RequiresClause); 3164 3165 parseConstraintExpression(); 3166 3167 if (!InRequiresExpression) 3168 FormatTok->Previous->ClosesRequiresClause = true; 3169 } 3170 3171 /// \brief Parses a requires expression. 3172 /// \param RequiresToken The requires keyword token, which starts this clause. 3173 /// \pre We need to be on the next token after the requires keyword. 3174 /// \sa parseRequiresClause 3175 /// 3176 /// Returns if it either has finished parsing the expression, or it detects, 3177 /// that the expression is incorrect. 3178 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) { 3179 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3180 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3181 3182 RequiresToken->setFinalizedType(TT_RequiresExpression); 3183 3184 if (FormatTok->is(tok::l_paren)) { 3185 FormatTok->setFinalizedType(TT_RequiresExpressionLParen); 3186 parseParens(); 3187 } 3188 3189 if (FormatTok->is(tok::l_brace)) { 3190 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); 3191 parseChildBlock(/*CanContainBracedList=*/false, 3192 /*NextLBracesType=*/TT_CompoundRequirementLBrace); 3193 } 3194 } 3195 3196 /// \brief Parses a constraint expression. 3197 /// 3198 /// This is either the definition of a concept, or the body of a requires 3199 /// clause. 
It returns, when the parsing is complete, or the expression is 3200 /// incorrect. 3201 void UnwrappedLineParser::parseConstraintExpression() { 3202 // The special handling for lambdas is needed since tryToParseLambda() eats a 3203 // token and if a requires expression is the last part of a requires clause 3204 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is 3205 // not set on the correct token. Thus we need to be aware if we even expect a 3206 // lambda to be possible. 3207 // template <typename T> requires requires { ... } [[nodiscard]] ...; 3208 bool LambdaNextTimeAllowed = true; 3209 do { 3210 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false); 3211 3212 switch (FormatTok->Tok.getKind()) { 3213 case tok::kw_requires: { 3214 auto RequiresToken = FormatTok; 3215 nextToken(); 3216 parseRequiresExpression(RequiresToken); 3217 break; 3218 } 3219 3220 case tok::l_paren: 3221 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator); 3222 break; 3223 3224 case tok::l_square: 3225 if (!LambdaThisTimeAllowed || !tryToParseLambda()) 3226 return; 3227 break; 3228 3229 case tok::kw_const: 3230 case tok::semi: 3231 case tok::kw_class: 3232 case tok::kw_struct: 3233 case tok::kw_union: 3234 return; 3235 3236 case tok::l_brace: 3237 // Potential function body. 
3238 return; 3239 3240 case tok::ampamp: 3241 case tok::pipepipe: 3242 FormatTok->setFinalizedType(TT_BinaryOperator); 3243 nextToken(); 3244 LambdaNextTimeAllowed = true; 3245 break; 3246 3247 case tok::comma: 3248 case tok::comment: 3249 LambdaNextTimeAllowed = LambdaThisTimeAllowed; 3250 nextToken(); 3251 break; 3252 3253 case tok::kw_sizeof: 3254 case tok::greater: 3255 case tok::greaterequal: 3256 case tok::greatergreater: 3257 case tok::less: 3258 case tok::lessequal: 3259 case tok::lessless: 3260 case tok::equalequal: 3261 case tok::exclaim: 3262 case tok::exclaimequal: 3263 case tok::plus: 3264 case tok::minus: 3265 case tok::star: 3266 case tok::slash: 3267 case tok::kw_decltype: 3268 LambdaNextTimeAllowed = true; 3269 // Just eat them. 3270 nextToken(); 3271 break; 3272 3273 case tok::numeric_constant: 3274 case tok::coloncolon: 3275 case tok::kw_true: 3276 case tok::kw_false: 3277 // Just eat them. 3278 nextToken(); 3279 break; 3280 3281 case tok::kw_static_cast: 3282 case tok::kw_const_cast: 3283 case tok::kw_reinterpret_cast: 3284 case tok::kw_dynamic_cast: 3285 nextToken(); 3286 if (!FormatTok->is(tok::less)) 3287 return; 3288 3289 nextToken(); 3290 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 3291 /*ClosingBraceKind=*/tok::greater); 3292 break; 3293 3294 case tok::kw_bool: 3295 // bool is only allowed if it is directly followed by a paren for a cast: 3296 // concept C = bool(...); 3297 // and bool is the only type, all other types as cast must be inside a 3298 // cast to bool an thus are handled by the other cases. 3299 nextToken(); 3300 if (FormatTok->isNot(tok::l_paren)) 3301 return; 3302 parseParens(); 3303 break; 3304 3305 default: 3306 if (!FormatTok->Tok.getIdentifierInfo()) { 3307 // Identifiers are part of the default case, we check for more then 3308 // tok::identifier to handle builtin type traits. 
3309 return; 3310 } 3311 3312 // We need to differentiate identifiers for a template deduction guide, 3313 // variables, or function return types (the constraint expression has 3314 // ended before that), and basically all other cases. But it's easier to 3315 // check the other way around. 3316 assert(FormatTok->Previous); 3317 switch (FormatTok->Previous->Tok.getKind()) { 3318 case tok::coloncolon: // Nested identifier. 3319 case tok::ampamp: // Start of a function or variable for the 3320 case tok::pipepipe: // constraint expression. 3321 case tok::kw_requires: // Initial identifier of a requires clause. 3322 case tok::equal: // Initial identifier of a concept declaration. 3323 break; 3324 default: 3325 return; 3326 } 3327 3328 // Read identifier with optional template declaration. 3329 nextToken(); 3330 if (FormatTok->is(tok::less)) { 3331 nextToken(); 3332 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 3333 /*ClosingBraceKind=*/tok::greater); 3334 } 3335 break; 3336 } 3337 } while (!eof()); 3338 } 3339 3340 bool UnwrappedLineParser::parseEnum() { 3341 const FormatToken &InitialToken = *FormatTok; 3342 3343 // Won't be 'enum' for NS_ENUMs. 3344 if (FormatTok->is(tok::kw_enum)) 3345 nextToken(); 3346 3347 // In TypeScript, "enum" can also be used as property name, e.g. in interface 3348 // declarations. An "enum" keyword followed by a colon would be a syntax 3349 // error and thus assume it is just an identifier. 3350 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 3351 return false; 3352 3353 // In protobuf, "enum" can be used as a field name. 3354 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 3355 return false; 3356 3357 // Eat up enum class ... 
3358 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct)) 3359 nextToken(); 3360 3361 while (FormatTok->Tok.getIdentifierInfo() || 3362 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 3363 tok::greater, tok::comma, tok::question)) { 3364 nextToken(); 3365 // We can have macros or attributes in between 'enum' and the enum name. 3366 if (FormatTok->is(tok::l_paren)) 3367 parseParens(); 3368 if (FormatTok->is(tok::identifier)) { 3369 nextToken(); 3370 // If there are two identifiers in a row, this is likely an elaborate 3371 // return type. In Java, this can be "implements", etc. 3372 if (Style.isCpp() && FormatTok->is(tok::identifier)) 3373 return false; 3374 } 3375 } 3376 3377 // Just a declaration or something is wrong. 3378 if (FormatTok->isNot(tok::l_brace)) 3379 return true; 3380 FormatTok->setFinalizedType(TT_EnumLBrace); 3381 FormatTok->setBlockKind(BK_Block); 3382 3383 if (Style.Language == FormatStyle::LK_Java) { 3384 // Java enums are different. 3385 parseJavaEnumBody(); 3386 return true; 3387 } 3388 if (Style.Language == FormatStyle::LK_Proto) { 3389 parseBlock(/*MustBeDeclaration=*/true); 3390 return true; 3391 } 3392 3393 if (!Style.AllowShortEnumsOnASingleLine && 3394 ShouldBreakBeforeBrace(Style, InitialToken)) 3395 addUnwrappedLine(); 3396 // Parse enum body. 3397 nextToken(); 3398 if (!Style.AllowShortEnumsOnASingleLine) { 3399 addUnwrappedLine(); 3400 Line->Level += 1; 3401 } 3402 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true, 3403 /*IsEnum=*/true); 3404 if (!Style.AllowShortEnumsOnASingleLine) 3405 Line->Level -= 1; 3406 if (HasError) { 3407 if (FormatTok->is(tok::semi)) 3408 nextToken(); 3409 addUnwrappedLine(); 3410 } 3411 return true; 3412 3413 // There is no addUnwrappedLine() here so that we fall through to parsing a 3414 // structural element afterwards. Thus, in "enum A {} n, m;", 3415 // "} n, m;" will end up in one unwrapped line. 
3416 } 3417 3418 bool UnwrappedLineParser::parseStructLike() { 3419 // parseRecord falls through and does not yet add an unwrapped line as a 3420 // record declaration or definition can start a structural element. 3421 parseRecord(); 3422 // This does not apply to Java, JavaScript and C#. 3423 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 3424 Style.isCSharp()) { 3425 if (FormatTok->is(tok::semi)) 3426 nextToken(); 3427 addUnwrappedLine(); 3428 return true; 3429 } 3430 return false; 3431 } 3432 3433 namespace { 3434 // A class used to set and restore the Token position when peeking 3435 // ahead in the token source. 3436 class ScopedTokenPosition { 3437 unsigned StoredPosition; 3438 FormatTokenSource *Tokens; 3439 3440 public: 3441 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 3442 assert(Tokens && "Tokens expected to not be null"); 3443 StoredPosition = Tokens->getPosition(); 3444 } 3445 3446 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 3447 }; 3448 } // namespace 3449 3450 // Look to see if we have [[ by looking ahead, if 3451 // its not then rewind to the original position. 3452 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 3453 ScopedTokenPosition AutoPosition(Tokens); 3454 FormatToken *Tok = Tokens->getNextToken(); 3455 // We already read the first [ check for the second. 3456 if (!Tok->is(tok::l_square)) 3457 return false; 3458 // Double check that the attribute is just something 3459 // fairly simple. 
3460 while (Tok->isNot(tok::eof)) { 3461 if (Tok->is(tok::r_square)) 3462 break; 3463 Tok = Tokens->getNextToken(); 3464 } 3465 if (Tok->is(tok::eof)) 3466 return false; 3467 Tok = Tokens->getNextToken(); 3468 if (!Tok->is(tok::r_square)) 3469 return false; 3470 Tok = Tokens->getNextToken(); 3471 if (Tok->is(tok::semi)) 3472 return false; 3473 return true; 3474 } 3475 3476 void UnwrappedLineParser::parseJavaEnumBody() { 3477 assert(FormatTok->is(tok::l_brace)); 3478 const FormatToken *OpeningBrace = FormatTok; 3479 3480 // Determine whether the enum is simple, i.e. does not have a semicolon or 3481 // constants with class bodies. Simple enums can be formatted like braced 3482 // lists, contracted to a single line, etc. 3483 unsigned StoredPosition = Tokens->getPosition(); 3484 bool IsSimple = true; 3485 FormatToken *Tok = Tokens->getNextToken(); 3486 while (!Tok->is(tok::eof)) { 3487 if (Tok->is(tok::r_brace)) 3488 break; 3489 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 3490 IsSimple = false; 3491 break; 3492 } 3493 // FIXME: This will also mark enums with braces in the arguments to enum 3494 // constants as "not simple". This is probably fine in practice, though. 3495 Tok = Tokens->getNextToken(); 3496 } 3497 FormatTok = Tokens->setPosition(StoredPosition); 3498 3499 if (IsSimple) { 3500 nextToken(); 3501 parseBracedList(); 3502 addUnwrappedLine(); 3503 return; 3504 } 3505 3506 // Parse the body of a more complex enum. 3507 // First add a line for everything up to the "{". 3508 nextToken(); 3509 addUnwrappedLine(); 3510 ++Line->Level; 3511 3512 // Parse the enum constants. 3513 while (FormatTok) { 3514 if (FormatTok->is(tok::l_brace)) { 3515 // Parse the constant's class body. 
3516 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 3517 /*MunchSemi=*/false); 3518 } else if (FormatTok->is(tok::l_paren)) { 3519 parseParens(); 3520 } else if (FormatTok->is(tok::comma)) { 3521 nextToken(); 3522 addUnwrappedLine(); 3523 } else if (FormatTok->is(tok::semi)) { 3524 nextToken(); 3525 addUnwrappedLine(); 3526 break; 3527 } else if (FormatTok->is(tok::r_brace)) { 3528 addUnwrappedLine(); 3529 break; 3530 } else { 3531 nextToken(); 3532 } 3533 } 3534 3535 // Parse the class body after the enum's ";" if any. 3536 parseLevel(OpeningBrace, /*CanContainBracedList=*/true); 3537 nextToken(); 3538 --Line->Level; 3539 addUnwrappedLine(); 3540 } 3541 3542 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3543 const FormatToken &InitialToken = *FormatTok; 3544 nextToken(); 3545 3546 // The actual identifier can be a nested name specifier, and in macros 3547 // it is often token-pasted. 3548 // An [[attribute]] can be before the identifier. 3549 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 3550 tok::kw___attribute, tok::kw___declspec, 3551 tok::kw_alignas, tok::l_square, tok::r_square) || 3552 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 3553 FormatTok->isOneOf(tok::period, tok::comma))) { 3554 if (Style.isJavaScript() && 3555 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 3556 // JavaScript/TypeScript supports inline object types in 3557 // extends/implements positions: 3558 // class Foo implements {bar: number} { } 3559 nextToken(); 3560 if (FormatTok->is(tok::l_brace)) { 3561 tryToParseBracedList(); 3562 continue; 3563 } 3564 } 3565 bool IsNonMacroIdentifier = 3566 FormatTok->is(tok::identifier) && 3567 FormatTok->TokenText != FormatTok->TokenText.upper(); 3568 nextToken(); 3569 // We can have macros or attributes in between 'class' and the class name. 
3570 if (!IsNonMacroIdentifier) { 3571 if (FormatTok->is(tok::l_paren)) { 3572 parseParens(); 3573 } else if (FormatTok->is(TT_AttributeSquare)) { 3574 parseSquare(); 3575 // Consume the closing TT_AttributeSquare. 3576 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare)) 3577 nextToken(); 3578 } 3579 } 3580 } 3581 3582 // Note that parsing away template declarations here leads to incorrectly 3583 // accepting function declarations as record declarations. 3584 // In general, we cannot solve this problem. Consider: 3585 // class A<int> B() {} 3586 // which can be a function definition or a class definition when B() is a 3587 // macro. If we find enough real-world cases where this is a problem, we 3588 // can parse for the 'template' keyword in the beginning of the statement, 3589 // and thus rule out the record production in case there is no template 3590 // (this would still leave us with an ambiguity between template function 3591 // and class declarations). 3592 if (FormatTok->isOneOf(tok::colon, tok::less)) { 3593 do { 3594 if (FormatTok->is(tok::l_brace)) { 3595 calculateBraceTypes(/*ExpectClassBody=*/true); 3596 if (!tryToParseBracedList()) 3597 break; 3598 } 3599 if (FormatTok->is(tok::l_square)) { 3600 FormatToken *Previous = FormatTok->Previous; 3601 if (!Previous || 3602 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) { 3603 // Don't try parsing a lambda if we had a closing parenthesis before, 3604 // it was probably a pointer to an array: int (*)[]. 
3605 if (!tryToParseLambda()) 3606 break; 3607 } else { 3608 parseSquare(); 3609 continue; 3610 } 3611 } 3612 if (FormatTok->is(tok::semi)) 3613 return; 3614 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 3615 addUnwrappedLine(); 3616 nextToken(); 3617 parseCSharpGenericTypeConstraint(); 3618 break; 3619 } 3620 nextToken(); 3621 } while (!eof()); 3622 } 3623 3624 auto GetBraceType = [](const FormatToken &RecordTok) { 3625 switch (RecordTok.Tok.getKind()) { 3626 case tok::kw_class: 3627 return TT_ClassLBrace; 3628 case tok::kw_struct: 3629 return TT_StructLBrace; 3630 case tok::kw_union: 3631 return TT_UnionLBrace; 3632 default: 3633 // Useful for e.g. interface. 3634 return TT_RecordLBrace; 3635 } 3636 }; 3637 if (FormatTok->is(tok::l_brace)) { 3638 FormatTok->setFinalizedType(GetBraceType(InitialToken)); 3639 if (ParseAsExpr) { 3640 parseChildBlock(); 3641 } else { 3642 if (ShouldBreakBeforeBrace(Style, InitialToken)) 3643 addUnwrappedLine(); 3644 3645 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 3646 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 3647 } 3648 } 3649 // There is no addUnwrappedLine() here so that we fall through to parsing a 3650 // structural element afterwards. Thus, in "class A {} n, m;", 3651 // "} n, m;" will end up in one unwrapped line. 
3652 } 3653 3654 void UnwrappedLineParser::parseObjCMethod() { 3655 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) && 3656 "'(' or identifier expected."); 3657 do { 3658 if (FormatTok->is(tok::semi)) { 3659 nextToken(); 3660 addUnwrappedLine(); 3661 return; 3662 } else if (FormatTok->is(tok::l_brace)) { 3663 if (Style.BraceWrapping.AfterFunction) 3664 addUnwrappedLine(); 3665 parseBlock(); 3666 addUnwrappedLine(); 3667 return; 3668 } else { 3669 nextToken(); 3670 } 3671 } while (!eof()); 3672 } 3673 3674 void UnwrappedLineParser::parseObjCProtocolList() { 3675 assert(FormatTok->is(tok::less) && "'<' expected."); 3676 do { 3677 nextToken(); 3678 // Early exit in case someone forgot a close angle. 3679 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3680 FormatTok->isObjCAtKeyword(tok::objc_end)) 3681 return; 3682 } while (!eof() && FormatTok->isNot(tok::greater)); 3683 nextToken(); // Skip '>'. 3684 } 3685 3686 void UnwrappedLineParser::parseObjCUntilAtEnd() { 3687 do { 3688 if (FormatTok->isObjCAtKeyword(tok::objc_end)) { 3689 nextToken(); 3690 addUnwrappedLine(); 3691 break; 3692 } 3693 if (FormatTok->is(tok::l_brace)) { 3694 parseBlock(); 3695 // In ObjC interfaces, nothing should be following the "}". 3696 addUnwrappedLine(); 3697 } else if (FormatTok->is(tok::r_brace)) { 3698 // Ignore stray "}". parseStructuralElement doesn't consume them. 
3699 nextToken(); 3700 addUnwrappedLine(); 3701 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 3702 nextToken(); 3703 parseObjCMethod(); 3704 } else { 3705 parseStructuralElement(); 3706 } 3707 } while (!eof()); 3708 } 3709 3710 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 3711 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 3712 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 3713 nextToken(); 3714 nextToken(); // interface name 3715 3716 // @interface can be followed by a lightweight generic 3717 // specialization list, then either a base class or a category. 3718 if (FormatTok->is(tok::less)) 3719 parseObjCLightweightGenerics(); 3720 if (FormatTok->is(tok::colon)) { 3721 nextToken(); 3722 nextToken(); // base class name 3723 // The base class can also have lightweight generics applied to it. 3724 if (FormatTok->is(tok::less)) 3725 parseObjCLightweightGenerics(); 3726 } else if (FormatTok->is(tok::l_paren)) 3727 // Skip category, if present. 3728 parseParens(); 3729 3730 if (FormatTok->is(tok::less)) 3731 parseObjCProtocolList(); 3732 3733 if (FormatTok->is(tok::l_brace)) { 3734 if (Style.BraceWrapping.AfterObjCDeclaration) 3735 addUnwrappedLine(); 3736 parseBlock(/*MustBeDeclaration=*/true); 3737 } 3738 3739 // With instance variables, this puts '}' on its own line. Without instance 3740 // variables, this ends the @interface line. 3741 addUnwrappedLine(); 3742 3743 parseObjCUntilAtEnd(); 3744 } 3745 3746 void UnwrappedLineParser::parseObjCLightweightGenerics() { 3747 assert(FormatTok->is(tok::less)); 3748 // Unlike protocol lists, generic parameterizations support 3749 // nested angles: 3750 // 3751 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 3752 // NSObject <NSCopying, NSSecureCoding> 3753 // 3754 // so we need to count how many open angles we have left. 3755 unsigned NumOpenAngles = 1; 3756 do { 3757 nextToken(); 3758 // Early exit in case someone forgot a close angle. 
3759 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3760 FormatTok->isObjCAtKeyword(tok::objc_end)) 3761 break; 3762 if (FormatTok->is(tok::less)) 3763 ++NumOpenAngles; 3764 else if (FormatTok->is(tok::greater)) { 3765 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 3766 --NumOpenAngles; 3767 } 3768 } while (!eof() && NumOpenAngles != 0); 3769 nextToken(); // Skip '>'. 3770 } 3771 3772 // Returns true for the declaration/definition form of @protocol, 3773 // false for the expression form. 3774 bool UnwrappedLineParser::parseObjCProtocol() { 3775 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 3776 nextToken(); 3777 3778 if (FormatTok->is(tok::l_paren)) 3779 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 3780 return false; 3781 3782 // The definition/declaration form, 3783 // @protocol Foo 3784 // - (int)someMethod; 3785 // @end 3786 3787 nextToken(); // protocol name 3788 3789 if (FormatTok->is(tok::less)) 3790 parseObjCProtocolList(); 3791 3792 // Check for protocol declaration. 3793 if (FormatTok->is(tok::semi)) { 3794 nextToken(); 3795 addUnwrappedLine(); 3796 return true; 3797 } 3798 3799 addUnwrappedLine(); 3800 parseObjCUntilAtEnd(); 3801 return true; 3802 } 3803 3804 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 3805 bool IsImport = FormatTok->is(Keywords.kw_import); 3806 assert(IsImport || FormatTok->is(tok::kw_export)); 3807 nextToken(); 3808 3809 // Consume the "default" in "export default class/function". 3810 if (FormatTok->is(tok::kw_default)) 3811 nextToken(); 3812 3813 // Consume "async function", "function" and "default function", so that these 3814 // get parsed as free-standing JS functions, i.e. do not require a trailing 3815 // semicolon. 
3816 if (FormatTok->is(Keywords.kw_async)) 3817 nextToken(); 3818 if (FormatTok->is(Keywords.kw_function)) { 3819 nextToken(); 3820 return; 3821 } 3822 3823 // For imports, `export *`, `export {...}`, consume the rest of the line up 3824 // to the terminating `;`. For everything else, just return and continue 3825 // parsing the structural element, i.e. the declaration or expression for 3826 // `export default`. 3827 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 3828 !FormatTok->isStringLiteral()) 3829 return; 3830 3831 while (!eof()) { 3832 if (FormatTok->is(tok::semi)) 3833 return; 3834 if (Line->Tokens.empty()) { 3835 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 3836 // import statement should terminate. 3837 return; 3838 } 3839 if (FormatTok->is(tok::l_brace)) { 3840 FormatTok->setBlockKind(BK_Block); 3841 nextToken(); 3842 parseBracedList(); 3843 } else { 3844 nextToken(); 3845 } 3846 } 3847 } 3848 3849 void UnwrappedLineParser::parseStatementMacro() { 3850 nextToken(); 3851 if (FormatTok->is(tok::l_paren)) 3852 parseParens(); 3853 if (FormatTok->is(tok::semi)) 3854 nextToken(); 3855 addUnwrappedLine(); 3856 } 3857 3858 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 3859 StringRef Prefix = "") { 3860 llvm::dbgs() << Prefix << "Line(" << Line.Level 3861 << ", FSC=" << Line.FirstStartColumn << ")" 3862 << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 3863 for (const auto &Node : Line.Tokens) { 3864 llvm::dbgs() << Node.Tok->Tok.getName() << "[" 3865 << "T=" << static_cast<unsigned>(Node.Tok->getType()) 3866 << ", OC=" << Node.Tok->OriginalColumn << "] "; 3867 } 3868 for (const auto &Node : Line.Tokens) 3869 for (const auto &ChildNode : Node.Children) 3870 printDebugInfo(ChildNode, "\nChild: "); 3871 3872 llvm::dbgs() << "\n"; 3873 } 3874 3875 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 3876 if (Line->Tokens.empty()) 3877 return; 3878 LLVM_DEBUG({ 3879 if (CurrentLines == &Lines) 3880 printDebugInfo(*Line); 3881 }); 3882 3883 // If this line closes a block when in Whitesmiths mode, remember that 3884 // information so that the level can be decreased after the line is added. 3885 // This has to happen after the addition of the line since the line itself 3886 // needs to be indented. 3887 bool ClosesWhitesmithsBlock = 3888 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 3889 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 3890 3891 CurrentLines->push_back(std::move(*Line)); 3892 Line->Tokens.clear(); 3893 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 3894 Line->FirstStartColumn = 0; 3895 3896 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 3897 --Line->Level; 3898 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 3899 CurrentLines->append( 3900 std::make_move_iterator(PreprocessorDirectives.begin()), 3901 std::make_move_iterator(PreprocessorDirectives.end())); 3902 PreprocessorDirectives.clear(); 3903 } 3904 // Disconnect the current token from the last token on the previous line. 
3905 FormatTok->Previous = nullptr; 3906 } 3907 3908 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 3909 3910 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 3911 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 3912 FormatTok.NewlinesBefore > 0; 3913 } 3914 3915 // Checks if \p FormatTok is a line comment that continues the line comment 3916 // section on \p Line. 3917 static bool 3918 continuesLineCommentSection(const FormatToken &FormatTok, 3919 const UnwrappedLine &Line, 3920 const llvm::Regex &CommentPragmasRegex) { 3921 if (Line.Tokens.empty()) 3922 return false; 3923 3924 StringRef IndentContent = FormatTok.TokenText; 3925 if (FormatTok.TokenText.startswith("//") || 3926 FormatTok.TokenText.startswith("/*")) 3927 IndentContent = FormatTok.TokenText.substr(2); 3928 if (CommentPragmasRegex.match(IndentContent)) 3929 return false; 3930 3931 // If Line starts with a line comment, then FormatTok continues the comment 3932 // section if its original column is greater or equal to the original start 3933 // column of the line. 3934 // 3935 // Define the min column token of a line as follows: if a line ends in '{' or 3936 // contains a '{' followed by a line comment, then the min column token is 3937 // that '{'. Otherwise, the min column token of the line is the first token of 3938 // the line. 3939 // 3940 // If Line starts with a token other than a line comment, then FormatTok 3941 // continues the comment section if its original column is greater than the 3942 // original start column of the min column token of the line. 
3943 // 3944 // For example, the second line comment continues the first in these cases: 3945 // 3946 // // first line 3947 // // second line 3948 // 3949 // and: 3950 // 3951 // // first line 3952 // // second line 3953 // 3954 // and: 3955 // 3956 // int i; // first line 3957 // // second line 3958 // 3959 // and: 3960 // 3961 // do { // first line 3962 // // second line 3963 // int i; 3964 // } while (true); 3965 // 3966 // and: 3967 // 3968 // enum { 3969 // a, // first line 3970 // // second line 3971 // b 3972 // }; 3973 // 3974 // The second line comment doesn't continue the first in these cases: 3975 // 3976 // // first line 3977 // // second line 3978 // 3979 // and: 3980 // 3981 // int i; // first line 3982 // // second line 3983 // 3984 // and: 3985 // 3986 // do { // first line 3987 // // second line 3988 // int i; 3989 // } while (true); 3990 // 3991 // and: 3992 // 3993 // enum { 3994 // a, // first line 3995 // // second line 3996 // }; 3997 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 3998 3999 // Scan for '{//'. If found, use the column of '{' as a min column for line 4000 // comment section continuation. 4001 const FormatToken *PreviousToken = nullptr; 4002 for (const UnwrappedLineNode &Node : Line.Tokens) { 4003 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4004 isLineComment(*Node.Tok)) { 4005 MinColumnToken = PreviousToken; 4006 break; 4007 } 4008 PreviousToken = Node.Tok; 4009 4010 // Grab the last newline preceding a token in this unwrapped line. 
4011 if (Node.Tok->NewlinesBefore > 0) 4012 MinColumnToken = Node.Tok; 4013 } 4014 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4015 MinColumnToken = PreviousToken; 4016 4017 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4018 MinColumnToken); 4019 } 4020 4021 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4022 bool JustComments = Line->Tokens.empty(); 4023 for (FormatToken *Tok : CommentsBeforeNextToken) { 4024 // Line comments that belong to the same line comment section are put on the 4025 // same line since later we might want to reflow content between them. 4026 // Additional fine-grained breaking of line comment sections is controlled 4027 // by the class BreakableLineCommentSection in case it is desirable to keep 4028 // several line comment sections in the same unwrapped line. 4029 // 4030 // FIXME: Consider putting separate line comment sections as children to the 4031 // unwrapped line instead. 4032 Tok->ContinuesLineCommentSection = 4033 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 4034 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4035 addUnwrappedLine(); 4036 pushToken(Tok); 4037 } 4038 if (NewlineBeforeNext && JustComments) 4039 addUnwrappedLine(); 4040 CommentsBeforeNextToken.clear(); 4041 } 4042 4043 void UnwrappedLineParser::nextToken(int LevelDifference) { 4044 if (eof()) 4045 return; 4046 flushComments(isOnNewLine(*FormatTok)); 4047 pushToken(FormatTok); 4048 FormatToken *Previous = FormatTok; 4049 if (!Style.isJavaScript()) 4050 readToken(LevelDifference); 4051 else 4052 readTokenWithJavaScriptASI(); 4053 FormatTok->Previous = Previous; 4054 } 4055 4056 void UnwrappedLineParser::distributeComments( 4057 const SmallVectorImpl<FormatToken *> &Comments, 4058 const FormatToken *NextTok) { 4059 // Whether or not a line comment token continues a line is controlled by 4060 // the method continuesLineCommentSection, with the following caveat: 4061 // 
4062 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4063 // that each comment line from the trail is aligned with the next token, if 4064 // the next token exists. If a trail exists, the beginning of the maximal 4065 // trail is marked as a start of a new comment section. 4066 // 4067 // For example in this code: 4068 // 4069 // int a; // line about a 4070 // // line 1 about b 4071 // // line 2 about b 4072 // int b; 4073 // 4074 // the two lines about b form a maximal trail, so there are two sections, the 4075 // first one consisting of the single comment "// line about a" and the 4076 // second one consisting of the next two comments. 4077 if (Comments.empty()) 4078 return; 4079 bool ShouldPushCommentsInCurrentLine = true; 4080 bool HasTrailAlignedWithNextToken = false; 4081 unsigned StartOfTrailAlignedWithNextToken = 0; 4082 if (NextTok) { 4083 // We are skipping the first element intentionally. 4084 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4085 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4086 HasTrailAlignedWithNextToken = true; 4087 StartOfTrailAlignedWithNextToken = i; 4088 } 4089 } 4090 } 4091 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4092 FormatToken *FormatTok = Comments[i]; 4093 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4094 FormatTok->ContinuesLineCommentSection = false; 4095 } else { 4096 FormatTok->ContinuesLineCommentSection = 4097 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 4098 } 4099 if (!FormatTok->ContinuesLineCommentSection && 4100 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) 4101 ShouldPushCommentsInCurrentLine = false; 4102 if (ShouldPushCommentsInCurrentLine) 4103 pushToken(FormatTok); 4104 else 4105 CommentsBeforeNextToken.push_back(FormatTok); 4106 } 4107 } 4108 4109 void UnwrappedLineParser::readToken(int LevelDifference) { 4110 SmallVector<FormatToken *, 1> Comments; 4111 bool PreviousWasComment = 
false; 4112 bool FirstNonCommentOnLine = false; 4113 do { 4114 FormatTok = Tokens->getNextToken(); 4115 assert(FormatTok); 4116 while (FormatTok->getType() == TT_ConflictStart || 4117 FormatTok->getType() == TT_ConflictEnd || 4118 FormatTok->getType() == TT_ConflictAlternative) { 4119 if (FormatTok->getType() == TT_ConflictStart) 4120 conditionalCompilationStart(/*Unreachable=*/false); 4121 else if (FormatTok->getType() == TT_ConflictAlternative) 4122 conditionalCompilationAlternative(); 4123 else if (FormatTok->getType() == TT_ConflictEnd) 4124 conditionalCompilationEnd(); 4125 FormatTok = Tokens->getNextToken(); 4126 FormatTok->MustBreakBefore = true; 4127 } 4128 4129 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4130 const FormatToken &Tok, 4131 bool PreviousWasComment) { 4132 auto IsFirstOnLine = [](const FormatToken &Tok) { 4133 return Tok.HasUnescapedNewline || Tok.IsFirst; 4134 }; 4135 4136 // Consider preprocessor directives preceded by block comments as first 4137 // on line. 4138 if (PreviousWasComment) 4139 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4140 return IsFirstOnLine(Tok); 4141 }; 4142 4143 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4144 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4145 PreviousWasComment = FormatTok->is(tok::comment); 4146 4147 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4148 FirstNonCommentOnLine) { 4149 distributeComments(Comments, FormatTok); 4150 Comments.clear(); 4151 // If there is an unfinished unwrapped line, we flush the preprocessor 4152 // directives only after that unwrapped line was finished later. 
4153 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4154 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4155 assert((LevelDifference >= 0 || 4156 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4157 "LevelDifference makes Line->Level negative"); 4158 Line->Level += LevelDifference; 4159 // Comments stored before the preprocessor directive need to be output 4160 // before the preprocessor directive, at the same level as the 4161 // preprocessor directive, as we consider them to apply to the directive. 4162 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4163 PPBranchLevel > 0) 4164 Line->Level += PPBranchLevel; 4165 flushComments(isOnNewLine(*FormatTok)); 4166 parsePPDirective(); 4167 PreviousWasComment = FormatTok->is(tok::comment); 4168 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4169 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4170 } 4171 4172 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4173 !Line->InPPDirective) 4174 continue; 4175 4176 if (!FormatTok->is(tok::comment)) { 4177 distributeComments(Comments, FormatTok); 4178 Comments.clear(); 4179 return; 4180 } 4181 4182 Comments.push_back(FormatTok); 4183 } while (!eof()); 4184 4185 distributeComments(Comments, nullptr); 4186 Comments.clear(); 4187 } 4188 4189 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 4190 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 4191 if (MustBreakBeforeNextToken) { 4192 Line->Tokens.back().Tok->MustBreakBefore = true; 4193 MustBreakBeforeNextToken = false; 4194 } 4195 } 4196 4197 } // end namespace format 4198 } // end namespace clang 4199