1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "TokenAnnotator.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/Support/Debug.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 #include <algorithm> 23 #include <utility> 24 25 #define DEBUG_TYPE "format-parser" 26 27 namespace clang { 28 namespace format { 29 30 class FormatTokenSource { 31 public: 32 virtual ~FormatTokenSource() {} 33 34 // Returns the next token in the token stream. 35 virtual FormatToken *getNextToken() = 0; 36 37 // Returns the token preceding the token returned by the last call to 38 // getNextToken() in the token stream, or nullptr if no such token exists. 39 virtual FormatToken *getPreviousToken() = 0; 40 41 // Returns the token that would be returned by the next call to 42 // getNextToken(). 43 virtual FormatToken *peekNextToken() = 0; 44 45 // Returns the token that would be returned after the next N calls to 46 // getNextToken(). N needs to be greater than zero, and small enough that 47 // there are still tokens. Check for tok::eof with N-1 before calling it with 48 // N. 49 virtual FormatToken *peekNextToken(int N) = 0; 50 51 // Returns whether we are at the end of the file. 52 // This can be different from whether getNextToken() returned an eof token 53 // when the FormatTokenSource is a view on a part of the token stream. 
54 virtual bool isEOF() = 0; 55 56 // Gets the current position in the token stream, to be used by setPosition(). 57 virtual unsigned getPosition() = 0; 58 59 // Resets the token stream to the state it was in when getPosition() returned 60 // Position, and return the token at that position in the stream. 61 virtual FormatToken *setPosition(unsigned Position) = 0; 62 }; 63 64 namespace { 65 66 class ScopedDeclarationState { 67 public: 68 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, 69 bool MustBeDeclaration) 70 : Line(Line), Stack(Stack) { 71 Line.MustBeDeclaration = MustBeDeclaration; 72 Stack.push_back(MustBeDeclaration); 73 } 74 ~ScopedDeclarationState() { 75 Stack.pop_back(); 76 if (!Stack.empty()) 77 Line.MustBeDeclaration = Stack.back(); 78 else 79 Line.MustBeDeclaration = true; 80 } 81 82 private: 83 UnwrappedLine &Line; 84 llvm::BitVector &Stack; 85 }; 86 87 static bool isLineComment(const FormatToken &FormatTok) { 88 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 89 } 90 91 // Checks if \p FormatTok is a line comment that continues the line comment 92 // \p Previous. The original column of \p MinColumnToken is used to determine 93 // whether \p FormatTok is indented enough to the right to continue \p Previous. 94 static bool continuesLineComment(const FormatToken &FormatTok, 95 const FormatToken *Previous, 96 const FormatToken *MinColumnToken) { 97 if (!Previous || !MinColumnToken) 98 return false; 99 unsigned MinContinueColumn = 100 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 
0 : 1); 101 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 102 isLineComment(*Previous) && 103 FormatTok.OriginalColumn >= MinContinueColumn; 104 } 105 106 class ScopedMacroState : public FormatTokenSource { 107 public: 108 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 109 FormatToken *&ResetToken) 110 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 111 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 112 Token(nullptr), PreviousToken(nullptr) { 113 FakeEOF.Tok.startToken(); 114 FakeEOF.Tok.setKind(tok::eof); 115 TokenSource = this; 116 Line.Level = 0; 117 Line.InPPDirective = true; 118 } 119 120 ~ScopedMacroState() override { 121 TokenSource = PreviousTokenSource; 122 ResetToken = Token; 123 Line.InPPDirective = false; 124 Line.Level = PreviousLineLevel; 125 } 126 127 FormatToken *getNextToken() override { 128 // The \c UnwrappedLineParser guards against this by never calling 129 // \c getNextToken() after it has encountered the first eof token. 
130 assert(!eof()); 131 PreviousToken = Token; 132 Token = PreviousTokenSource->getNextToken(); 133 if (eof()) 134 return &FakeEOF; 135 return Token; 136 } 137 138 FormatToken *getPreviousToken() override { 139 return PreviousTokenSource->getPreviousToken(); 140 } 141 142 FormatToken *peekNextToken() override { 143 if (eof()) 144 return &FakeEOF; 145 return PreviousTokenSource->peekNextToken(); 146 } 147 148 FormatToken *peekNextToken(int N) override { 149 assert(N > 0); 150 if (eof()) 151 return &FakeEOF; 152 return PreviousTokenSource->peekNextToken(N); 153 } 154 155 bool isEOF() override { return PreviousTokenSource->isEOF(); } 156 157 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 158 159 FormatToken *setPosition(unsigned Position) override { 160 PreviousToken = nullptr; 161 Token = PreviousTokenSource->setPosition(Position); 162 return Token; 163 } 164 165 private: 166 bool eof() { 167 return Token && Token->HasUnescapedNewline && 168 !continuesLineComment(*Token, PreviousToken, 169 /*MinColumnToken=*/PreviousToken); 170 } 171 172 FormatToken FakeEOF; 173 UnwrappedLine &Line; 174 FormatTokenSource *&TokenSource; 175 FormatToken *&ResetToken; 176 unsigned PreviousLineLevel; 177 FormatTokenSource *PreviousTokenSource; 178 179 FormatToken *Token; 180 FormatToken *PreviousToken; 181 }; 182 183 } // end anonymous namespace 184 185 class ScopedLineState { 186 public: 187 ScopedLineState(UnwrappedLineParser &Parser, 188 bool SwitchToPreprocessorLines = false) 189 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 190 if (SwitchToPreprocessorLines) 191 Parser.CurrentLines = &Parser.PreprocessorDirectives; 192 else if (!Parser.Line->Tokens.empty()) 193 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 194 PreBlockLine = std::move(Parser.Line); 195 Parser.Line = std::make_unique<UnwrappedLine>(); 196 Parser.Line->Level = PreBlockLine->Level; 197 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 198 } 199 200 
~ScopedLineState() { 201 if (!Parser.Line->Tokens.empty()) 202 Parser.addUnwrappedLine(); 203 assert(Parser.Line->Tokens.empty()); 204 Parser.Line = std::move(PreBlockLine); 205 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 206 Parser.MustBreakBeforeNextToken = true; 207 Parser.CurrentLines = OriginalLines; 208 } 209 210 private: 211 UnwrappedLineParser &Parser; 212 213 std::unique_ptr<UnwrappedLine> PreBlockLine; 214 SmallVectorImpl<UnwrappedLine> *OriginalLines; 215 }; 216 217 class CompoundStatementIndenter { 218 public: 219 CompoundStatementIndenter(UnwrappedLineParser *Parser, 220 const FormatStyle &Style, unsigned &LineLevel) 221 : CompoundStatementIndenter(Parser, LineLevel, 222 Style.BraceWrapping.AfterControlStatement, 223 Style.BraceWrapping.IndentBraces) {} 224 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 225 bool WrapBrace, bool IndentBrace) 226 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 227 if (WrapBrace) 228 Parser->addUnwrappedLine(); 229 if (IndentBrace) 230 ++LineLevel; 231 } 232 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 233 234 private: 235 unsigned &LineLevel; 236 unsigned OldLineLevel; 237 }; 238 239 namespace { 240 241 class IndexedTokenSource : public FormatTokenSource { 242 public: 243 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 244 : Tokens(Tokens), Position(-1) {} 245 246 FormatToken *getNextToken() override { 247 if (Position >= 0 && Tokens[Position]->is(tok::eof)) { 248 LLVM_DEBUG({ 249 llvm::dbgs() << "Next "; 250 dbgToken(Position); 251 }); 252 return Tokens[Position]; 253 } 254 ++Position; 255 LLVM_DEBUG({ 256 llvm::dbgs() << "Next "; 257 dbgToken(Position); 258 }); 259 return Tokens[Position]; 260 } 261 262 FormatToken *getPreviousToken() override { 263 return Position > 0 ? 
Tokens[Position - 1] : nullptr; 264 } 265 266 FormatToken *peekNextToken() override { 267 int Next = Position + 1; 268 LLVM_DEBUG({ 269 llvm::dbgs() << "Peeking "; 270 dbgToken(Next); 271 }); 272 return Tokens[Next]; 273 } 274 275 FormatToken *peekNextToken(int N) override { 276 assert(N > 0); 277 int Next = Position + N; 278 LLVM_DEBUG({ 279 llvm::dbgs() << "Peeking (+" << (N - 1) << ") "; 280 dbgToken(Next); 281 }); 282 return Tokens[Next]; 283 } 284 285 bool isEOF() override { return Tokens[Position]->is(tok::eof); } 286 287 unsigned getPosition() override { 288 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 289 assert(Position >= 0); 290 return Position; 291 } 292 293 FormatToken *setPosition(unsigned P) override { 294 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 295 Position = P; 296 return Tokens[Position]; 297 } 298 299 void reset() { Position = -1; } 300 301 private: 302 void dbgToken(int Position, llvm::StringRef Indent = "") { 303 FormatToken *Tok = Tokens[Position]; 304 llvm::dbgs() << Indent << "[" << Position 305 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 306 << ", Macro: " << !!Tok->MacroCtx << "\n"; 307 } 308 309 ArrayRef<FormatToken *> Tokens; 310 int Position; 311 }; 312 313 } // end anonymous namespace 314 315 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 316 const AdditionalKeywords &Keywords, 317 unsigned FirstStartColumn, 318 ArrayRef<FormatToken *> Tokens, 319 UnwrappedLineConsumer &Callback) 320 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 321 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 322 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 323 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 324 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 325 ? 
IG_Rejected 326 : IG_Inited), 327 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 328 329 void UnwrappedLineParser::reset() { 330 PPBranchLevel = -1; 331 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 332 ? IG_Rejected 333 : IG_Inited; 334 IncludeGuardToken = nullptr; 335 Line.reset(new UnwrappedLine); 336 CommentsBeforeNextToken.clear(); 337 FormatTok = nullptr; 338 MustBreakBeforeNextToken = false; 339 PreprocessorDirectives.clear(); 340 CurrentLines = &Lines; 341 DeclarationScopeStack.clear(); 342 NestedTooDeep.clear(); 343 PPStack.clear(); 344 Line->FirstStartColumn = FirstStartColumn; 345 } 346 347 void UnwrappedLineParser::parse() { 348 IndexedTokenSource TokenSource(AllTokens); 349 Line->FirstStartColumn = FirstStartColumn; 350 do { 351 LLVM_DEBUG(llvm::dbgs() << "----\n"); 352 reset(); 353 Tokens = &TokenSource; 354 TokenSource.reset(); 355 356 readToken(); 357 parseFile(); 358 359 // If we found an include guard then all preprocessor directives (other than 360 // the guard) are over-indented by one. 361 if (IncludeGuard == IG_Found) 362 for (auto &Line : Lines) 363 if (Line.InPPDirective && Line.Level > 0) 364 --Line.Level; 365 366 // Create line with eof token. 
367 pushToken(FormatTok); 368 addUnwrappedLine(); 369 370 for (const UnwrappedLine &Line : Lines) 371 Callback.consumeUnwrappedLine(Line); 372 373 Callback.finishRun(); 374 Lines.clear(); 375 while (!PPLevelBranchIndex.empty() && 376 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 377 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 378 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 379 } 380 if (!PPLevelBranchIndex.empty()) { 381 ++PPLevelBranchIndex.back(); 382 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 383 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 384 } 385 } while (!PPLevelBranchIndex.empty()); 386 } 387 388 void UnwrappedLineParser::parseFile() { 389 // The top-level context in a file always has declarations, except for pre- 390 // processor directives and JavaScript files. 391 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 392 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 393 MustBeDeclaration); 394 if (Style.Language == FormatStyle::LK_TextProto) 395 parseBracedList(); 396 else 397 parseLevel(/*OpeningBrace=*/nullptr, /*CanContainBracedList=*/true); 398 // Make sure to format the remaining tokens. 399 // 400 // LK_TextProto is special since its top-level is parsed as the body of a 401 // braced list, which does not necessarily have natural line separators such 402 // as a semicolon. Comments after the last entry that have been determined to 403 // not belong to that line, as in: 404 // key: value 405 // // endfile comment 406 // do not have a chance to be put on a line of their own until this point. 407 // Here we add this newline before end-of-file comments. 
408 if (Style.Language == FormatStyle::LK_TextProto && 409 !CommentsBeforeNextToken.empty()) 410 addUnwrappedLine(); 411 flushComments(true); 412 addUnwrappedLine(); 413 } 414 415 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 416 do { 417 switch (FormatTok->Tok.getKind()) { 418 case tok::l_brace: 419 return; 420 default: 421 if (FormatTok->is(Keywords.kw_where)) { 422 addUnwrappedLine(); 423 nextToken(); 424 parseCSharpGenericTypeConstraint(); 425 break; 426 } 427 nextToken(); 428 break; 429 } 430 } while (!eof()); 431 } 432 433 void UnwrappedLineParser::parseCSharpAttribute() { 434 int UnpairedSquareBrackets = 1; 435 do { 436 switch (FormatTok->Tok.getKind()) { 437 case tok::r_square: 438 nextToken(); 439 --UnpairedSquareBrackets; 440 if (UnpairedSquareBrackets == 0) { 441 addUnwrappedLine(); 442 return; 443 } 444 break; 445 case tok::l_square: 446 ++UnpairedSquareBrackets; 447 nextToken(); 448 break; 449 default: 450 nextToken(); 451 break; 452 } 453 } while (!eof()); 454 } 455 456 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 457 if (!Lines.empty() && Lines.back().InPPDirective) 458 return true; 459 460 const FormatToken *Previous = Tokens->getPreviousToken(); 461 return Previous && Previous->is(tok::comment) && 462 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 463 } 464 465 /// \brief Parses a level, that is ???. 466 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level 467 /// \param CanContainBracedList If the content can contain (at any level) a 468 /// braced list. 469 /// \param NextLBracesType The type for left brace found in this level. 470 /// \returns true if a simple block of if/else/for/while, or false otherwise. 471 /// (A simple block has a single statement.) 
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     bool CanContainBracedList,
                                     IfStmtKind *IfKind,
                                     TokenType NextLBracesType) {
  // Braces nested inside a compound requirement are handed down as braced
  // list braces; for every other brace type nothing is propagated.
  auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
                                  ? TT_BracedListLBrace
                                  : TT_Unknown;
  // Evaluated once on entry, i.e. for the position right after the opening
  // brace. Only consulted on the brace-removal path below.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;
  do {
    // Attribute macros are skipped without starting a structural element.
    if (FormatTok->getType() == TT_AttributeMacro) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched as if they were braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Parses one structural element and counts it as a statement. &HasLabel is
    // only passed on until the first label has been recorded.
    auto ParseDefault = [this, OpeningBrace, IfKind, NextLevelLBracesType,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(IfKind, !OpeningBrace, NextLevelLBracesType,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      // A comment at this level becomes an unwrapped line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown)
        FormatTok->setFinalizedType(NextLBracesType);
      else if (FormatTok->Previous &&
               FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
          tryToParseBracedList())
        continue;
      // Not a braced list: parse it as a nested block, counted as one
      // statement of this level.
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/true, /*KeepBraces=*/true,
                 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
                 NextLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // End of this level. The closing brace is not consumed here. The
        // return value tells the caller whether the block is "simple"; that
        // can only be true when RemoveBracesLLVM is on and the opening brace
        // belongs to a control statement or an else.
        if (!Style.RemoveBracesLLVM ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace))
          return false;
        // Simple means: a real '}' (not a macro block end), exactly one
        // statement, no label, and no comment or preprocessor directive right
        // after the opening brace or right before the closing one.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective())
          return false;
        // A comment following the '}' without a newline in between also
        // disqualifies the block.
        const FormatToken *Next = Tokens->peekNextToken();
        return Next->isNot(tok::comment) || Next->NewlinesBefore > 0;
      }
      // Closing brace without an opening brace for this level: consume it and
      // put it on a line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past comments for the token after 'default', then rewind
      // the token source to where it was.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // The level is raised at most once per parseLevel() call, on the first
      // label that is seen.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        // Consume the '[' here; parseCSharpAttribute() expects to start
        // inside the brackets.
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      LLVM_FALLTHROUGH;
    default:
      ParseDefault();
      break;
    }
  } while (!eof());
  return false;
}

// Classifies the brace at the current token, and every brace nested inside
// it, as either a block (BK_Block) or a braced initializer (BK_BracedInit).
// The token source is scanned ahead until the brace stack is empty or eof is
// reached and is then rewound, so the parser position is unchanged on return.
// \p ExpectClassBody: when true, a ';' directly after the closing brace of the
// outermost brace pair is not taken as evidence for a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    do {
      NextTok = Tokens->getNextToken();
    } while (NextTok->is(tok::comment));

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less))
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        else if (PrevTok->is(tok::r_paren))
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only decide for braces that have not been classified yet.
      if (LBraceStack.back()->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Skip NextTok over preprocessor lines, otherwise we may not
          // properly diagnose the block as a braced initializer
          // if the comma separator appears after the pp directive.
          while (NextTok->is(tok::hash)) {
            // Read to the fake eof that ends the directive; the destructor of
            // MacroState then stores the first token after the directive in
            // NextTok.
            ScopedMacroState MacroState(*Line, Tokens, NextTok);
            do {
              NextTok = Tokens->getNextToken();
            } while (NextTok->isNot(tok::eof));
          }

          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isCpp() && NextTok->is(tok::l_paren));

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis);

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            // Note that this overrides whatever was concluded above and that
            // the '[' just inspected is skipped by the outer loop.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Mark the closing brace and its opening brace with the same kind.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back()->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back()->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    // Any of the following tokens inside a still unclassified brace pair makes
    // that pair a block.
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
        LBraceStack.back()->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (FormatToken *LBrace : LBraceStack)
    if (LBrace->is(BK_Unknown))
      LBrace->setBlockKind(BK_Block);

  // Rewind so that parsing continues at the brace this function started at.
  FormatTok = Tokens->setPosition(StoredPosition);
}

// Mixes the std::hash of \p v into \p seed, using the same formula that
// boost::hash_combine popularized.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  std::hash<T> hasher;
  seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

// Returns a hash over the Kind and Line of every entry currently on PPStack.
// parseBlock() compares the value at the opening and the closing brace of a
// block.
size_t UnwrappedLineParser::computePPHash() const {
  size_t h = 0;
  for (const auto &i : PPStack) {
    hash_combine(h, size_t(i.Kind));
    hash_combine(h, i.Line);
  }
  return h;
}

// Checks whether \p ParsedLine might fit on a single line. We must clone the
// tokens of \p ParsedLine before running the token annotator on it so that we
// can restore them afterward.
bool UnwrappedLineParser::mightFitOnOneLine(UnwrappedLine &ParsedLine) const {
  // Without a column limit everything fits.
  const auto ColumnLimit = Style.ColumnLimit;
  if (ColumnLimit == 0)
    return true;

  auto &Tokens = ParsedLine.Tokens;
  assert(!Tokens.empty());
  const auto *LastToken = Tokens.back().Tok;
  assert(LastToken);

  SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());

  // Snapshot every token (and its children) before the annotator modifies
  // them.
  // NOTE(review): Token is a const reference, so the std::move below appears
  // to select the copy path and the children stay attached to ParsedLine while
  // it is annotated -- confirm before turning this into a real move.
  int Index = 0;
  for (const auto &Token : Tokens) {
    assert(Token.Tok);
    auto &SavedToken = SavedTokens[Index++];
    SavedToken.Tok = new FormatToken;
    SavedToken.Tok->copyFrom(*Token.Tok);
    SavedToken.Children = std::move(Token.Children);
  }

  AnnotatedLine Line(ParsedLine);
  assert(Line.Last == LastToken);

  TokenAnnotator Annotator(Style, Keywords);
  Annotator.annotate(Line);
  Annotator.calculateFormattingInformation(Line);

  // Read the result before the tokens are restored below.
  const int Length = LastToken->TotalLength;

  // Put the tokens back into their pre-annotation state and release the
  // snapshot copies allocated above.
  Index = 0;
  for (auto &Token : Tokens) {
    const auto &SavedToken = SavedTokens[Index++];
    Token.Tok->copyFrom(*SavedToken.Tok);
    Token.Children = std::move(SavedToken.Children);
    delete SavedToken.Tok;
  }

  // Indentation plus the total length of the line must stay within the limit.
  return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
}

// Parses a block that starts at the current token, which must be a '{' or a
// macro block begin token.
// \p MustBeDeclaration  declaration state that applies inside the block.
// \p AddLevels          number of levels added to Line->Level for the body.
// \p MunchSemi          also consume a ';' directly after the block.
// \p KeepBraces         when false, the braces of a simple control-statement
//                       block whose last line might fit on one line are linked
//                       through MatchingParen.
// \p UnindentWhitesmithsBraces  take one level off again after the opening
//                       brace line has been emitted.
// \p CanContainBracedList, \p NextLBracesType  forwarded to parseLevel().
// Returns the IfStmtKind reported by parseLevel() for the body (NotIf unless
// parseLevel() changes it).
UnwrappedLineParser::IfStmtKind UnwrappedLineParser::parseBlock(
    bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
    bool UnindentWhitesmithsBraces, bool CanContainBracedList,
    TokenType NextLBracesType) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  FormatToken *Tok = FormatTok;
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // Remember the preprocessor state at the opening brace; it is compared with
  // the state at the closing brace before the two lines are linked.
  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  // A macro block begin may be followed by an argument list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Emit the line that ends with the opening brace and remember its index.
  // The number of pending preprocessor directive lines is subtracted when
  // lines are collected in the top-level list.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  IfStmtKind IfKind = IfStmtKind::NotIf;
  const bool SimpleBlock =
      parseLevel(Tok, CanContainBracedList, &IfKind, NextLBracesType);

  // Input ended inside the block; there is no closing brace to handle.
  if (eof())
    return IfKind;

  // The level ended on something other than the expected closing token:
  // restore the level and give up on this block.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfKind;
  }

  // Link the two braces of a simple control-statement block through
  // MatchingParen, unless the braces have to be kept, the token before the
  // closing brace is a non-optional '}', or the last line of the body cannot
  // fit on a single line.
  if (SimpleBlock && !KeepBraces &&
      Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
    assert(FormatTok->is(tok::r_brace));
    const FormatToken *Previous = Tokens->getPreviousToken();
    assert(Previous);
    if (Previous->isNot(tok::r_brace) || Previous->Optional) {
      assert(!CurrentLines->empty());
      if (mightFitOnOneLine(CurrentLines->back())) {
        Tok->MatchingParen = FormatTok;
        FormatTok->MatchingParen = Tok;
      }
    }
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Cross-link the opening and the closing line only if the preprocessor
  // state is the same at both braces.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfKind;
}

// Returns true if \p Line starts with the four tokens `goog . scope (`.
static bool isGoogScope(const UnwrappedLine &Line) {
  // FIXME: Closure-library specific stuff should not be hard-coded but be
  // configurable.
  if (Line.Tokens.size() < 4)
    return false;
  auto I = Line.Tokens.begin();
  if (I->Tok->TokenText != "goog")
    return false;
  ++I;
  if (I->Tok->isNot(tok::period))
    return false;
  ++I;
  if (I->Tok->TokenText != "scope")
    return false;
  ++I;
  return I->Tok->is(tok::l_paren);
}

// Returns true if \p Line starts with the three tokens `( function (`.
static bool isIIFE(const UnwrappedLine &Line,
                   const AdditionalKeywords &Keywords) {
  // Look for the start of an immediately invoked anonymous function.
  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
  // This is commonly done in JavaScript to create a new, anonymous scope.
  // Example: (function() { ... })()
  if (Line.Tokens.size() < 3)
    return false;
  auto I = Line.Tokens.begin();
  if (I->Tok->isNot(tok::l_paren))
    return false;
  ++I;
  if (I->Tok->isNot(Keywords.kw_function))
    return false;
  ++I;
  return I->Tok->is(tok::l_paren);
}

// Looks up the Style.BraceWrapping.After* option that corresponds to the kind
// of \p InitialToken (namespace, class, union, struct or enum; a namespace
// macro counts as namespace). Returns false for every other kind of token.
static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
                                   const FormatToken &InitialToken) {
  tok::TokenKind Kind = InitialToken.Tok.getKind();
  if (InitialToken.is(TT_NamespaceMacro))
    Kind = tok::kw_namespace;

  switch (Kind) {
  case tok::kw_namespace:
    return Style.BraceWrapping.AfterNamespace;
  case tok::kw_class:
    return Style.BraceWrapping.AfterClass;
  case tok::kw_union:
    return Style.BraceWrapping.AfterUnion;
  case tok::kw_struct:
    return Style.BraceWrapping.AfterStruct;
  case tok::kw_enum:
    return Style.BraceWrapping.AfterEnum;
  default:
    return false;
  }
}

// Parses a braced block whose lines become children of the current line (see
// ScopedLineState) instead of top-level lines. The current token must be the
// opening '{'; both braces are consumed. The body is indented by one level,
// except in JavaScript when the line so far is a goog.scope( call or the
// start of an immediately invoked function expression.
void UnwrappedLineParser::parseChildBlock(
    bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
  assert(FormatTok->is(tok::l_brace));
  FormatTok->setBlockKind(BK_Block);
  const FormatToken *OpeningBrace = FormatTok;
  nextToken();
  {
    // Must be computed before LineState replaces *Line with a fresh line.
    bool SkipIndent = (Style.isJavaScript() &&
                       (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
    ScopedLineState LineState(*this);
    ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                            /*MustBeDeclaration=*/false);
    Line->Level += SkipIndent ? 0 : 1;
    parseLevel(OpeningBrace, CanContainBracedList, /*IfKind=*/nullptr,
               NextLBracesType);
    flushComments(isOnNewLine(*FormatTok));
    Line->Level -= SkipIndent ? 0 : 1;
  }
  // Consume the closing brace, which parseLevel() leaves in place.
  nextToken();
}

// Parses one preprocessor directive; the current token must be the '#'. The
// token source is restricted to the directive for the duration of the call
// (see ScopedMacroState). Dispatches on the directive keyword; a directive
// without an identifier after the '#' and all unrecognized keywords go to
// parsePPUnknown().
void UnwrappedLineParser::parsePPDirective() {
  assert(FormatTok->is(tok::hash) && "'#' expected");
  ScopedMacroState MacroState(*Line, Tokens, FormatTok);

  nextToken();

  if (!FormatTok->Tok.getIdentifierInfo()) {
    parsePPUnknown();
    return;
  }

  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
  case tok::pp_define:
    parsePPDefine();
    return;
  case tok::pp_if:
    parsePPIf(/*IfDef=*/false);
    break;
  case tok::pp_ifdef:
  case tok::pp_ifndef:
    parsePPIf(/*IfDef=*/true);
    break;
  case tok::pp_else:
    parsePPElse();
    break;
  case tok::pp_elifdef:
  case tok::pp_elifndef:
  case tok::pp_elif:
    parsePPElIf();
    break;
  case tok::pp_endif:
    parsePPEndIf();
    break;
  default:
    parsePPUnknown();
    break;
  }
}

// Pushes an entry for a new conditional branch onto PPStack. The entry is
// PP_Unreachable if \p Unreachable is set or if the enclosing branch already
// is unreachable, PP_Conditional otherwise. The recorded line is the number of
// lines collected so far (top-level lines are added in when preprocessor
// directive lines are currently being collected).
void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
  size_t Line = CurrentLines->size();
  if (CurrentLines == &PreprocessorDirectives)
    Line += Lines.size();

  if (Unreachable ||
      (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
    PPStack.push_back({PP_Unreachable, Line});
  else
    PPStack.push_back({PP_Conditional, Line});
}

// Enters a new #if nesting level. The first time a level is reached its
// branch index and branch count are created as 0. The first branch of the
// chain is treated as unreachable whenever the branch selected for this level
// (PPLevelBranchIndex) is not the first one.
void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
  ++PPBranchLevel;
  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
    PPLevelBranchIndex.push_back(0);
    PPLevelBranchCount.push_back(0);
  }
  PPChainBranchIndex.push(0);
  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
  conditionalCompilationCondition(Unreachable || Skip);
}

// Switches to the next branch of the current #if chain: the PPStack entry of
// the previous branch is replaced, and the new branch is unreachable unless
// its index within the chain is the one selected for this level.
void UnwrappedLineParser::conditionalCompilationAlternative() {
  if (!PPStack.empty())
    PPStack.pop_back();
  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  if (!PPChainBranchIndex.empty())
    ++PPChainBranchIndex.top();
  conditionalCompilationCondition(
      PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
}

// Leaves the current #if chain: records the largest number of branches seen
// for this level, then pops the level, the chain index and the PPStack entry.
// Every step tolerates an #endif that has no matching #if.
void UnwrappedLineParser::conditionalCompilationEnd() {
  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
    if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
      PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
  }
  // Guard against #endif's without #if.
  if (PPBranchLevel > -1)
    --PPBranchLevel;
  if (!PPChainBranchIndex.empty())
    PPChainBranchIndex.pop();
  if (!PPStack.empty())
    PPStack.pop_back();
}

// Handles #if (\p IfDef false) and #ifdef / #ifndef (\p IfDef true). The
// branch is treated as unreachable for `#if 0`, `#if false` and `#ifdef SWIG`.
// An #ifndef that is preceded only by comment lines is recorded as a possible
// include guard.
void UnwrappedLineParser::parsePPIf(bool IfDef) {
  bool IfNDef = FormatTok->is(tok::pp_ifndef);
  nextToken();
  bool Unreachable = false;
  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
    Unreachable = true;
  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
    Unreachable = true;
  conditionalCompilationStart(Unreachable);
  FormatToken *IfCondition = FormatTok;
  // If there's a #ifndef on the first line, and the only lines before it are
  // comments, it could be an include guard.
  bool MaybeIncludeGuard = IfNDef;
  if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
    for (auto &Line : Lines) {
      if (!Line.Tokens.front().Tok->is(tok::comment)) {
        MaybeIncludeGuard = false;
        IncludeGuard = IG_Rejected;
        break;
      }
    }
  // The directive line itself is parsed with the branch level of the
  // enclosing scope.
  --PPBranchLevel;
  parsePPUnknown();
  ++PPBranchLevel;
  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
    IncludeGuard = IG_IfNdefed;
    IncludeGuardToken = IfCondition;
  }
}

// Handles #else (and, through parsePPElIf(), the #elif family).
void UnwrappedLineParser::parsePPElse() {
  // If a potential include guard has an #else, it's not an include guard.
  if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
    IncludeGuard = IG_Rejected;
  conditionalCompilationAlternative();
  // NOTE(review): for an #else without a matching #if (PPBranchLevel == -1)
  // the decrement is skipped but the increment below still runs -- confirm
  // that this is intended.
  if (PPBranchLevel > -1)
    --PPBranchLevel;
  parsePPUnknown();
  ++PPBranchLevel;
}

// #elif, #elifdef and #elifndef are handled exactly like #else.
void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }

void UnwrappedLineParser::parsePPEndIf() {
  conditionalCompilationEnd();
  parsePPUnknown();
  // If the #endif of a potential include guard is the last thing in the file,
  // then we found an include guard.
1124 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1125 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1126 IncludeGuard = IG_Found; 1127 } 1128 1129 void UnwrappedLineParser::parsePPDefine() { 1130 nextToken(); 1131 1132 if (!FormatTok->Tok.getIdentifierInfo()) { 1133 IncludeGuard = IG_Rejected; 1134 IncludeGuardToken = nullptr; 1135 parsePPUnknown(); 1136 return; 1137 } 1138 1139 if (IncludeGuard == IG_IfNdefed && 1140 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1141 IncludeGuard = IG_Defined; 1142 IncludeGuardToken = nullptr; 1143 for (auto &Line : Lines) { 1144 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1145 IncludeGuard = IG_Rejected; 1146 break; 1147 } 1148 } 1149 } 1150 1151 // In the context of a define, even keywords should be treated as normal 1152 // identifiers. Setting the kind to identifier is not enough, because we need 1153 // to treat additional keywords like __except as well, which are already 1154 // identifiers. Setting the identifier info to null interferes with include 1155 // guard processing above, and changes preprocessing nesting. 1156 FormatTok->Tok.setKind(tok::identifier); 1157 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); 1158 nextToken(); 1159 if (FormatTok->Tok.getKind() == tok::l_paren && 1160 !FormatTok->hasWhitespaceBefore()) 1161 parseParens(); 1162 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1163 Line->Level += PPBranchLevel + 1; 1164 addUnwrappedLine(); 1165 ++Line->Level; 1166 1167 // Errors during a preprocessor directive can only affect the layout of the 1168 // preprocessor directive, and thus we ignore them. An alternative approach 1169 // would be to use the same approach we use on the file level (no 1170 // re-indentation if there was a structural error) within the macro 1171 // definition. 
1172 parseFile(); 1173 } 1174 1175 void UnwrappedLineParser::parsePPUnknown() { 1176 do { 1177 nextToken(); 1178 } while (!eof()); 1179 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1180 Line->Level += PPBranchLevel + 1; 1181 addUnwrappedLine(); 1182 } 1183 1184 // Here we exclude certain tokens that are not usually the first token in an 1185 // unwrapped line. This is used in attempt to distinguish macro calls without 1186 // trailing semicolons from other constructs split to several lines. 1187 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1188 // Semicolon can be a null-statement, l_square can be a start of a macro or 1189 // a C++11 attribute, but this doesn't seem to be common. 1190 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 1191 Tok.isNot(TT_AttributeSquare) && 1192 // Tokens that can only be used as binary operators and a part of 1193 // overloaded operator names. 1194 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 1195 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 1196 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 1197 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 1198 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 1199 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 1200 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 1201 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 1202 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 1203 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 1204 Tok.isNot(tok::lesslessequal) && 1205 // Colon is used in labels, base class lists, initializer lists, 1206 // range-based for loops, ternary operator, but should never be the 1207 // first token in an unwrapped line. 1208 Tok.isNot(tok::colon) && 1209 // 'noexcept' is a trailing annotation. 
1210 Tok.isNot(tok::kw_noexcept); 1211 } 1212 1213 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1214 const FormatToken *FormatTok) { 1215 // FIXME: This returns true for C/C++ keywords like 'struct'. 1216 return FormatTok->is(tok::identifier) && 1217 (FormatTok->Tok.getIdentifierInfo() == nullptr || 1218 !FormatTok->isOneOf( 1219 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1220 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1221 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1222 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1223 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1224 Keywords.kw_instanceof, Keywords.kw_interface, 1225 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1226 } 1227 1228 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1229 const FormatToken *FormatTok) { 1230 return FormatTok->Tok.isLiteral() || 1231 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1232 mustBeJSIdent(Keywords, FormatTok); 1233 } 1234 1235 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1236 // when encountered after a value (see mustBeJSIdentOrValue). 1237 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1238 const FormatToken *FormatTok) { 1239 return FormatTok->isOneOf( 1240 tok::kw_return, Keywords.kw_yield, 1241 // conditionals 1242 tok::kw_if, tok::kw_else, 1243 // loops 1244 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1245 // switch/case 1246 tok::kw_switch, tok::kw_case, 1247 // exceptions 1248 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1249 // declaration 1250 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1251 Keywords.kw_async, Keywords.kw_function, 1252 // import/export 1253 Keywords.kw_import, tok::kw_export); 1254 } 1255 1256 // Checks whether a token is a type in K&R C (aka C78). 
static bool isC78Type(const FormatToken &Tok) {
  // Besides the builtin type keywords, any plain identifier is accepted
  // (presumably so that typedef names count as types).
  return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
                     tok::kw_unsigned, tok::kw_float, tok::kw_double,
                     tok::identifier);
}

// This function checks whether a token starts the first parameter declaration
// in a K&R C (aka C78) function definition, e.g.:
//   int f(a, b)
//   short a, b;
//   {
//      return a + b;
//   }
//
// \p Tok is the token that follows the closing parenthesis, \p Next is the
// token after \p Tok, and \p FuncName is the token that precedes the opening
// parenthesis. All three must be non-null.
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
                               const FormatToken *FuncName) {
  assert(Tok);
  assert(Next);
  assert(FuncName);

  if (FuncName->isNot(tok::identifier))
    return false;

  // The function name must be preceded by a return type or a '*'.
  const FormatToken *Prev = FuncName->Previous;
  if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
    return false;

  // The candidate declaration must start with a type (or register/struct/
  // union) ...
  if (!isC78Type(*Tok) &&
      !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
    return false;

  // ... and be followed by a '*' or by a token that has identifier info.
  if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
    return false;

  // Walk backwards from the declaration: the parameter list must end with
  // "<identifier>)" ...
  Tok = Tok->Previous;
  if (!Tok || Tok->isNot(tok::r_paren))
    return false;

  Tok = Tok->Previous;
  if (!Tok || Tok->isNot(tok::identifier))
    return false;

  // ... where that identifier directly follows '(' or ','.
  return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
}

// Parses a C++ module import declaration up to and including its terminating
// semicolon (or until eof() reports true) and emits it as one unwrapped line.
void UnwrappedLineParser::parseModuleImport() {
  nextToken();
  while (!eof()) {
    if (FormatTok->is(tok::colon)) {
      FormatTok->setFinalizedType(TT_ModulePartitionColon);
    }
    // Handle import <foo/bar.h> as we would an include statement.
    else if (FormatTok->is(tok::less)) {
      nextToken();
      while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
        // Mark tokens up to the trailing line comments as implicit string
        // literals.
        if (FormatTok->isNot(tok::comment) &&
            !FormatTok->TokenText.startswith("//"))
          FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
        nextToken();
      }
    }
    if (FormatTok->is(tok::semi)) {
      nextToken();
      break;
    }
    nextToken();
  }

  addUnwrappedLine();
}

// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// terminate the line in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // ASI is only considered when a newline separates the two tokens. If there
  // are pending comments, the first comment's NewlinesBefore is used instead
  // of the next token's.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
    // If the line contains an '@' sign, the previous token might be an
    // annotation, which can precede another identifier/value.
    bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
      return LineNode.Tok->is(tok::at);
    });
    if (HasAt)
      return;
  }
  // A value followed by '!' on the next line: end the line before the '!'.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // Two values in a row (or a value after ']', ')', '++', '--') that are not
  // separated by a template string expression boundary.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus)))
    return addUnwrappedLine();
  // A value or ')' followed by a token that starts a declaration/statement.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next))
    return addUnwrappedLine();
}

// Parses one structural element that starts at the current token.
//
// The first switch dispatches elements that are introduced by a keyword (or a
// special identifier) to their dedicated parsers. Everything that is not
// fully handled there falls through to the loop below, which consumes tokens
// until one of its cases ends the element or eof() reports true.
//
// \p IfKind is forwarded to parseIfThenElse(). \p IsTopLevel enables the K&R C
// parameter declaration check after a parenthesized list. If
// \p NextLBracesType is not TT_Unknown it is set as the finalized type of a
// '{' reached in the loop. If \p HasLabel is non-null it is set to true when
// the element turns out to be a label.
void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
                                                 bool IsTopLevel,
                                                 TokenType NextLBracesType,
                                                 bool *HasLabel) {
  // TableGen: include "file"
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setFinalizedType(TT_InlineASMBrace);
      nextToken();
      // Mark every token inside the asm block as finalized until the closing
      // brace is found.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setFinalizedType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# the token is only consumed here; in the
    // other languages it is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse(IfKind);
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'case: string' field declaration.
      nextToken();
      break;
    }
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only `extern "..." {` is handled here; everything else continues as a
    // regular declaration.
    if (FormatTok->is(tok::string_literal)) {
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // Proto: import [public] "file";
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp()) {
        parseModuleImport();
        return;
      }
    }
    // "signals:", "Q_SIGNALS:", "slots:" and "Q_SLOTS:" section labels.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic part: consume the remaining tokens of the element one construct
  // at a time.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      return;
    case tok::kw_requires: {
      if (Style.isCpp()) {
        // parseRequires() reports whether it parsed a requires clause; only
        // then is the element finished here.
        bool ParsedClause = parseRequires();
        if (ParsedClause)
          return;
      } else {
        nextToken();
      }
      break;
    }
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // typedef NS_ENUM(...) and friends are parsed like enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike())
        return;
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by a type, a parameter list and a block body.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown)
        FormatTok->setFinalizedType(NextLBracesType);
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        if (!Line->InPPDirective)
          FormatTok->setFinalizedType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike())
          return;
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;

      FormatToken *PreviousToken = FormatTok;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // Only an identifier that is the first token of the line (optionally
      // after a leading comment) can be a label or a free-standing macro.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          if (HasLabel)
            *HasLabel = true;
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: the name is all upper case and either at least five
        // characters long or followed by a parenthesized list.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->is(tok::less)) {
        // Proto: a '<' after '=' opens a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // 'case: string' field declaration.
        nextToken();
        break;
      }
      parseCaseLabel();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

// Tries to parse the block starting at the current '{' as a C# property
// accessor list such as `{ get; set; }`.
//
// Returns false (after restoring the token position if it was moved) when the
// language is not C#, the brace does not follow an identifier, or no
// get/init/set keyword is found by the lookahead. Otherwise the accessor is
// consumed and true is returned.
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  if (FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
  // Track these as they do not require line breaks to be introduced.
  bool HasSpecialAccessor = false;
  bool IsTrivialPropertyAccessor = true;
  // Lookahead only: FormatTok is not advanced in this loop, the scan always
  // leaves through the `break` at the first token that is not part of a
  // trivial accessor.
  while (!eof()) {
    if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
                     tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
                     Keywords.kw_init, Keywords.kw_set)) {
      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
        HasSpecialAccessor = true;
      Tok = Tokens->getNextToken();
      continue;
    }
    if (Tok->isNot(tok::r_brace))
      IsTrivialPropertyAccessor = false;
    break;
  }

  if (!HasSpecialAccessor) {
    Tokens->setPosition(StoredPosition);
    return false;
  }

  // Try to parse the property accessor:
  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  Tokens->setPosition(StoredPosition);
  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
    addUnwrappedLine();
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_brace:
      nextToken();
      // `} = value;` - consume the initializer up to and including the ';'.
      if (FormatTok->is(tok::equal)) {
        while (!eof() && FormatTok->isNot(tok::semi))
          nextToken();
        nextToken();
      }
      addUnwrappedLine();
      return true;
    case tok::l_brace:
      // Accessor with a block body.
      ++Line->Level;
      parseBlock(/*MustBeDeclaration=*/true);
      addUnwrappedLine();
      --Line->Level;
      break;
    case tok::equal:
      // Accessor with an expression body (`=> expr;`).
      if (FormatTok->is(TT_FatArrow)) {
        ++Line->Level;
        do {
          nextToken();
        } while (!eof() && FormatTok->isNot(tok::semi));
        nextToken();
        addUnwrappedLine();
        --Line->Level;
        break;
      }
      nextToken();
      break;
    default:
      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
                             Keywords.kw_set) &&
          !IsTrivialPropertyAccessor) {
        // Non-trivial get/set needs to be on its own line.
        addUnwrappedLine();
      }
      nextToken();
    }
  } while (!eof());

  // Unreachable for well-formed code (paired '{' and '}').
  return true;
}

// Tries to parse a C++ lambda expression starting at the current '['.
//
// Returns false if the language is not C++ (after consuming the '[') or if
// tryToParseLambdaIntroducer() fails. Otherwise returns true, even when the
// tokens after the introducer turn out not to form a lambda; only when a '{'
// is reached are the bracket and brace typed as lambda tokens and the body
// parsed as a child block.
bool UnwrappedLineParser::tryToParseLambda() {
  assert(FormatTok->is(tok::l_square));
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  bool SeenArrow = false;
  bool InTemplateParameterList = false;

  // Skip over everything that may appear between the introducer and the body.
  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_class:
    case tok::kw_template:
    case tok::kw_typename:
      assert(FormatTok->Previous);
      if (FormatTok->Previous->is(tok::less))
        InTemplateParameterList = true;
      nextToken();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as a heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setFinalizedType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    default:
      return true;
    }
  }
  FormatTok->setFinalizedType(TT_LambdaLBrace);
  LSquare.setFinalizedType(TT_LambdaLSquare);
  parseChildBlock();
  return true;
}

bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
  const FormatToken *Previous = FormatTok->Previous;
  const FormatToken *LeftSquare = FormatTok;
  nextToken();
  // A '[' in one of these positions is rejected as a lambda introducer.
  if (Previous &&
      (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
                         tok::kw_delete, tok::l_square) ||
       LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() ||
       Previous->isSimpleTypeSpecifier())) {
    return false;
  }
  // "[[" is rejected as well.
  if (FormatTok->is(tok::l_square))
    return false;
  // "[]>" is rejected as well.
  if (FormatTok->is(tok::r_square)) {
    const FormatToken *Next = Tokens->peekNextToken();
    if (Next->is(tok::greater))
      return false;
  }
  parseSquare(/*LambdaIntroducer=*/true);
return true; 2110 } 2111 2112 void UnwrappedLineParser::tryToParseJSFunction() { 2113 assert(FormatTok->is(Keywords.kw_function) || 2114 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 2115 if (FormatTok->is(Keywords.kw_async)) 2116 nextToken(); 2117 // Consume "function". 2118 nextToken(); 2119 2120 // Consume * (generator function). Treat it like C++'s overloaded operators. 2121 if (FormatTok->is(tok::star)) { 2122 FormatTok->setFinalizedType(TT_OverloadedOperator); 2123 nextToken(); 2124 } 2125 2126 // Consume function name. 2127 if (FormatTok->is(tok::identifier)) 2128 nextToken(); 2129 2130 if (FormatTok->isNot(tok::l_paren)) 2131 return; 2132 2133 // Parse formal parameter list. 2134 parseParens(); 2135 2136 if (FormatTok->is(tok::colon)) { 2137 // Parse a type definition. 2138 nextToken(); 2139 2140 // Eat the type declaration. For braced inline object types, balance braces, 2141 // otherwise just parse until finding an l_brace for the function body. 2142 if (FormatTok->is(tok::l_brace)) 2143 tryToParseBracedList(); 2144 else 2145 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 2146 nextToken(); 2147 } 2148 2149 if (FormatTok->is(tok::semi)) 2150 return; 2151 2152 parseChildBlock(); 2153 } 2154 2155 bool UnwrappedLineParser::tryToParseBracedList() { 2156 if (FormatTok->is(BK_Unknown)) 2157 calculateBraceTypes(); 2158 assert(FormatTok->isNot(BK_Unknown)); 2159 if (FormatTok->is(BK_Block)) 2160 return false; 2161 nextToken(); 2162 parseBracedList(); 2163 return true; 2164 } 2165 2166 bool UnwrappedLineParser::tryToParseChildBlock() { 2167 assert(Style.isJavaScript() || Style.isCSharp()); 2168 assert(FormatTok->is(TT_FatArrow)); 2169 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 2170 // They always start an expression or a child block if followed by a curly 2171 // brace. 
2172 nextToken(); 2173 if (FormatTok->isNot(tok::l_brace)) 2174 return false; 2175 parseChildBlock(); 2176 return true; 2177 } 2178 2179 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 2180 bool IsEnum, 2181 tok::TokenKind ClosingBraceKind) { 2182 bool HasError = false; 2183 2184 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 2185 // replace this by using parseAssignmentExpression() inside. 2186 do { 2187 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2188 tryToParseChildBlock()) 2189 continue; 2190 if (Style.isJavaScript()) { 2191 if (FormatTok->is(Keywords.kw_function) || 2192 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 2193 tryToParseJSFunction(); 2194 continue; 2195 } 2196 if (FormatTok->is(tok::l_brace)) { 2197 // Could be a method inside of a braced list `{a() { return 1; }}`. 2198 if (tryToParseBracedList()) 2199 continue; 2200 parseChildBlock(); 2201 } 2202 } 2203 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 2204 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2205 addUnwrappedLine(); 2206 nextToken(); 2207 return !HasError; 2208 } 2209 switch (FormatTok->Tok.getKind()) { 2210 case tok::l_square: 2211 if (Style.isCSharp()) 2212 parseSquare(); 2213 else 2214 tryToParseLambda(); 2215 break; 2216 case tok::l_paren: 2217 parseParens(); 2218 // JavaScript can just have free standing methods and getters/setters in 2219 // object literals. Detect them by a "{" following ")". 2220 if (Style.isJavaScript()) { 2221 if (FormatTok->is(tok::l_brace)) 2222 parseChildBlock(); 2223 break; 2224 } 2225 break; 2226 case tok::l_brace: 2227 // Assume there are no blocks inside a braced init list apart 2228 // from the ones we explicitly parse out (like lambdas). 
2229 FormatTok->setBlockKind(BK_BracedInit); 2230 nextToken(); 2231 parseBracedList(); 2232 break; 2233 case tok::less: 2234 if (Style.Language == FormatStyle::LK_Proto || 2235 ClosingBraceKind == tok::greater) { 2236 nextToken(); 2237 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2238 /*ClosingBraceKind=*/tok::greater); 2239 } else { 2240 nextToken(); 2241 } 2242 break; 2243 case tok::semi: 2244 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2245 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 2246 // used for error recovery if we have otherwise determined that this is 2247 // a braced list. 2248 if (Style.isJavaScript()) { 2249 nextToken(); 2250 break; 2251 } 2252 HasError = true; 2253 if (!ContinueOnSemicolons) 2254 return !HasError; 2255 nextToken(); 2256 break; 2257 case tok::comma: 2258 nextToken(); 2259 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2260 addUnwrappedLine(); 2261 break; 2262 default: 2263 nextToken(); 2264 break; 2265 } 2266 } while (!eof()); 2267 return false; 2268 } 2269 2270 /// \brief Parses a pair of parentheses (and everything between them). 2271 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all 2272 /// double ampersands. This only counts for the current parens scope. 2273 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { 2274 assert(FormatTok->is(tok::l_paren) && "'(' expected."); 2275 nextToken(); 2276 do { 2277 switch (FormatTok->Tok.getKind()) { 2278 case tok::l_paren: 2279 parseParens(); 2280 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2281 parseChildBlock(); 2282 break; 2283 case tok::r_paren: 2284 nextToken(); 2285 return; 2286 case tok::r_brace: 2287 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2288 return; 2289 case tok::l_square: 2290 tryToParseLambda(); 2291 break; 2292 case tok::l_brace: 2293 if (!tryToParseBracedList()) 2294 parseChildBlock(); 2295 break; 2296 case tok::at: 2297 nextToken(); 2298 if (FormatTok->is(tok::l_brace)) { 2299 nextToken(); 2300 parseBracedList(); 2301 } 2302 break; 2303 case tok::equal: 2304 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2305 tryToParseChildBlock(); 2306 else 2307 nextToken(); 2308 break; 2309 case tok::kw_class: 2310 if (Style.isJavaScript()) 2311 parseRecord(/*ParseAsExpr=*/true); 2312 else 2313 nextToken(); 2314 break; 2315 case tok::identifier: 2316 if (Style.isJavaScript() && 2317 (FormatTok->is(Keywords.kw_function) || 2318 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 2319 tryToParseJSFunction(); 2320 else 2321 nextToken(); 2322 break; 2323 case tok::kw_requires: { 2324 auto RequiresToken = FormatTok; 2325 nextToken(); 2326 parseRequiresExpression(RequiresToken); 2327 break; 2328 } 2329 case tok::ampamp: 2330 if (AmpAmpTokenType != TT_Unknown) 2331 FormatTok->setFinalizedType(AmpAmpTokenType); 2332 LLVM_FALLTHROUGH; 2333 default: 2334 nextToken(); 2335 break; 2336 } 2337 } while (!eof()); 2338 } 2339 2340 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2341 if (!LambdaIntroducer) { 2342 assert(FormatTok->is(tok::l_square) && "'[' expected."); 2343 if (tryToParseLambda()) 2344 return; 2345 } 2346 do { 2347 switch (FormatTok->Tok.getKind()) { 2348 case tok::l_paren: 2349 parseParens(); 2350 break; 2351 case tok::r_square: 2352 nextToken(); 2353 return; 2354 case tok::r_brace: 2355 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2356 return; 2357 case tok::l_square: 2358 parseSquare(); 2359 break; 2360 case tok::l_brace: { 2361 if (!tryToParseBracedList()) 2362 parseChildBlock(); 2363 break; 2364 } 2365 case tok::at: 2366 nextToken(); 2367 if (FormatTok->is(tok::l_brace)) { 2368 nextToken(); 2369 parseBracedList(); 2370 } 2371 break; 2372 default: 2373 nextToken(); 2374 break; 2375 } 2376 } while (!eof()); 2377 } 2378 2379 void UnwrappedLineParser::keepAncestorBraces() { 2380 if (!Style.RemoveBracesLLVM) 2381 return; 2382 2383 const int MaxNestingLevels = 2; 2384 const int Size = NestedTooDeep.size(); 2385 if (Size >= MaxNestingLevels) 2386 NestedTooDeep[Size - MaxNestingLevels] = true; 2387 NestedTooDeep.push_back(false); 2388 } 2389 2390 static FormatToken *getLastNonComment(const UnwrappedLine &Line) { 2391 for (const auto &Token : llvm::reverse(Line.Tokens)) 2392 if (Token.Tok->isNot(tok::comment)) 2393 return Token.Tok; 2394 2395 return nullptr; 2396 } 2397 2398 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) { 2399 FormatToken *Tok = nullptr; 2400 2401 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() && 2402 PreprocessorDirectives.empty()) { 2403 Tok = getLastNonComment(*Line); 2404 assert(Tok); 2405 if (Tok->BraceCount < 0) { 2406 assert(Tok->BraceCount == -1); 2407 Tok = nullptr; 2408 } else { 2409 Tok->BraceCount = -1; 2410 } 2411 } 2412 2413 addUnwrappedLine(); 2414 ++Line->Level; 2415 parseStructuralElement(); 2416 2417 if (Tok) { 2418 assert(!Line->InPPDirective); 2419 Tok = nullptr; 2420 for (const auto &L : llvm::reverse(*CurrentLines)) { 2421 if (!L.InPPDirective && getLastNonComment(L)) { 2422 Tok = L.Tokens.back().Tok; 2423 break; 2424 } 2425 } 2426 assert(Tok); 2427 ++Tok->BraceCount; 2428 } 2429 2430 if (CheckEOF && FormatTok->is(tok::eof)) 2431 addUnwrappedLine(); 2432 2433 --Line->Level; 2434 } 2435 2436 static void markOptionalBraces(FormatToken *LeftBrace) { 2437 if (!LeftBrace) 2438 return; 2439 2440 
assert(LeftBrace->is(tok::l_brace)); 2441 2442 FormatToken *RightBrace = LeftBrace->MatchingParen; 2443 if (!RightBrace) { 2444 assert(!LeftBrace->Optional); 2445 return; 2446 } 2447 2448 assert(RightBrace->is(tok::r_brace)); 2449 assert(RightBrace->MatchingParen == LeftBrace); 2450 assert(LeftBrace->Optional == RightBrace->Optional); 2451 2452 LeftBrace->Optional = true; 2453 RightBrace->Optional = true; 2454 } 2455 2456 void UnwrappedLineParser::handleAttributes() { 2457 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2458 if (FormatTok->is(TT_AttributeMacro)) 2459 nextToken(); 2460 handleCppAttributes(); 2461 } 2462 2463 bool UnwrappedLineParser::handleCppAttributes() { 2464 // Handle [[likely]] / [[unlikely]] attributes. 2465 if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) { 2466 parseSquare(); 2467 return true; 2468 } 2469 return false; 2470 } 2471 2472 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, 2473 bool KeepBraces) { 2474 assert(FormatTok->is(tok::kw_if) && "'if' expected"); 2475 nextToken(); 2476 if (FormatTok->is(tok::exclaim)) 2477 nextToken(); 2478 2479 bool KeepIfBraces = true; 2480 if (FormatTok->is(tok::kw_consteval)) { 2481 nextToken(); 2482 } else { 2483 if (Style.RemoveBracesLLVM) 2484 KeepIfBraces = KeepBraces; 2485 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier)) 2486 nextToken(); 2487 if (FormatTok->is(tok::l_paren)) 2488 parseParens(); 2489 } 2490 handleAttributes(); 2491 2492 bool NeedsUnwrappedLine = false; 2493 keepAncestorBraces(); 2494 2495 FormatToken *IfLeftBrace = nullptr; 2496 IfStmtKind IfBlockKind = IfStmtKind::NotIf; 2497 2498 if (FormatTok->is(tok::l_brace)) { 2499 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 2500 IfLeftBrace = FormatTok; 2501 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2502 IfBlockKind = parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2503 /*MunchSemi=*/true, KeepIfBraces); 2504 if (Style.BraceWrapping.BeforeElse) 2505 
addUnwrappedLine(); 2506 else 2507 NeedsUnwrappedLine = true; 2508 } else { 2509 parseUnbracedBody(); 2510 } 2511 2512 if (Style.RemoveBracesLLVM) { 2513 assert(!NestedTooDeep.empty()); 2514 KeepIfBraces = KeepIfBraces || 2515 (IfLeftBrace && !IfLeftBrace->MatchingParen) || 2516 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || 2517 IfBlockKind == IfStmtKind::IfElseIf; 2518 } 2519 2520 bool KeepElseBraces = KeepIfBraces; 2521 FormatToken *ElseLeftBrace = nullptr; 2522 IfStmtKind Kind = IfStmtKind::IfOnly; 2523 2524 if (FormatTok->is(tok::kw_else)) { 2525 if (Style.RemoveBracesLLVM) { 2526 NestedTooDeep.back() = false; 2527 Kind = IfStmtKind::IfElse; 2528 } 2529 nextToken(); 2530 handleAttributes(); 2531 if (FormatTok->is(tok::l_brace)) { 2532 FormatTok->setFinalizedType(TT_ElseLBrace); 2533 ElseLeftBrace = FormatTok; 2534 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2535 if (parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2536 /*MunchSemi=*/true, KeepElseBraces) == IfStmtKind::IfOnly) 2537 Kind = IfStmtKind::IfElseIf; 2538 addUnwrappedLine(); 2539 } else if (FormatTok->is(tok::kw_if)) { 2540 const FormatToken *Previous = Tokens->getPreviousToken(); 2541 assert(Previous); 2542 const bool IsPrecededByComment = Previous->is(tok::comment); 2543 if (IsPrecededByComment) { 2544 addUnwrappedLine(); 2545 ++Line->Level; 2546 } 2547 bool TooDeep = true; 2548 if (Style.RemoveBracesLLVM) { 2549 Kind = IfStmtKind::IfElseIf; 2550 TooDeep = NestedTooDeep.pop_back_val(); 2551 } 2552 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces); 2553 if (Style.RemoveBracesLLVM) 2554 NestedTooDeep.push_back(TooDeep); 2555 if (IsPrecededByComment) 2556 --Line->Level; 2557 } else { 2558 parseUnbracedBody(/*CheckEOF=*/true); 2559 } 2560 } else { 2561 if (Style.RemoveBracesLLVM) 2562 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; 2563 if (NeedsUnwrappedLine) 2564 addUnwrappedLine(); 2565 } 2566 2567 if 
(!Style.RemoveBracesLLVM) 2568 return nullptr; 2569 2570 assert(!NestedTooDeep.empty()); 2571 KeepElseBraces = KeepElseBraces || 2572 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || 2573 NestedTooDeep.back(); 2574 2575 NestedTooDeep.pop_back(); 2576 2577 if (!KeepIfBraces && !KeepElseBraces) { 2578 markOptionalBraces(IfLeftBrace); 2579 markOptionalBraces(ElseLeftBrace); 2580 } else if (IfLeftBrace) { 2581 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; 2582 if (IfRightBrace) { 2583 assert(IfRightBrace->MatchingParen == IfLeftBrace); 2584 assert(!IfLeftBrace->Optional); 2585 assert(!IfRightBrace->Optional); 2586 IfLeftBrace->MatchingParen = nullptr; 2587 IfRightBrace->MatchingParen = nullptr; 2588 } 2589 } 2590 2591 if (IfKind) 2592 *IfKind = Kind; 2593 2594 return IfLeftBrace; 2595 } 2596 2597 void UnwrappedLineParser::parseTryCatch() { 2598 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2599 nextToken(); 2600 bool NeedsUnwrappedLine = false; 2601 if (FormatTok->is(tok::colon)) { 2602 // We are in a function try block, what comes is an initializer list. 2603 nextToken(); 2604 2605 // In case identifiers were removed by clang-tidy, what might follow is 2606 // multiple commas in sequence - before the first identifier. 2607 while (FormatTok->is(tok::comma)) 2608 nextToken(); 2609 2610 while (FormatTok->is(tok::identifier)) { 2611 nextToken(); 2612 if (FormatTok->is(tok::l_paren)) 2613 parseParens(); 2614 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2615 FormatTok->is(tok::l_brace)) { 2616 do { 2617 nextToken(); 2618 } while (!FormatTok->is(tok::r_brace)); 2619 nextToken(); 2620 } 2621 2622 // In case identifiers were removed by clang-tidy, what might follow is 2623 // multiple commas in sequence - after the first identifier. 2624 while (FormatTok->is(tok::comma)) 2625 nextToken(); 2626 } 2627 } 2628 // Parse try with resource. 
2629 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) 2630 parseParens(); 2631 2632 keepAncestorBraces(); 2633 2634 if (FormatTok->is(tok::l_brace)) { 2635 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2636 parseBlock(); 2637 if (Style.BraceWrapping.BeforeCatch) 2638 addUnwrappedLine(); 2639 else 2640 NeedsUnwrappedLine = true; 2641 } else if (!FormatTok->is(tok::kw_catch)) { 2642 // The C++ standard requires a compound-statement after a try. 2643 // If there's none, we try to assume there's a structuralElement 2644 // and try to continue. 2645 addUnwrappedLine(); 2646 ++Line->Level; 2647 parseStructuralElement(); 2648 --Line->Level; 2649 } 2650 while (true) { 2651 if (FormatTok->is(tok::at)) 2652 nextToken(); 2653 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 2654 tok::kw___finally) || 2655 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 2656 FormatTok->is(Keywords.kw_finally)) || 2657 (FormatTok->isObjCAtKeyword(tok::objc_catch) || 2658 FormatTok->isObjCAtKeyword(tok::objc_finally)))) 2659 break; 2660 nextToken(); 2661 while (FormatTok->isNot(tok::l_brace)) { 2662 if (FormatTok->is(tok::l_paren)) { 2663 parseParens(); 2664 continue; 2665 } 2666 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) { 2667 if (Style.RemoveBracesLLVM) 2668 NestedTooDeep.pop_back(); 2669 return; 2670 } 2671 nextToken(); 2672 } 2673 NeedsUnwrappedLine = false; 2674 Line->MustBeDeclaration = false; 2675 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2676 parseBlock(); 2677 if (Style.BraceWrapping.BeforeCatch) 2678 addUnwrappedLine(); 2679 else 2680 NeedsUnwrappedLine = true; 2681 } 2682 2683 if (Style.RemoveBracesLLVM) 2684 NestedTooDeep.pop_back(); 2685 2686 if (NeedsUnwrappedLine) 2687 addUnwrappedLine(); 2688 } 2689 2690 void UnwrappedLineParser::parseNamespace() { 2691 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 2692 "'namespace' expected"); 2693 2694 const 
FormatToken &InitialToken = *FormatTok; 2695 nextToken(); 2696 if (InitialToken.is(TT_NamespaceMacro)) { 2697 parseParens(); 2698 } else { 2699 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, 2700 tok::l_square, tok::period, tok::l_paren) || 2701 (Style.isCSharp() && FormatTok->is(tok::kw_union))) 2702 if (FormatTok->is(tok::l_square)) 2703 parseSquare(); 2704 else if (FormatTok->is(tok::l_paren)) 2705 parseParens(); 2706 else 2707 nextToken(); 2708 } 2709 if (FormatTok->is(tok::l_brace)) { 2710 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2711 addUnwrappedLine(); 2712 2713 unsigned AddLevels = 2714 Style.NamespaceIndentation == FormatStyle::NI_All || 2715 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 2716 DeclarationScopeStack.size() > 1) 2717 ? 1u 2718 : 0u; 2719 bool ManageWhitesmithsBraces = 2720 AddLevels == 0u && 2721 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 2722 2723 // If we're in Whitesmiths mode, indent the brace if we're not indenting 2724 // the whole block. 2725 if (ManageWhitesmithsBraces) 2726 ++Line->Level; 2727 2728 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true, 2729 /*KeepBraces=*/true, ManageWhitesmithsBraces); 2730 2731 // Munch the semicolon after a namespace. This is more common than one would 2732 // think. Putting the semicolon into its own line is very ugly. 2733 if (FormatTok->is(tok::semi)) 2734 nextToken(); 2735 2736 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); 2737 2738 if (ManageWhitesmithsBraces) 2739 --Line->Level; 2740 } 2741 // FIXME: Add error handling. 
2742 } 2743 2744 void UnwrappedLineParser::parseNew() { 2745 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 2746 nextToken(); 2747 2748 if (Style.isCSharp()) { 2749 do { 2750 if (FormatTok->is(tok::l_brace)) 2751 parseBracedList(); 2752 2753 if (FormatTok->isOneOf(tok::semi, tok::comma)) 2754 return; 2755 2756 nextToken(); 2757 } while (!eof()); 2758 } 2759 2760 if (Style.Language != FormatStyle::LK_Java) 2761 return; 2762 2763 // In Java, we can parse everything up to the parens, which aren't optional. 2764 do { 2765 // There should not be a ;, { or } before the new's open paren. 2766 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 2767 return; 2768 2769 // Consume the parens. 2770 if (FormatTok->is(tok::l_paren)) { 2771 parseParens(); 2772 2773 // If there is a class body of an anonymous class, consume that as child. 2774 if (FormatTok->is(tok::l_brace)) 2775 parseChildBlock(); 2776 return; 2777 } 2778 nextToken(); 2779 } while (!eof()); 2780 } 2781 2782 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) { 2783 keepAncestorBraces(); 2784 2785 if (FormatTok->is(tok::l_brace)) { 2786 if (!KeepBraces) 2787 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 2788 FormatToken *LeftBrace = FormatTok; 2789 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2790 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2791 /*MunchSemi=*/true, KeepBraces); 2792 if (!KeepBraces) { 2793 assert(!NestedTooDeep.empty()); 2794 if (!NestedTooDeep.back()) 2795 markOptionalBraces(LeftBrace); 2796 } 2797 if (WrapRightBrace) 2798 addUnwrappedLine(); 2799 } else { 2800 parseUnbracedBody(); 2801 } 2802 2803 if (!KeepBraces) 2804 NestedTooDeep.pop_back(); 2805 } 2806 2807 void UnwrappedLineParser::parseForOrWhileLoop() { 2808 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 2809 "'for', 'while' or foreach macro expected"); 2810 const bool KeepBraces = !Style.RemoveBracesLLVM || 2811 
!FormatTok->isOneOf(tok::kw_for, tok::kw_while); 2812 2813 nextToken(); 2814 // JS' for await ( ... 2815 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 2816 nextToken(); 2817 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 2818 nextToken(); 2819 if (FormatTok->is(tok::l_paren)) 2820 parseParens(); 2821 2822 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true); 2823 } 2824 2825 void UnwrappedLineParser::parseDoWhile() { 2826 assert(FormatTok->is(tok::kw_do) && "'do' expected"); 2827 nextToken(); 2828 2829 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile); 2830 2831 // FIXME: Add error handling. 2832 if (!FormatTok->is(tok::kw_while)) { 2833 addUnwrappedLine(); 2834 return; 2835 } 2836 2837 // If in Whitesmiths mode, the line with the while() needs to be indented 2838 // to the same level as the block. 2839 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 2840 ++Line->Level; 2841 2842 nextToken(); 2843 parseStructuralElement(); 2844 } 2845 2846 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 2847 nextToken(); 2848 unsigned OldLineLevel = Line->Level; 2849 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 2850 --Line->Level; 2851 if (LeftAlignLabel) 2852 Line->Level = 0; 2853 2854 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 2855 FormatTok->is(tok::l_brace)) { 2856 2857 CompoundStatementIndenter Indenter(this, Line->Level, 2858 Style.BraceWrapping.AfterCaseLabel, 2859 Style.BraceWrapping.IndentBraces); 2860 parseBlock(); 2861 if (FormatTok->is(tok::kw_break)) { 2862 if (Style.BraceWrapping.AfterControlStatement == 2863 FormatStyle::BWACS_Always) { 2864 addUnwrappedLine(); 2865 if (!Style.IndentCaseBlocks && 2866 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 2867 ++Line->Level; 2868 } 2869 parseStructuralElement(); 2870 } 2871 addUnwrappedLine(); 2872 } else { 2873 if (FormatTok->is(tok::semi)) 2874 nextToken(); 2875 addUnwrappedLine(); 2876 } 2877 Line->Level = 
OldLineLevel; 2878 if (FormatTok->isNot(tok::l_brace)) { 2879 parseStructuralElement(); 2880 addUnwrappedLine(); 2881 } 2882 } 2883 2884 void UnwrappedLineParser::parseCaseLabel() { 2885 assert(FormatTok->is(tok::kw_case) && "'case' expected"); 2886 2887 // FIXME: fix handling of complex expressions here. 2888 do { 2889 nextToken(); 2890 } while (!eof() && !FormatTok->is(tok::colon)); 2891 parseLabel(); 2892 } 2893 2894 void UnwrappedLineParser::parseSwitch() { 2895 assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); 2896 nextToken(); 2897 if (FormatTok->is(tok::l_paren)) 2898 parseParens(); 2899 2900 keepAncestorBraces(); 2901 2902 if (FormatTok->is(tok::l_brace)) { 2903 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2904 parseBlock(); 2905 addUnwrappedLine(); 2906 } else { 2907 addUnwrappedLine(); 2908 ++Line->Level; 2909 parseStructuralElement(); 2910 --Line->Level; 2911 } 2912 2913 if (Style.RemoveBracesLLVM) 2914 NestedTooDeep.pop_back(); 2915 } 2916 2917 // Operators that can follow a C variable. 
2918 static bool isCOperatorFollowingVar(tok::TokenKind kind) { 2919 switch (kind) { 2920 case tok::ampamp: 2921 case tok::ampequal: 2922 case tok::arrow: 2923 case tok::caret: 2924 case tok::caretequal: 2925 case tok::comma: 2926 case tok::ellipsis: 2927 case tok::equal: 2928 case tok::equalequal: 2929 case tok::exclaim: 2930 case tok::exclaimequal: 2931 case tok::greater: 2932 case tok::greaterequal: 2933 case tok::greatergreater: 2934 case tok::greatergreaterequal: 2935 case tok::l_paren: 2936 case tok::l_square: 2937 case tok::less: 2938 case tok::lessequal: 2939 case tok::lessless: 2940 case tok::lesslessequal: 2941 case tok::minus: 2942 case tok::minusequal: 2943 case tok::minusminus: 2944 case tok::percent: 2945 case tok::percentequal: 2946 case tok::period: 2947 case tok::pipe: 2948 case tok::pipeequal: 2949 case tok::pipepipe: 2950 case tok::plus: 2951 case tok::plusequal: 2952 case tok::plusplus: 2953 case tok::question: 2954 case tok::r_brace: 2955 case tok::r_paren: 2956 case tok::r_square: 2957 case tok::semi: 2958 case tok::slash: 2959 case tok::slashequal: 2960 case tok::star: 2961 case tok::starequal: 2962 return true; 2963 default: 2964 return false; 2965 } 2966 } 2967 2968 void UnwrappedLineParser::parseAccessSpecifier() { 2969 FormatToken *AccessSpecifierCandidate = FormatTok; 2970 nextToken(); 2971 // Understand Qt's slots. 2972 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 2973 nextToken(); 2974 // Otherwise, we don't know what it is, and we'd better keep the next token. 2975 if (FormatTok->is(tok::colon)) { 2976 nextToken(); 2977 addUnwrappedLine(); 2978 } else if (!FormatTok->is(tok::coloncolon) && 2979 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { 2980 // Not a variable name nor namespace name. 2981 addUnwrappedLine(); 2982 } else if (AccessSpecifierCandidate) { 2983 // Consider the access specifier to be a C identifier. 
2984 AccessSpecifierCandidate->Tok.setKind(tok::identifier); 2985 } 2986 } 2987 2988 /// \brief Parses a concept definition. 2989 /// \pre The current token has to be the concept keyword. 2990 /// 2991 /// Returns if either the concept has been completely parsed, or if it detects 2992 /// that the concept definition is incorrect. 2993 void UnwrappedLineParser::parseConcept() { 2994 assert(FormatTok->is(tok::kw_concept) && "'concept' expected"); 2995 nextToken(); 2996 if (!FormatTok->is(tok::identifier)) 2997 return; 2998 nextToken(); 2999 if (!FormatTok->is(tok::equal)) 3000 return; 3001 nextToken(); 3002 parseConstraintExpression(); 3003 if (FormatTok->is(tok::semi)) 3004 nextToken(); 3005 addUnwrappedLine(); 3006 } 3007 3008 /// \brief Parses a requires, decides if it is a clause or an expression. 3009 /// \pre The current token has to be the requires keyword. 3010 /// \returns true if it parsed a clause. 3011 bool clang::format::UnwrappedLineParser::parseRequires() { 3012 assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); 3013 auto RequiresToken = FormatTok; 3014 3015 // We try to guess if it is a requires clause, or a requires expression. For 3016 // that we first consume the keyword and check the next token. 3017 nextToken(); 3018 3019 switch (FormatTok->Tok.getKind()) { 3020 case tok::l_brace: 3021 // This can only be an expression, never a clause. 3022 parseRequiresExpression(RequiresToken); 3023 return false; 3024 case tok::l_paren: 3025 // Clauses and expression can start with a paren, it's unclear what we have. 3026 break; 3027 default: 3028 // All other tokens can only be a clause. 3029 parseRequiresClause(RequiresToken); 3030 return true; 3031 } 3032 3033 // Looking forward we would have to decide if there are function declaration 3034 // like arguments to the requires expression: 3035 // requires (T t) { 3036 // Or there is a constraint expression for the requires clause: 3037 // requires (C<T> && ... 
3038 3039 // But first let's look behind. 3040 auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); 3041 3042 if (!PreviousNonComment || 3043 PreviousNonComment->is(TT_RequiresExpressionLBrace)) { 3044 // If there is no token, or an expression left brace, we are a requires 3045 // clause within a requires expression. 3046 parseRequiresClause(RequiresToken); 3047 return true; 3048 } 3049 3050 switch (PreviousNonComment->Tok.getKind()) { 3051 case tok::greater: 3052 case tok::r_paren: 3053 case tok::kw_noexcept: 3054 case tok::kw_const: 3055 // This is a requires clause. 3056 parseRequiresClause(RequiresToken); 3057 return true; 3058 case tok::amp: 3059 case tok::ampamp: { 3060 // This can be either: 3061 // if (... && requires (T t) ...) 3062 // Or 3063 // void member(...) && requires (C<T> ... 3064 // We check the one token before that for a const: 3065 // void member(...) const && requires (C<T> ... 3066 auto PrevPrev = PreviousNonComment->getPreviousNonComment(); 3067 if (PrevPrev && PrevPrev->is(tok::kw_const)) { 3068 parseRequiresClause(RequiresToken); 3069 return true; 3070 } 3071 break; 3072 } 3073 default: 3074 // It's an expression. 3075 parseRequiresExpression(RequiresToken); 3076 return false; 3077 } 3078 3079 // Now we look forward and try to check if the paren content is a parameter 3080 // list. The parameters can be cv-qualified and contain references or 3081 // pointers. 3082 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds 3083 // of stuff: typename, const, *, &, &&, ::, identifiers. 
3084 3085 int NextTokenOffset = 1; 3086 auto NextToken = Tokens->peekNextToken(NextTokenOffset); 3087 auto PeekNext = [&NextTokenOffset, &NextToken, this] { 3088 ++NextTokenOffset; 3089 NextToken = Tokens->peekNextToken(NextTokenOffset); 3090 }; 3091 3092 bool FoundType = false; 3093 bool LastWasColonColon = false; 3094 int OpenAngles = 0; 3095 3096 for (; NextTokenOffset < 50; PeekNext()) { 3097 switch (NextToken->Tok.getKind()) { 3098 case tok::kw_volatile: 3099 case tok::kw_const: 3100 case tok::comma: 3101 parseRequiresExpression(RequiresToken); 3102 return false; 3103 case tok::r_paren: 3104 case tok::pipepipe: 3105 parseRequiresClause(RequiresToken); 3106 return true; 3107 case tok::eof: 3108 // Break out of the loop. 3109 NextTokenOffset = 50; 3110 break; 3111 case tok::coloncolon: 3112 LastWasColonColon = true; 3113 break; 3114 case tok::identifier: 3115 if (FoundType && !LastWasColonColon && OpenAngles == 0) { 3116 parseRequiresExpression(RequiresToken); 3117 return false; 3118 } 3119 FoundType = true; 3120 LastWasColonColon = false; 3121 break; 3122 case tok::less: 3123 ++OpenAngles; 3124 break; 3125 case tok::greater: 3126 --OpenAngles; 3127 break; 3128 default: 3129 if (NextToken->isSimpleTypeSpecifier()) { 3130 parseRequiresExpression(RequiresToken); 3131 return false; 3132 } 3133 break; 3134 } 3135 } 3136 3137 // This seems to be a complicated expression, just assume it's a clause. 3138 parseRequiresClause(RequiresToken); 3139 return true; 3140 } 3141 3142 /// \brief Parses a requires clause. 3143 /// \param RequiresToken The requires keyword token, which starts this clause. 3144 /// \pre We need to be on the next token after the requires keyword. 3145 /// \sa parseRequiresExpression 3146 /// 3147 /// Returns if it either has finished parsing the clause, or it detects, that 3148 /// the clause is incorrect. 
3149 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { 3150 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3151 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3152 3153 // If there is no previous token, we are within a requires expression, 3154 // otherwise we will always have the template or function declaration in front 3155 // of it. 3156 bool InRequiresExpression = 3157 !RequiresToken->Previous || 3158 RequiresToken->Previous->is(TT_RequiresExpressionLBrace); 3159 3160 RequiresToken->setFinalizedType(InRequiresExpression 3161 ? TT_RequiresClauseInARequiresExpression 3162 : TT_RequiresClause); 3163 3164 parseConstraintExpression(); 3165 3166 if (!InRequiresExpression) 3167 FormatTok->Previous->ClosesRequiresClause = true; 3168 } 3169 3170 /// \brief Parses a requires expression. 3171 /// \param RequiresToken The requires keyword token, which starts this clause. 3172 /// \pre We need to be on the next token after the requires keyword. 3173 /// \sa parseRequiresClause 3174 /// 3175 /// Returns if it either has finished parsing the expression, or it detects, 3176 /// that the expression is incorrect. 3177 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) { 3178 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3179 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3180 3181 RequiresToken->setFinalizedType(TT_RequiresExpression); 3182 3183 if (FormatTok->is(tok::l_paren)) { 3184 FormatTok->setFinalizedType(TT_RequiresExpressionLParen); 3185 parseParens(); 3186 } 3187 3188 if (FormatTok->is(tok::l_brace)) { 3189 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); 3190 parseChildBlock(/*CanContainBracedList=*/false, 3191 /*NextLBracesType=*/TT_CompoundRequirementLBrace); 3192 } 3193 } 3194 3195 /// \brief Parses a constraint expression. 3196 /// 3197 /// This is either the definition of a concept, or the body of a requires 3198 /// clause. 
It returns, when the parsing is complete, or the expression is 3199 /// incorrect. 3200 void UnwrappedLineParser::parseConstraintExpression() { 3201 // The special handling for lambdas is needed since tryToParseLambda() eats a 3202 // token and if a requires expression is the last part of a requires clause 3203 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is 3204 // not set on the correct token. Thus we need to be aware if we even expect a 3205 // lambda to be possible. 3206 // template <typename T> requires requires { ... } [[nodiscard]] ...; 3207 bool LambdaNextTimeAllowed = true; 3208 do { 3209 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false); 3210 3211 switch (FormatTok->Tok.getKind()) { 3212 case tok::kw_requires: { 3213 auto RequiresToken = FormatTok; 3214 nextToken(); 3215 parseRequiresExpression(RequiresToken); 3216 break; 3217 } 3218 3219 case tok::l_paren: 3220 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator); 3221 break; 3222 3223 case tok::l_square: 3224 if (!LambdaThisTimeAllowed || !tryToParseLambda()) 3225 return; 3226 break; 3227 3228 case tok::kw_const: 3229 case tok::semi: 3230 case tok::kw_class: 3231 case tok::kw_struct: 3232 case tok::kw_union: 3233 return; 3234 3235 case tok::l_brace: 3236 // Potential function body. 
3237 return; 3238 3239 case tok::ampamp: 3240 case tok::pipepipe: 3241 FormatTok->setFinalizedType(TT_BinaryOperator); 3242 nextToken(); 3243 LambdaNextTimeAllowed = true; 3244 break; 3245 3246 case tok::comma: 3247 case tok::comment: 3248 LambdaNextTimeAllowed = LambdaThisTimeAllowed; 3249 nextToken(); 3250 break; 3251 3252 case tok::kw_sizeof: 3253 case tok::greater: 3254 case tok::greaterequal: 3255 case tok::greatergreater: 3256 case tok::less: 3257 case tok::lessequal: 3258 case tok::lessless: 3259 case tok::equalequal: 3260 case tok::exclaim: 3261 case tok::exclaimequal: 3262 case tok::plus: 3263 case tok::minus: 3264 case tok::star: 3265 case tok::slash: 3266 case tok::kw_decltype: 3267 LambdaNextTimeAllowed = true; 3268 // Just eat them. 3269 nextToken(); 3270 break; 3271 3272 case tok::numeric_constant: 3273 case tok::coloncolon: 3274 case tok::kw_true: 3275 case tok::kw_false: 3276 // Just eat them. 3277 nextToken(); 3278 break; 3279 3280 case tok::kw_static_cast: 3281 case tok::kw_const_cast: 3282 case tok::kw_reinterpret_cast: 3283 case tok::kw_dynamic_cast: 3284 nextToken(); 3285 if (!FormatTok->is(tok::less)) 3286 return; 3287 3288 nextToken(); 3289 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 3290 /*ClosingBraceKind=*/tok::greater); 3291 break; 3292 3293 case tok::kw_bool: 3294 // bool is only allowed if it is directly followed by a paren for a cast: 3295 // concept C = bool(...); 3296 // and bool is the only type, all other types as cast must be inside a 3297 // cast to bool an thus are handled by the other cases. 3298 nextToken(); 3299 if (FormatTok->isNot(tok::l_paren)) 3300 return; 3301 parseParens(); 3302 break; 3303 3304 default: 3305 if (!FormatTok->Tok.getIdentifierInfo()) { 3306 // Identifiers are part of the default case, we check for more then 3307 // tok::identifier to handle builtin type traits. 
3308 return; 3309 } 3310 3311 // We need to differentiate identifiers for a template deduction guide, 3312 // variables, or function return types (the constraint expression has 3313 // ended before that), and basically all other cases. But it's easier to 3314 // check the other way around. 3315 assert(FormatTok->Previous); 3316 switch (FormatTok->Previous->Tok.getKind()) { 3317 case tok::coloncolon: // Nested identifier. 3318 case tok::ampamp: // Start of a function or variable for the 3319 case tok::pipepipe: // constraint expression. 3320 case tok::kw_requires: // Initial identifier of a requires clause. 3321 case tok::equal: // Initial identifier of a concept declaration. 3322 break; 3323 default: 3324 return; 3325 } 3326 3327 // Read identifier with optional template declaration. 3328 nextToken(); 3329 if (FormatTok->is(tok::less)) { 3330 nextToken(); 3331 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 3332 /*ClosingBraceKind=*/tok::greater); 3333 } 3334 break; 3335 } 3336 } while (!eof()); 3337 } 3338 3339 bool UnwrappedLineParser::parseEnum() { 3340 const FormatToken &InitialToken = *FormatTok; 3341 3342 // Won't be 'enum' for NS_ENUMs. 3343 if (FormatTok->is(tok::kw_enum)) 3344 nextToken(); 3345 3346 // In TypeScript, "enum" can also be used as property name, e.g. in interface 3347 // declarations. An "enum" keyword followed by a colon would be a syntax 3348 // error and thus assume it is just an identifier. 3349 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 3350 return false; 3351 3352 // In protobuf, "enum" can be used as a field name. 3353 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 3354 return false; 3355 3356 // Eat up enum class ... 
3357 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct)) 3358 nextToken(); 3359 3360 while (FormatTok->Tok.getIdentifierInfo() || 3361 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 3362 tok::greater, tok::comma, tok::question)) { 3363 nextToken(); 3364 // We can have macros or attributes in between 'enum' and the enum name. 3365 if (FormatTok->is(tok::l_paren)) 3366 parseParens(); 3367 if (FormatTok->is(tok::identifier)) { 3368 nextToken(); 3369 // If there are two identifiers in a row, this is likely an elaborate 3370 // return type. In Java, this can be "implements", etc. 3371 if (Style.isCpp() && FormatTok->is(tok::identifier)) 3372 return false; 3373 } 3374 } 3375 3376 // Just a declaration or something is wrong. 3377 if (FormatTok->isNot(tok::l_brace)) 3378 return true; 3379 FormatTok->setFinalizedType(TT_EnumLBrace); 3380 FormatTok->setBlockKind(BK_Block); 3381 3382 if (Style.Language == FormatStyle::LK_Java) { 3383 // Java enums are different. 3384 parseJavaEnumBody(); 3385 return true; 3386 } 3387 if (Style.Language == FormatStyle::LK_Proto) { 3388 parseBlock(/*MustBeDeclaration=*/true); 3389 return true; 3390 } 3391 3392 if (!Style.AllowShortEnumsOnASingleLine && 3393 ShouldBreakBeforeBrace(Style, InitialToken)) 3394 addUnwrappedLine(); 3395 // Parse enum body. 3396 nextToken(); 3397 if (!Style.AllowShortEnumsOnASingleLine) { 3398 addUnwrappedLine(); 3399 Line->Level += 1; 3400 } 3401 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true, 3402 /*IsEnum=*/true); 3403 if (!Style.AllowShortEnumsOnASingleLine) 3404 Line->Level -= 1; 3405 if (HasError) { 3406 if (FormatTok->is(tok::semi)) 3407 nextToken(); 3408 addUnwrappedLine(); 3409 } 3410 return true; 3411 3412 // There is no addUnwrappedLine() here so that we fall through to parsing a 3413 // structural element afterwards. Thus, in "enum A {} n, m;", 3414 // "} n, m;" will end up in one unwrapped line. 
3415 } 3416 3417 bool UnwrappedLineParser::parseStructLike() { 3418 // parseRecord falls through and does not yet add an unwrapped line as a 3419 // record declaration or definition can start a structural element. 3420 parseRecord(); 3421 // This does not apply to Java, JavaScript and C#. 3422 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 3423 Style.isCSharp()) { 3424 if (FormatTok->is(tok::semi)) 3425 nextToken(); 3426 addUnwrappedLine(); 3427 return true; 3428 } 3429 return false; 3430 } 3431 3432 namespace { 3433 // A class used to set and restore the Token position when peeking 3434 // ahead in the token source. 3435 class ScopedTokenPosition { 3436 unsigned StoredPosition; 3437 FormatTokenSource *Tokens; 3438 3439 public: 3440 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 3441 assert(Tokens && "Tokens expected to not be null"); 3442 StoredPosition = Tokens->getPosition(); 3443 } 3444 3445 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 3446 }; 3447 } // namespace 3448 3449 // Look to see if we have [[ by looking ahead, if 3450 // its not then rewind to the original position. 3451 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 3452 ScopedTokenPosition AutoPosition(Tokens); 3453 FormatToken *Tok = Tokens->getNextToken(); 3454 // We already read the first [ check for the second. 3455 if (!Tok->is(tok::l_square)) 3456 return false; 3457 // Double check that the attribute is just something 3458 // fairly simple. 
3459 while (Tok->isNot(tok::eof)) { 3460 if (Tok->is(tok::r_square)) 3461 break; 3462 Tok = Tokens->getNextToken(); 3463 } 3464 if (Tok->is(tok::eof)) 3465 return false; 3466 Tok = Tokens->getNextToken(); 3467 if (!Tok->is(tok::r_square)) 3468 return false; 3469 Tok = Tokens->getNextToken(); 3470 if (Tok->is(tok::semi)) 3471 return false; 3472 return true; 3473 } 3474 3475 void UnwrappedLineParser::parseJavaEnumBody() { 3476 assert(FormatTok->is(tok::l_brace)); 3477 const FormatToken *OpeningBrace = FormatTok; 3478 3479 // Determine whether the enum is simple, i.e. does not have a semicolon or 3480 // constants with class bodies. Simple enums can be formatted like braced 3481 // lists, contracted to a single line, etc. 3482 unsigned StoredPosition = Tokens->getPosition(); 3483 bool IsSimple = true; 3484 FormatToken *Tok = Tokens->getNextToken(); 3485 while (!Tok->is(tok::eof)) { 3486 if (Tok->is(tok::r_brace)) 3487 break; 3488 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 3489 IsSimple = false; 3490 break; 3491 } 3492 // FIXME: This will also mark enums with braces in the arguments to enum 3493 // constants as "not simple". This is probably fine in practice, though. 3494 Tok = Tokens->getNextToken(); 3495 } 3496 FormatTok = Tokens->setPosition(StoredPosition); 3497 3498 if (IsSimple) { 3499 nextToken(); 3500 parseBracedList(); 3501 addUnwrappedLine(); 3502 return; 3503 } 3504 3505 // Parse the body of a more complex enum. 3506 // First add a line for everything up to the "{". 3507 nextToken(); 3508 addUnwrappedLine(); 3509 ++Line->Level; 3510 3511 // Parse the enum constants. 3512 while (FormatTok) { 3513 if (FormatTok->is(tok::l_brace)) { 3514 // Parse the constant's class body. 
3515 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 3516 /*MunchSemi=*/false); 3517 } else if (FormatTok->is(tok::l_paren)) { 3518 parseParens(); 3519 } else if (FormatTok->is(tok::comma)) { 3520 nextToken(); 3521 addUnwrappedLine(); 3522 } else if (FormatTok->is(tok::semi)) { 3523 nextToken(); 3524 addUnwrappedLine(); 3525 break; 3526 } else if (FormatTok->is(tok::r_brace)) { 3527 addUnwrappedLine(); 3528 break; 3529 } else { 3530 nextToken(); 3531 } 3532 } 3533 3534 // Parse the class body after the enum's ";" if any. 3535 parseLevel(OpeningBrace, /*CanContainBracedList=*/true); 3536 nextToken(); 3537 --Line->Level; 3538 addUnwrappedLine(); 3539 } 3540 3541 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3542 const FormatToken &InitialToken = *FormatTok; 3543 nextToken(); 3544 3545 // The actual identifier can be a nested name specifier, and in macros 3546 // it is often token-pasted. 3547 // An [[attribute]] can be before the identifier. 3548 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 3549 tok::kw___attribute, tok::kw___declspec, 3550 tok::kw_alignas, tok::l_square, tok::r_square) || 3551 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 3552 FormatTok->isOneOf(tok::period, tok::comma))) { 3553 if (Style.isJavaScript() && 3554 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 3555 // JavaScript/TypeScript supports inline object types in 3556 // extends/implements positions: 3557 // class Foo implements {bar: number} { } 3558 nextToken(); 3559 if (FormatTok->is(tok::l_brace)) { 3560 tryToParseBracedList(); 3561 continue; 3562 } 3563 } 3564 bool IsNonMacroIdentifier = 3565 FormatTok->is(tok::identifier) && 3566 FormatTok->TokenText != FormatTok->TokenText.upper(); 3567 nextToken(); 3568 // We can have macros or attributes in between 'class' and the class name. 
3569 if (!IsNonMacroIdentifier) { 3570 if (FormatTok->is(tok::l_paren)) { 3571 parseParens(); 3572 } else if (FormatTok->is(TT_AttributeSquare)) { 3573 parseSquare(); 3574 // Consume the closing TT_AttributeSquare. 3575 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare)) 3576 nextToken(); 3577 } 3578 } 3579 } 3580 3581 // Note that parsing away template declarations here leads to incorrectly 3582 // accepting function declarations as record declarations. 3583 // In general, we cannot solve this problem. Consider: 3584 // class A<int> B() {} 3585 // which can be a function definition or a class definition when B() is a 3586 // macro. If we find enough real-world cases where this is a problem, we 3587 // can parse for the 'template' keyword in the beginning of the statement, 3588 // and thus rule out the record production in case there is no template 3589 // (this would still leave us with an ambiguity between template function 3590 // and class declarations). 3591 if (FormatTok->isOneOf(tok::colon, tok::less)) { 3592 do { 3593 if (FormatTok->is(tok::l_brace)) { 3594 calculateBraceTypes(/*ExpectClassBody=*/true); 3595 if (!tryToParseBracedList()) 3596 break; 3597 } 3598 if (FormatTok->is(tok::l_square)) { 3599 FormatToken *Previous = FormatTok->Previous; 3600 if (!Previous || 3601 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) { 3602 // Don't try parsing a lambda if we had a closing parenthesis before, 3603 // it was probably a pointer to an array: int (*)[]. 
3604 if (!tryToParseLambda()) 3605 break; 3606 } else { 3607 parseSquare(); 3608 continue; 3609 } 3610 } 3611 if (FormatTok->is(tok::semi)) 3612 return; 3613 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 3614 addUnwrappedLine(); 3615 nextToken(); 3616 parseCSharpGenericTypeConstraint(); 3617 break; 3618 } 3619 nextToken(); 3620 } while (!eof()); 3621 } 3622 3623 auto GetBraceType = [](const FormatToken &RecordTok) { 3624 switch (RecordTok.Tok.getKind()) { 3625 case tok::kw_class: 3626 return TT_ClassLBrace; 3627 case tok::kw_struct: 3628 return TT_StructLBrace; 3629 case tok::kw_union: 3630 return TT_UnionLBrace; 3631 default: 3632 // Useful for e.g. interface. 3633 return TT_RecordLBrace; 3634 } 3635 }; 3636 if (FormatTok->is(tok::l_brace)) { 3637 FormatTok->setFinalizedType(GetBraceType(InitialToken)); 3638 if (ParseAsExpr) { 3639 parseChildBlock(); 3640 } else { 3641 if (ShouldBreakBeforeBrace(Style, InitialToken)) 3642 addUnwrappedLine(); 3643 3644 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 3645 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 3646 } 3647 } 3648 // There is no addUnwrappedLine() here so that we fall through to parsing a 3649 // structural element afterwards. Thus, in "class A {} n, m;", 3650 // "} n, m;" will end up in one unwrapped line. 
3651 } 3652 3653 void UnwrappedLineParser::parseObjCMethod() { 3654 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) && 3655 "'(' or identifier expected."); 3656 do { 3657 if (FormatTok->is(tok::semi)) { 3658 nextToken(); 3659 addUnwrappedLine(); 3660 return; 3661 } else if (FormatTok->is(tok::l_brace)) { 3662 if (Style.BraceWrapping.AfterFunction) 3663 addUnwrappedLine(); 3664 parseBlock(); 3665 addUnwrappedLine(); 3666 return; 3667 } else { 3668 nextToken(); 3669 } 3670 } while (!eof()); 3671 } 3672 3673 void UnwrappedLineParser::parseObjCProtocolList() { 3674 assert(FormatTok->is(tok::less) && "'<' expected."); 3675 do { 3676 nextToken(); 3677 // Early exit in case someone forgot a close angle. 3678 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3679 FormatTok->isObjCAtKeyword(tok::objc_end)) 3680 return; 3681 } while (!eof() && FormatTok->isNot(tok::greater)); 3682 nextToken(); // Skip '>'. 3683 } 3684 3685 void UnwrappedLineParser::parseObjCUntilAtEnd() { 3686 do { 3687 if (FormatTok->isObjCAtKeyword(tok::objc_end)) { 3688 nextToken(); 3689 addUnwrappedLine(); 3690 break; 3691 } 3692 if (FormatTok->is(tok::l_brace)) { 3693 parseBlock(); 3694 // In ObjC interfaces, nothing should be following the "}". 3695 addUnwrappedLine(); 3696 } else if (FormatTok->is(tok::r_brace)) { 3697 // Ignore stray "}". parseStructuralElement doesn't consume them. 
3698 nextToken(); 3699 addUnwrappedLine(); 3700 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 3701 nextToken(); 3702 parseObjCMethod(); 3703 } else { 3704 parseStructuralElement(); 3705 } 3706 } while (!eof()); 3707 } 3708 3709 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 3710 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 3711 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 3712 nextToken(); 3713 nextToken(); // interface name 3714 3715 // @interface can be followed by a lightweight generic 3716 // specialization list, then either a base class or a category. 3717 if (FormatTok->is(tok::less)) 3718 parseObjCLightweightGenerics(); 3719 if (FormatTok->is(tok::colon)) { 3720 nextToken(); 3721 nextToken(); // base class name 3722 // The base class can also have lightweight generics applied to it. 3723 if (FormatTok->is(tok::less)) 3724 parseObjCLightweightGenerics(); 3725 } else if (FormatTok->is(tok::l_paren)) 3726 // Skip category, if present. 3727 parseParens(); 3728 3729 if (FormatTok->is(tok::less)) 3730 parseObjCProtocolList(); 3731 3732 if (FormatTok->is(tok::l_brace)) { 3733 if (Style.BraceWrapping.AfterObjCDeclaration) 3734 addUnwrappedLine(); 3735 parseBlock(/*MustBeDeclaration=*/true); 3736 } 3737 3738 // With instance variables, this puts '}' on its own line. Without instance 3739 // variables, this ends the @interface line. 3740 addUnwrappedLine(); 3741 3742 parseObjCUntilAtEnd(); 3743 } 3744 3745 void UnwrappedLineParser::parseObjCLightweightGenerics() { 3746 assert(FormatTok->is(tok::less)); 3747 // Unlike protocol lists, generic parameterizations support 3748 // nested angles: 3749 // 3750 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 3751 // NSObject <NSCopying, NSSecureCoding> 3752 // 3753 // so we need to count how many open angles we have left. 3754 unsigned NumOpenAngles = 1; 3755 do { 3756 nextToken(); 3757 // Early exit in case someone forgot a close angle. 
3758 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3759 FormatTok->isObjCAtKeyword(tok::objc_end)) 3760 break; 3761 if (FormatTok->is(tok::less)) 3762 ++NumOpenAngles; 3763 else if (FormatTok->is(tok::greater)) { 3764 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 3765 --NumOpenAngles; 3766 } 3767 } while (!eof() && NumOpenAngles != 0); 3768 nextToken(); // Skip '>'. 3769 } 3770 3771 // Returns true for the declaration/definition form of @protocol, 3772 // false for the expression form. 3773 bool UnwrappedLineParser::parseObjCProtocol() { 3774 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 3775 nextToken(); 3776 3777 if (FormatTok->is(tok::l_paren)) 3778 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 3779 return false; 3780 3781 // The definition/declaration form, 3782 // @protocol Foo 3783 // - (int)someMethod; 3784 // @end 3785 3786 nextToken(); // protocol name 3787 3788 if (FormatTok->is(tok::less)) 3789 parseObjCProtocolList(); 3790 3791 // Check for protocol declaration. 3792 if (FormatTok->is(tok::semi)) { 3793 nextToken(); 3794 addUnwrappedLine(); 3795 return true; 3796 } 3797 3798 addUnwrappedLine(); 3799 parseObjCUntilAtEnd(); 3800 return true; 3801 } 3802 3803 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 3804 bool IsImport = FormatTok->is(Keywords.kw_import); 3805 assert(IsImport || FormatTok->is(tok::kw_export)); 3806 nextToken(); 3807 3808 // Consume the "default" in "export default class/function". 3809 if (FormatTok->is(tok::kw_default)) 3810 nextToken(); 3811 3812 // Consume "async function", "function" and "default function", so that these 3813 // get parsed as free-standing JS functions, i.e. do not require a trailing 3814 // semicolon. 
3815 if (FormatTok->is(Keywords.kw_async)) 3816 nextToken(); 3817 if (FormatTok->is(Keywords.kw_function)) { 3818 nextToken(); 3819 return; 3820 } 3821 3822 // For imports, `export *`, `export {...}`, consume the rest of the line up 3823 // to the terminating `;`. For everything else, just return and continue 3824 // parsing the structural element, i.e. the declaration or expression for 3825 // `export default`. 3826 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 3827 !FormatTok->isStringLiteral()) 3828 return; 3829 3830 while (!eof()) { 3831 if (FormatTok->is(tok::semi)) 3832 return; 3833 if (Line->Tokens.empty()) { 3834 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 3835 // import statement should terminate. 3836 return; 3837 } 3838 if (FormatTok->is(tok::l_brace)) { 3839 FormatTok->setBlockKind(BK_Block); 3840 nextToken(); 3841 parseBracedList(); 3842 } else { 3843 nextToken(); 3844 } 3845 } 3846 } 3847 3848 void UnwrappedLineParser::parseStatementMacro() { 3849 nextToken(); 3850 if (FormatTok->is(tok::l_paren)) 3851 parseParens(); 3852 if (FormatTok->is(tok::semi)) 3853 nextToken(); 3854 addUnwrappedLine(); 3855 } 3856 3857 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 3858 StringRef Prefix = "") { 3859 llvm::dbgs() << Prefix << "Line(" << Line.Level 3860 << ", FSC=" << Line.FirstStartColumn << ")" 3861 << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 3862 for (const auto &Node : Line.Tokens) { 3863 llvm::dbgs() << Node.Tok->Tok.getName() << "[" 3864 << "T=" << static_cast<unsigned>(Node.Tok->getType()) 3865 << ", OC=" << Node.Tok->OriginalColumn << "] "; 3866 } 3867 for (const auto &Node : Line.Tokens) 3868 for (const auto &ChildNode : Node.Children) 3869 printDebugInfo(ChildNode, "\nChild: "); 3870 3871 llvm::dbgs() << "\n"; 3872 } 3873 3874 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 3875 if (Line->Tokens.empty()) 3876 return; 3877 LLVM_DEBUG({ 3878 if (CurrentLines == &Lines) 3879 printDebugInfo(*Line); 3880 }); 3881 3882 // If this line closes a block when in Whitesmiths mode, remember that 3883 // information so that the level can be decreased after the line is added. 3884 // This has to happen after the addition of the line since the line itself 3885 // needs to be indented. 3886 bool ClosesWhitesmithsBlock = 3887 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 3888 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 3889 3890 CurrentLines->push_back(std::move(*Line)); 3891 Line->Tokens.clear(); 3892 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 3893 Line->FirstStartColumn = 0; 3894 3895 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 3896 --Line->Level; 3897 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 3898 CurrentLines->append( 3899 std::make_move_iterator(PreprocessorDirectives.begin()), 3900 std::make_move_iterator(PreprocessorDirectives.end())); 3901 PreprocessorDirectives.clear(); 3902 } 3903 // Disconnect the current token from the last token on the previous line. 
3904 FormatTok->Previous = nullptr; 3905 } 3906 3907 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 3908 3909 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 3910 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 3911 FormatTok.NewlinesBefore > 0; 3912 } 3913 3914 // Checks if \p FormatTok is a line comment that continues the line comment 3915 // section on \p Line. 3916 static bool 3917 continuesLineCommentSection(const FormatToken &FormatTok, 3918 const UnwrappedLine &Line, 3919 const llvm::Regex &CommentPragmasRegex) { 3920 if (Line.Tokens.empty()) 3921 return false; 3922 3923 StringRef IndentContent = FormatTok.TokenText; 3924 if (FormatTok.TokenText.startswith("//") || 3925 FormatTok.TokenText.startswith("/*")) 3926 IndentContent = FormatTok.TokenText.substr(2); 3927 if (CommentPragmasRegex.match(IndentContent)) 3928 return false; 3929 3930 // If Line starts with a line comment, then FormatTok continues the comment 3931 // section if its original column is greater or equal to the original start 3932 // column of the line. 3933 // 3934 // Define the min column token of a line as follows: if a line ends in '{' or 3935 // contains a '{' followed by a line comment, then the min column token is 3936 // that '{'. Otherwise, the min column token of the line is the first token of 3937 // the line. 3938 // 3939 // If Line starts with a token other than a line comment, then FormatTok 3940 // continues the comment section if its original column is greater than the 3941 // original start column of the min column token of the line. 
3942 // 3943 // For example, the second line comment continues the first in these cases: 3944 // 3945 // // first line 3946 // // second line 3947 // 3948 // and: 3949 // 3950 // // first line 3951 // // second line 3952 // 3953 // and: 3954 // 3955 // int i; // first line 3956 // // second line 3957 // 3958 // and: 3959 // 3960 // do { // first line 3961 // // second line 3962 // int i; 3963 // } while (true); 3964 // 3965 // and: 3966 // 3967 // enum { 3968 // a, // first line 3969 // // second line 3970 // b 3971 // }; 3972 // 3973 // The second line comment doesn't continue the first in these cases: 3974 // 3975 // // first line 3976 // // second line 3977 // 3978 // and: 3979 // 3980 // int i; // first line 3981 // // second line 3982 // 3983 // and: 3984 // 3985 // do { // first line 3986 // // second line 3987 // int i; 3988 // } while (true); 3989 // 3990 // and: 3991 // 3992 // enum { 3993 // a, // first line 3994 // // second line 3995 // }; 3996 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 3997 3998 // Scan for '{//'. If found, use the column of '{' as a min column for line 3999 // comment section continuation. 4000 const FormatToken *PreviousToken = nullptr; 4001 for (const UnwrappedLineNode &Node : Line.Tokens) { 4002 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4003 isLineComment(*Node.Tok)) { 4004 MinColumnToken = PreviousToken; 4005 break; 4006 } 4007 PreviousToken = Node.Tok; 4008 4009 // Grab the last newline preceding a token in this unwrapped line. 
4010 if (Node.Tok->NewlinesBefore > 0) 4011 MinColumnToken = Node.Tok; 4012 } 4013 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4014 MinColumnToken = PreviousToken; 4015 4016 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4017 MinColumnToken); 4018 } 4019 4020 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4021 bool JustComments = Line->Tokens.empty(); 4022 for (FormatToken *Tok : CommentsBeforeNextToken) { 4023 // Line comments that belong to the same line comment section are put on the 4024 // same line since later we might want to reflow content between them. 4025 // Additional fine-grained breaking of line comment sections is controlled 4026 // by the class BreakableLineCommentSection in case it is desirable to keep 4027 // several line comment sections in the same unwrapped line. 4028 // 4029 // FIXME: Consider putting separate line comment sections as children to the 4030 // unwrapped line instead. 4031 Tok->ContinuesLineCommentSection = 4032 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 4033 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4034 addUnwrappedLine(); 4035 pushToken(Tok); 4036 } 4037 if (NewlineBeforeNext && JustComments) 4038 addUnwrappedLine(); 4039 CommentsBeforeNextToken.clear(); 4040 } 4041 4042 void UnwrappedLineParser::nextToken(int LevelDifference) { 4043 if (eof()) 4044 return; 4045 flushComments(isOnNewLine(*FormatTok)); 4046 pushToken(FormatTok); 4047 FormatToken *Previous = FormatTok; 4048 if (!Style.isJavaScript()) 4049 readToken(LevelDifference); 4050 else 4051 readTokenWithJavaScriptASI(); 4052 FormatTok->Previous = Previous; 4053 } 4054 4055 void UnwrappedLineParser::distributeComments( 4056 const SmallVectorImpl<FormatToken *> &Comments, 4057 const FormatToken *NextTok) { 4058 // Whether or not a line comment token continues a line is controlled by 4059 // the method continuesLineCommentSection, with the following caveat: 4060 // 
4061 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4062 // that each comment line from the trail is aligned with the next token, if 4063 // the next token exists. If a trail exists, the beginning of the maximal 4064 // trail is marked as a start of a new comment section. 4065 // 4066 // For example in this code: 4067 // 4068 // int a; // line about a 4069 // // line 1 about b 4070 // // line 2 about b 4071 // int b; 4072 // 4073 // the two lines about b form a maximal trail, so there are two sections, the 4074 // first one consisting of the single comment "// line about a" and the 4075 // second one consisting of the next two comments. 4076 if (Comments.empty()) 4077 return; 4078 bool ShouldPushCommentsInCurrentLine = true; 4079 bool HasTrailAlignedWithNextToken = false; 4080 unsigned StartOfTrailAlignedWithNextToken = 0; 4081 if (NextTok) { 4082 // We are skipping the first element intentionally. 4083 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4084 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4085 HasTrailAlignedWithNextToken = true; 4086 StartOfTrailAlignedWithNextToken = i; 4087 } 4088 } 4089 } 4090 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4091 FormatToken *FormatTok = Comments[i]; 4092 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4093 FormatTok->ContinuesLineCommentSection = false; 4094 } else { 4095 FormatTok->ContinuesLineCommentSection = 4096 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 4097 } 4098 if (!FormatTok->ContinuesLineCommentSection && 4099 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) 4100 ShouldPushCommentsInCurrentLine = false; 4101 if (ShouldPushCommentsInCurrentLine) 4102 pushToken(FormatTok); 4103 else 4104 CommentsBeforeNextToken.push_back(FormatTok); 4105 } 4106 } 4107 4108 void UnwrappedLineParser::readToken(int LevelDifference) { 4109 SmallVector<FormatToken *, 1> Comments; 4110 bool PreviousWasComment = 
false; 4111 bool FirstNonCommentOnLine = false; 4112 do { 4113 FormatTok = Tokens->getNextToken(); 4114 assert(FormatTok); 4115 while (FormatTok->getType() == TT_ConflictStart || 4116 FormatTok->getType() == TT_ConflictEnd || 4117 FormatTok->getType() == TT_ConflictAlternative) { 4118 if (FormatTok->getType() == TT_ConflictStart) 4119 conditionalCompilationStart(/*Unreachable=*/false); 4120 else if (FormatTok->getType() == TT_ConflictAlternative) 4121 conditionalCompilationAlternative(); 4122 else if (FormatTok->getType() == TT_ConflictEnd) 4123 conditionalCompilationEnd(); 4124 FormatTok = Tokens->getNextToken(); 4125 FormatTok->MustBreakBefore = true; 4126 } 4127 4128 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4129 const FormatToken &Tok, 4130 bool PreviousWasComment) { 4131 auto IsFirstOnLine = [](const FormatToken &Tok) { 4132 return Tok.HasUnescapedNewline || Tok.IsFirst; 4133 }; 4134 4135 // Consider preprocessor directives preceded by block comments as first 4136 // on line. 4137 if (PreviousWasComment) 4138 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4139 return IsFirstOnLine(Tok); 4140 }; 4141 4142 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4143 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4144 PreviousWasComment = FormatTok->is(tok::comment); 4145 4146 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4147 FirstNonCommentOnLine) { 4148 distributeComments(Comments, FormatTok); 4149 Comments.clear(); 4150 // If there is an unfinished unwrapped line, we flush the preprocessor 4151 // directives only after that unwrapped line was finished later. 
4152 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4153 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4154 assert((LevelDifference >= 0 || 4155 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4156 "LevelDifference makes Line->Level negative"); 4157 Line->Level += LevelDifference; 4158 // Comments stored before the preprocessor directive need to be output 4159 // before the preprocessor directive, at the same level as the 4160 // preprocessor directive, as we consider them to apply to the directive. 4161 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4162 PPBranchLevel > 0) 4163 Line->Level += PPBranchLevel; 4164 flushComments(isOnNewLine(*FormatTok)); 4165 parsePPDirective(); 4166 PreviousWasComment = FormatTok->is(tok::comment); 4167 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4168 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4169 } 4170 4171 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4172 !Line->InPPDirective) 4173 continue; 4174 4175 if (!FormatTok->is(tok::comment)) { 4176 distributeComments(Comments, FormatTok); 4177 Comments.clear(); 4178 return; 4179 } 4180 4181 Comments.push_back(FormatTok); 4182 } while (!eof()); 4183 4184 distributeComments(Comments, nullptr); 4185 Comments.clear(); 4186 } 4187 4188 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 4189 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 4190 if (MustBreakBeforeNextToken) { 4191 Line->Tokens.back().Tok->MustBreakBefore = true; 4192 MustBreakBeforeNextToken = false; 4193 } 4194 } 4195 4196 } // end namespace format 4197 } // end namespace clang 4198