1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "TokenAnnotator.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/Support/Debug.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 #include <algorithm> 23 24 #define DEBUG_TYPE "format-parser" 25 26 namespace clang { 27 namespace format { 28 29 class FormatTokenSource { 30 public: 31 virtual ~FormatTokenSource() {} 32 33 // Returns the next token in the token stream. 34 virtual FormatToken *getNextToken() = 0; 35 36 // Returns the token preceding the token returned by the last call to 37 // getNextToken() in the token stream, or nullptr if no such token exists. 38 virtual FormatToken *getPreviousToken() = 0; 39 40 // Returns the token that would be returned by the next call to 41 // getNextToken(). 42 virtual FormatToken *peekNextToken() = 0; 43 44 // Returns whether we are at the end of the file. 45 // This can be different from whether getNextToken() returned an eof token 46 // when the FormatTokenSource is a view on a part of the token stream. 47 virtual bool isEOF() = 0; 48 49 // Gets the current position in the token stream, to be used by setPosition(). 50 virtual unsigned getPosition() = 0; 51 52 // Resets the token stream to the state it was in when getPosition() returned 53 // Position, and return the token at that position in the stream. 
54 virtual FormatToken *setPosition(unsigned Position) = 0; 55 }; 56 57 namespace { 58 59 class ScopedDeclarationState { 60 public: 61 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, 62 bool MustBeDeclaration) 63 : Line(Line), Stack(Stack) { 64 Line.MustBeDeclaration = MustBeDeclaration; 65 Stack.push_back(MustBeDeclaration); 66 } 67 ~ScopedDeclarationState() { 68 Stack.pop_back(); 69 if (!Stack.empty()) 70 Line.MustBeDeclaration = Stack.back(); 71 else 72 Line.MustBeDeclaration = true; 73 } 74 75 private: 76 UnwrappedLine &Line; 77 llvm::BitVector &Stack; 78 }; 79 80 static bool isLineComment(const FormatToken &FormatTok) { 81 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 82 } 83 84 // Checks if \p FormatTok is a line comment that continues the line comment 85 // \p Previous. The original column of \p MinColumnToken is used to determine 86 // whether \p FormatTok is indented enough to the right to continue \p Previous. 87 static bool continuesLineComment(const FormatToken &FormatTok, 88 const FormatToken *Previous, 89 const FormatToken *MinColumnToken) { 90 if (!Previous || !MinColumnToken) 91 return false; 92 unsigned MinContinueColumn = 93 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 
0 : 1); 94 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 95 isLineComment(*Previous) && 96 FormatTok.OriginalColumn >= MinContinueColumn; 97 } 98 99 class ScopedMacroState : public FormatTokenSource { 100 public: 101 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 102 FormatToken *&ResetToken) 103 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 104 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 105 Token(nullptr), PreviousToken(nullptr) { 106 FakeEOF.Tok.startToken(); 107 FakeEOF.Tok.setKind(tok::eof); 108 TokenSource = this; 109 Line.Level = 0; 110 Line.InPPDirective = true; 111 } 112 113 ~ScopedMacroState() override { 114 TokenSource = PreviousTokenSource; 115 ResetToken = Token; 116 Line.InPPDirective = false; 117 Line.Level = PreviousLineLevel; 118 } 119 120 FormatToken *getNextToken() override { 121 // The \c UnwrappedLineParser guards against this by never calling 122 // \c getNextToken() after it has encountered the first eof token. 
123 assert(!eof()); 124 PreviousToken = Token; 125 Token = PreviousTokenSource->getNextToken(); 126 if (eof()) 127 return &FakeEOF; 128 return Token; 129 } 130 131 FormatToken *getPreviousToken() override { 132 return PreviousTokenSource->getPreviousToken(); 133 } 134 135 FormatToken *peekNextToken() override { 136 if (eof()) 137 return &FakeEOF; 138 return PreviousTokenSource->peekNextToken(); 139 } 140 141 bool isEOF() override { return PreviousTokenSource->isEOF(); } 142 143 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 144 145 FormatToken *setPosition(unsigned Position) override { 146 PreviousToken = nullptr; 147 Token = PreviousTokenSource->setPosition(Position); 148 return Token; 149 } 150 151 private: 152 bool eof() { 153 return Token && Token->HasUnescapedNewline && 154 !continuesLineComment(*Token, PreviousToken, 155 /*MinColumnToken=*/PreviousToken); 156 } 157 158 FormatToken FakeEOF; 159 UnwrappedLine &Line; 160 FormatTokenSource *&TokenSource; 161 FormatToken *&ResetToken; 162 unsigned PreviousLineLevel; 163 FormatTokenSource *PreviousTokenSource; 164 165 FormatToken *Token; 166 FormatToken *PreviousToken; 167 }; 168 169 } // end anonymous namespace 170 171 class ScopedLineState { 172 public: 173 ScopedLineState(UnwrappedLineParser &Parser, 174 bool SwitchToPreprocessorLines = false) 175 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 176 if (SwitchToPreprocessorLines) 177 Parser.CurrentLines = &Parser.PreprocessorDirectives; 178 else if (!Parser.Line->Tokens.empty()) 179 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 180 PreBlockLine = std::move(Parser.Line); 181 Parser.Line = std::make_unique<UnwrappedLine>(); 182 Parser.Line->Level = PreBlockLine->Level; 183 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 184 } 185 186 ~ScopedLineState() { 187 if (!Parser.Line->Tokens.empty()) { 188 Parser.addUnwrappedLine(); 189 } 190 assert(Parser.Line->Tokens.empty()); 191 Parser.Line = 
std::move(PreBlockLine); 192 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 193 Parser.MustBreakBeforeNextToken = true; 194 Parser.CurrentLines = OriginalLines; 195 } 196 197 private: 198 UnwrappedLineParser &Parser; 199 200 std::unique_ptr<UnwrappedLine> PreBlockLine; 201 SmallVectorImpl<UnwrappedLine> *OriginalLines; 202 }; 203 204 class CompoundStatementIndenter { 205 public: 206 CompoundStatementIndenter(UnwrappedLineParser *Parser, 207 const FormatStyle &Style, unsigned &LineLevel) 208 : CompoundStatementIndenter(Parser, LineLevel, 209 Style.BraceWrapping.AfterControlStatement, 210 Style.BraceWrapping.IndentBraces) {} 211 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 212 bool WrapBrace, bool IndentBrace) 213 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 214 if (WrapBrace) 215 Parser->addUnwrappedLine(); 216 if (IndentBrace) 217 ++LineLevel; 218 } 219 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 220 221 private: 222 unsigned &LineLevel; 223 unsigned OldLineLevel; 224 }; 225 226 namespace { 227 228 class IndexedTokenSource : public FormatTokenSource { 229 public: 230 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 231 : Tokens(Tokens), Position(-1) {} 232 233 FormatToken *getNextToken() override { 234 if (Position >= 0 && Tokens[Position]->is(tok::eof)) { 235 LLVM_DEBUG({ 236 llvm::dbgs() << "Next "; 237 dbgToken(Position); 238 }); 239 return Tokens[Position]; 240 } 241 ++Position; 242 LLVM_DEBUG({ 243 llvm::dbgs() << "Next "; 244 dbgToken(Position); 245 }); 246 return Tokens[Position]; 247 } 248 249 FormatToken *getPreviousToken() override { 250 return Position > 0 ? 
Tokens[Position - 1] : nullptr; 251 } 252 253 FormatToken *peekNextToken() override { 254 int Next = Position + 1; 255 LLVM_DEBUG({ 256 llvm::dbgs() << "Peeking "; 257 dbgToken(Next); 258 }); 259 return Tokens[Next]; 260 } 261 262 bool isEOF() override { return Tokens[Position]->is(tok::eof); } 263 264 unsigned getPosition() override { 265 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 266 assert(Position >= 0); 267 return Position; 268 } 269 270 FormatToken *setPosition(unsigned P) override { 271 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 272 Position = P; 273 return Tokens[Position]; 274 } 275 276 void reset() { Position = -1; } 277 278 private: 279 void dbgToken(int Position, llvm::StringRef Indent = "") { 280 FormatToken *Tok = Tokens[Position]; 281 llvm::dbgs() << Indent << "[" << Position 282 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 283 << ", Macro: " << !!Tok->MacroCtx << "\n"; 284 } 285 286 ArrayRef<FormatToken *> Tokens; 287 int Position; 288 }; 289 290 } // end anonymous namespace 291 292 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 293 const AdditionalKeywords &Keywords, 294 unsigned FirstStartColumn, 295 ArrayRef<FormatToken *> Tokens, 296 UnwrappedLineConsumer &Callback) 297 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 298 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 299 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 300 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 301 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 302 ? IG_Rejected 303 : IG_Inited), 304 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 305 306 void UnwrappedLineParser::reset() { 307 PPBranchLevel = -1; 308 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 309 ? 
IG_Rejected 310 : IG_Inited; 311 IncludeGuardToken = nullptr; 312 Line.reset(new UnwrappedLine); 313 CommentsBeforeNextToken.clear(); 314 FormatTok = nullptr; 315 MustBreakBeforeNextToken = false; 316 PreprocessorDirectives.clear(); 317 CurrentLines = &Lines; 318 DeclarationScopeStack.clear(); 319 NestedTooDeep.clear(); 320 PPStack.clear(); 321 Line->FirstStartColumn = FirstStartColumn; 322 } 323 324 void UnwrappedLineParser::parse() { 325 IndexedTokenSource TokenSource(AllTokens); 326 Line->FirstStartColumn = FirstStartColumn; 327 do { 328 LLVM_DEBUG(llvm::dbgs() << "----\n"); 329 reset(); 330 Tokens = &TokenSource; 331 TokenSource.reset(); 332 333 readToken(); 334 parseFile(); 335 336 // If we found an include guard then all preprocessor directives (other than 337 // the guard) are over-indented by one. 338 if (IncludeGuard == IG_Found) 339 for (auto &Line : Lines) 340 if (Line.InPPDirective && Line.Level > 0) 341 --Line.Level; 342 343 // Create line with eof token. 344 pushToken(FormatTok); 345 addUnwrappedLine(); 346 347 for (const UnwrappedLine &Line : Lines) 348 Callback.consumeUnwrappedLine(Line); 349 350 Callback.finishRun(); 351 Lines.clear(); 352 while (!PPLevelBranchIndex.empty() && 353 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 354 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 355 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 356 } 357 if (!PPLevelBranchIndex.empty()) { 358 ++PPLevelBranchIndex.back(); 359 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 360 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 361 } 362 } while (!PPLevelBranchIndex.empty()); 363 } 364 365 void UnwrappedLineParser::parseFile() { 366 // The top-level context in a file always has declarations, except for pre- 367 // processor directives and JavaScript files. 
368 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 369 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 370 MustBeDeclaration); 371 if (Style.Language == FormatStyle::LK_TextProto) 372 parseBracedList(); 373 else 374 parseLevel(/*HasOpeningBrace=*/false); 375 // Make sure to format the remaining tokens. 376 // 377 // LK_TextProto is special since its top-level is parsed as the body of a 378 // braced list, which does not necessarily have natural line separators such 379 // as a semicolon. Comments after the last entry that have been determined to 380 // not belong to that line, as in: 381 // key: value 382 // // endfile comment 383 // do not have a chance to be put on a line of their own until this point. 384 // Here we add this newline before end-of-file comments. 385 if (Style.Language == FormatStyle::LK_TextProto && 386 !CommentsBeforeNextToken.empty()) 387 addUnwrappedLine(); 388 flushComments(true); 389 addUnwrappedLine(); 390 } 391 392 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 393 do { 394 switch (FormatTok->Tok.getKind()) { 395 case tok::l_brace: 396 return; 397 default: 398 if (FormatTok->is(Keywords.kw_where)) { 399 addUnwrappedLine(); 400 nextToken(); 401 parseCSharpGenericTypeConstraint(); 402 break; 403 } 404 nextToken(); 405 break; 406 } 407 } while (!eof()); 408 } 409 410 void UnwrappedLineParser::parseCSharpAttribute() { 411 int UnpairedSquareBrackets = 1; 412 do { 413 switch (FormatTok->Tok.getKind()) { 414 case tok::r_square: 415 nextToken(); 416 --UnpairedSquareBrackets; 417 if (UnpairedSquareBrackets == 0) { 418 addUnwrappedLine(); 419 return; 420 } 421 break; 422 case tok::l_square: 423 ++UnpairedSquareBrackets; 424 nextToken(); 425 break; 426 default: 427 nextToken(); 428 break; 429 } 430 } while (!eof()); 431 } 432 433 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 434 if (!Lines.empty() && Lines.back().InPPDirective) 435 return true; 436 437 const 
FormatToken *Previous = Tokens->getPreviousToken(); 438 return Previous && Previous->is(tok::comment) && 439 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 440 } 441 442 bool UnwrappedLineParser::mightFitOnOneLine() const { 443 const auto ColumnLimit = Style.ColumnLimit; 444 if (ColumnLimit == 0) 445 return true; 446 447 if (Lines.empty()) 448 return true; 449 450 const auto &PreviousLine = Lines.back(); 451 const auto &Tokens = PreviousLine.Tokens; 452 assert(!Tokens.empty()); 453 const auto *LastToken = Tokens.back().Tok; 454 assert(LastToken); 455 if (!LastToken->isOneOf(tok::semi, tok::comment)) 456 return true; 457 458 AnnotatedLine Line(PreviousLine); 459 assert(Line.Last == LastToken); 460 461 TokenAnnotator Annotator(Style, Keywords); 462 Annotator.annotate(Line); 463 Annotator.calculateFormattingInformation(Line); 464 465 return Line.Level * Style.IndentWidth + LastToken->TotalLength <= ColumnLimit; 466 } 467 468 // Returns true if a simple block, or false otherwise. (A simple block has a 469 // single statement that fits on a single line.) 470 bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace, IfStmtKind *IfKind) { 471 const bool IsPrecededByCommentOrPPDirective = 472 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective(); 473 unsigned StatementCount = 0; 474 bool SwitchLabelEncountered = false; 475 do { 476 tok::TokenKind kind = FormatTok->Tok.getKind(); 477 if (FormatTok->getType() == TT_MacroBlockBegin) { 478 kind = tok::l_brace; 479 } else if (FormatTok->getType() == TT_MacroBlockEnd) { 480 kind = tok::r_brace; 481 } 482 483 switch (kind) { 484 case tok::comment: 485 nextToken(); 486 addUnwrappedLine(); 487 break; 488 case tok::l_brace: 489 // FIXME: Add parameter whether this can happen - if this happens, we must 490 // be in a non-declaration context. 
491 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 492 continue; 493 parseBlock(); 494 ++StatementCount; 495 assert(StatementCount > 0 && "StatementCount overflow!"); 496 addUnwrappedLine(); 497 break; 498 case tok::r_brace: 499 if (HasOpeningBrace) { 500 if (!Style.RemoveBracesLLVM) 501 return false; 502 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || 503 IsPrecededByCommentOrPPDirective || 504 precededByCommentOrPPDirective()) { 505 return false; 506 } 507 const FormatToken *Next = Tokens->peekNextToken(); 508 if (Next->is(tok::comment) && Next->NewlinesBefore == 0) 509 return false; 510 return mightFitOnOneLine(); 511 } 512 nextToken(); 513 addUnwrappedLine(); 514 break; 515 case tok::kw_default: { 516 unsigned StoredPosition = Tokens->getPosition(); 517 FormatToken *Next; 518 do { 519 Next = Tokens->getNextToken(); 520 } while (Next->is(tok::comment)); 521 FormatTok = Tokens->setPosition(StoredPosition); 522 if (Next && Next->isNot(tok::colon)) { 523 // default not followed by ':' is not a case label; treat it like 524 // an identifier. 525 parseStructuralElement(); 526 break; 527 } 528 // Else, if it is 'default:', fall through to the case handling. 529 LLVM_FALLTHROUGH; 530 } 531 case tok::kw_case: 532 if (Style.isJavaScript() && Line->MustBeDeclaration) { 533 // A 'case: string' style field declaration. 
534 parseStructuralElement(); 535 break; 536 } 537 if (!SwitchLabelEncountered && 538 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 539 ++Line->Level; 540 SwitchLabelEncountered = true; 541 parseStructuralElement(); 542 break; 543 case tok::l_square: 544 if (Style.isCSharp()) { 545 nextToken(); 546 parseCSharpAttribute(); 547 break; 548 } 549 LLVM_FALLTHROUGH; 550 default: 551 parseStructuralElement(IfKind, !HasOpeningBrace); 552 ++StatementCount; 553 assert(StatementCount > 0 && "StatementCount overflow!"); 554 break; 555 } 556 } while (!eof()); 557 return false; 558 } 559 560 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 561 // We'll parse forward through the tokens until we hit 562 // a closing brace or eof - note that getNextToken() will 563 // parse macros, so this will magically work inside macro 564 // definitions, too. 565 unsigned StoredPosition = Tokens->getPosition(); 566 FormatToken *Tok = FormatTok; 567 const FormatToken *PrevTok = Tok->Previous; 568 // Keep a stack of positions of lbrace tokens. We will 569 // update information about whether an lbrace starts a 570 // braced init list or a different block during the loop. 571 SmallVector<FormatToken *, 8> LBraceStack; 572 assert(Tok->Tok.is(tok::l_brace)); 573 do { 574 // Get next non-comment token. 575 FormatToken *NextTok; 576 unsigned ReadTokens = 0; 577 do { 578 NextTok = Tokens->getNextToken(); 579 ++ReadTokens; 580 } while (NextTok->is(tok::comment)); 581 582 switch (Tok->Tok.getKind()) { 583 case tok::l_brace: 584 if (Style.isJavaScript() && PrevTok) { 585 if (PrevTok->isOneOf(tok::colon, tok::less)) 586 // A ':' indicates this code is in a type, or a braced list 587 // following a label in an object literal ({a: {b: 1}}). 588 // A '<' could be an object used in a comparison, but that is nonsense 589 // code (can never return true), so more likely it is a generic type 590 // argument (`X<{a: string; b: number}>`). 
591 // The code below could be confused by semicolons between the 592 // individual members in a type member list, which would normally 593 // trigger BK_Block. In both cases, this must be parsed as an inline 594 // braced init. 595 Tok->setBlockKind(BK_BracedInit); 596 else if (PrevTok->is(tok::r_paren)) 597 // `) { }` can only occur in function or method declarations in JS. 598 Tok->setBlockKind(BK_Block); 599 } else { 600 Tok->setBlockKind(BK_Unknown); 601 } 602 LBraceStack.push_back(Tok); 603 break; 604 case tok::r_brace: 605 if (LBraceStack.empty()) 606 break; 607 if (LBraceStack.back()->is(BK_Unknown)) { 608 bool ProbablyBracedList = false; 609 if (Style.Language == FormatStyle::LK_Proto) { 610 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 611 } else { 612 // Skip NextTok over preprocessor lines, otherwise we may not 613 // properly diagnose the block as a braced intializer 614 // if the comma separator appears after the pp directive. 615 while (NextTok->is(tok::hash)) { 616 ScopedMacroState MacroState(*Line, Tokens, NextTok); 617 do { 618 NextTok = Tokens->getNextToken(); 619 ++ReadTokens; 620 } while (NextTok->isNot(tok::eof)); 621 } 622 623 // Using OriginalColumn to distinguish between ObjC methods and 624 // binary operators is a bit hacky. 625 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 626 NextTok->OriginalColumn == 0; 627 628 // If there is a comma, semicolon or right paren after the closing 629 // brace, we assume this is a braced initializer list. Note that 630 // regardless how we mark inner braces here, we will overwrite the 631 // BlockKind later if we parse a braced list (where all blocks 632 // inside are by default braced lists), or when we explicitly detect 633 // blocks (for example while parsing lambdas). 634 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 635 // braced list in JS. 
636 ProbablyBracedList = 637 (Style.isJavaScript() && 638 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 639 Keywords.kw_as)) || 640 (Style.isCpp() && NextTok->is(tok::l_paren)) || 641 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 642 tok::r_paren, tok::r_square, tok::l_brace, 643 tok::ellipsis) || 644 (NextTok->is(tok::identifier) && 645 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 646 (NextTok->is(tok::semi) && 647 (!ExpectClassBody || LBraceStack.size() != 1)) || 648 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 649 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 650 // We can have an array subscript after a braced init 651 // list, but C++11 attributes are expected after blocks. 652 NextTok = Tokens->getNextToken(); 653 ++ReadTokens; 654 ProbablyBracedList = NextTok->isNot(tok::l_square); 655 } 656 } 657 if (ProbablyBracedList) { 658 Tok->setBlockKind(BK_BracedInit); 659 LBraceStack.back()->setBlockKind(BK_BracedInit); 660 } else { 661 Tok->setBlockKind(BK_Block); 662 LBraceStack.back()->setBlockKind(BK_Block); 663 } 664 } 665 LBraceStack.pop_back(); 666 break; 667 case tok::identifier: 668 if (!Tok->is(TT_StatementMacro)) 669 break; 670 LLVM_FALLTHROUGH; 671 case tok::at: 672 case tok::semi: 673 case tok::kw_if: 674 case tok::kw_while: 675 case tok::kw_for: 676 case tok::kw_switch: 677 case tok::kw_try: 678 case tok::kw___try: 679 if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown)) 680 LBraceStack.back()->setBlockKind(BK_Block); 681 break; 682 default: 683 break; 684 } 685 PrevTok = Tok; 686 Tok = NextTok; 687 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 688 689 // Assume other blocks for all unclosed opening braces. 
690 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 691 if (LBraceStack[i]->is(BK_Unknown)) 692 LBraceStack[i]->setBlockKind(BK_Block); 693 } 694 695 FormatTok = Tokens->setPosition(StoredPosition); 696 } 697 698 template <class T> 699 static inline void hash_combine(std::size_t &seed, const T &v) { 700 std::hash<T> hasher; 701 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 702 } 703 704 size_t UnwrappedLineParser::computePPHash() const { 705 size_t h = 0; 706 for (const auto &i : PPStack) { 707 hash_combine(h, size_t(i.Kind)); 708 hash_combine(h, i.Line); 709 } 710 return h; 711 } 712 713 UnwrappedLineParser::IfStmtKind 714 UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels, 715 bool MunchSemi, 716 bool UnindentWhitesmithsBraces) { 717 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 718 "'{' or macro block token expected"); 719 FormatToken *Tok = FormatTok; 720 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 721 FormatTok->setBlockKind(BK_Block); 722 723 // For Whitesmiths mode, jump to the next level prior to skipping over the 724 // braces. 725 if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 726 ++Line->Level; 727 728 size_t PPStartHash = computePPHash(); 729 730 unsigned InitialLevel = Line->Level; 731 nextToken(/*LevelDifference=*/AddLevels); 732 733 if (MacroBlock && FormatTok->is(tok::l_paren)) 734 parseParens(); 735 736 size_t NbPreprocessorDirectives = 737 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 738 addUnwrappedLine(); 739 size_t OpeningLineIndex = 740 CurrentLines->empty() 741 ? (UnwrappedLine::kInvalidIndex) 742 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 743 744 // Whitesmiths is weird here. The brace needs to be indented for the namespace 745 // block, but the block itself may not be indented depending on the style 746 // settings. This allows the format to back up one level in those cases. 
747 if (UnindentWhitesmithsBraces) 748 --Line->Level; 749 750 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 751 MustBeDeclaration); 752 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 753 Line->Level += AddLevels; 754 755 IfStmtKind IfKind = IfStmtKind::NotIf; 756 const bool SimpleBlock = parseLevel(/*HasOpeningBrace=*/true, &IfKind); 757 758 if (eof()) 759 return IfKind; 760 761 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 762 : !FormatTok->is(tok::r_brace)) { 763 Line->Level = InitialLevel; 764 FormatTok->setBlockKind(BK_Block); 765 return IfKind; 766 } 767 768 if (SimpleBlock && Tok->is(tok::l_brace)) { 769 assert(FormatTok->is(tok::r_brace)); 770 const FormatToken *Previous = Tokens->getPreviousToken(); 771 assert(Previous); 772 if (Previous->isNot(tok::r_brace) || Previous->Optional) { 773 Tok->MatchingParen = FormatTok; 774 FormatTok->MatchingParen = Tok; 775 } 776 } 777 778 size_t PPEndHash = computePPHash(); 779 780 // Munch the closing brace. 781 nextToken(/*LevelDifference=*/-AddLevels); 782 783 if (MacroBlock && FormatTok->is(tok::l_paren)) 784 parseParens(); 785 786 if (FormatTok->is(tok::arrow)) { 787 // Following the } we can find a trailing return type arrow 788 // as part of an implicit conversion constraint. 789 nextToken(); 790 parseStructuralElement(); 791 } 792 793 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 794 nextToken(); 795 796 Line->Level = InitialLevel; 797 798 if (PPStartHash == PPEndHash) { 799 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 800 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 801 // Update the opening line to add the forward reference as well 802 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 803 CurrentLines->size() - 1; 804 } 805 } 806 807 return IfKind; 808 } 809 810 static bool isGoogScope(const UnwrappedLine &Line) { 811 // FIXME: Closure-library specific stuff should not be hard-coded but be 812 // configurable. 
813 if (Line.Tokens.size() < 4) 814 return false; 815 auto I = Line.Tokens.begin(); 816 if (I->Tok->TokenText != "goog") 817 return false; 818 ++I; 819 if (I->Tok->isNot(tok::period)) 820 return false; 821 ++I; 822 if (I->Tok->TokenText != "scope") 823 return false; 824 ++I; 825 return I->Tok->is(tok::l_paren); 826 } 827 828 static bool isIIFE(const UnwrappedLine &Line, 829 const AdditionalKeywords &Keywords) { 830 // Look for the start of an immediately invoked anonymous function. 831 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 832 // This is commonly done in JavaScript to create a new, anonymous scope. 833 // Example: (function() { ... })() 834 if (Line.Tokens.size() < 3) 835 return false; 836 auto I = Line.Tokens.begin(); 837 if (I->Tok->isNot(tok::l_paren)) 838 return false; 839 ++I; 840 if (I->Tok->isNot(Keywords.kw_function)) 841 return false; 842 ++I; 843 return I->Tok->is(tok::l_paren); 844 } 845 846 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 847 const FormatToken &InitialToken) { 848 if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro)) 849 return Style.BraceWrapping.AfterNamespace; 850 if (InitialToken.is(tok::kw_class)) 851 return Style.BraceWrapping.AfterClass; 852 if (InitialToken.is(tok::kw_union)) 853 return Style.BraceWrapping.AfterUnion; 854 if (InitialToken.is(tok::kw_struct)) 855 return Style.BraceWrapping.AfterStruct; 856 if (InitialToken.is(tok::kw_enum)) 857 return Style.BraceWrapping.AfterEnum; 858 return false; 859 } 860 861 void UnwrappedLineParser::parseChildBlock() { 862 FormatTok->setBlockKind(BK_Block); 863 nextToken(); 864 { 865 bool SkipIndent = (Style.isJavaScript() && 866 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 867 ScopedLineState LineState(*this); 868 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 869 /*MustBeDeclaration=*/false); 870 Line->Level += SkipIndent ? 
0 : 1; 871 parseLevel(/*HasOpeningBrace=*/true); 872 flushComments(isOnNewLine(*FormatTok)); 873 Line->Level -= SkipIndent ? 0 : 1; 874 } 875 nextToken(); 876 } 877 878 void UnwrappedLineParser::parsePPDirective() { 879 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 880 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 881 882 nextToken(); 883 884 if (!FormatTok->Tok.getIdentifierInfo()) { 885 parsePPUnknown(); 886 return; 887 } 888 889 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 890 case tok::pp_define: 891 parsePPDefine(); 892 return; 893 case tok::pp_if: 894 parsePPIf(/*IfDef=*/false); 895 break; 896 case tok::pp_ifdef: 897 case tok::pp_ifndef: 898 parsePPIf(/*IfDef=*/true); 899 break; 900 case tok::pp_else: 901 parsePPElse(); 902 break; 903 case tok::pp_elifdef: 904 case tok::pp_elifndef: 905 case tok::pp_elif: 906 parsePPElIf(); 907 break; 908 case tok::pp_endif: 909 parsePPEndIf(); 910 break; 911 default: 912 parsePPUnknown(); 913 break; 914 } 915 } 916 917 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 918 size_t Line = CurrentLines->size(); 919 if (CurrentLines == &PreprocessorDirectives) 920 Line += Lines.size(); 921 922 if (Unreachable || 923 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 924 PPStack.push_back({PP_Unreachable, Line}); 925 else 926 PPStack.push_back({PP_Conditional, Line}); 927 } 928 929 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 930 ++PPBranchLevel; 931 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 932 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 933 PPLevelBranchIndex.push_back(0); 934 PPLevelBranchCount.push_back(0); 935 } 936 PPChainBranchIndex.push(0); 937 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 938 conditionalCompilationCondition(Unreachable || Skip); 939 } 940 941 void UnwrappedLineParser::conditionalCompilationAlternative() { 942 if (!PPStack.empty()) 943 
PPStack.pop_back(); 944 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 945 if (!PPChainBranchIndex.empty()) 946 ++PPChainBranchIndex.top(); 947 conditionalCompilationCondition( 948 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 949 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 950 } 951 952 void UnwrappedLineParser::conditionalCompilationEnd() { 953 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 954 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 955 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 956 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 957 } 958 } 959 // Guard against #endif's without #if. 960 if (PPBranchLevel > -1) 961 --PPBranchLevel; 962 if (!PPChainBranchIndex.empty()) 963 PPChainBranchIndex.pop(); 964 if (!PPStack.empty()) 965 PPStack.pop_back(); 966 } 967 968 void UnwrappedLineParser::parsePPIf(bool IfDef) { 969 bool IfNDef = FormatTok->is(tok::pp_ifndef); 970 nextToken(); 971 bool Unreachable = false; 972 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 973 Unreachable = true; 974 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 975 Unreachable = true; 976 conditionalCompilationStart(Unreachable); 977 FormatToken *IfCondition = FormatTok; 978 // If there's a #ifndef on the first line, and the only lines before it are 979 // comments, it could be an include guard. 
980 bool MaybeIncludeGuard = IfNDef; 981 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 982 for (auto &Line : Lines) { 983 if (!Line.Tokens.front().Tok->is(tok::comment)) { 984 MaybeIncludeGuard = false; 985 IncludeGuard = IG_Rejected; 986 break; 987 } 988 } 989 --PPBranchLevel; 990 parsePPUnknown(); 991 ++PPBranchLevel; 992 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 993 IncludeGuard = IG_IfNdefed; 994 IncludeGuardToken = IfCondition; 995 } 996 } 997 998 void UnwrappedLineParser::parsePPElse() { 999 // If a potential include guard has an #else, it's not an include guard. 1000 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1001 IncludeGuard = IG_Rejected; 1002 conditionalCompilationAlternative(); 1003 if (PPBranchLevel > -1) 1004 --PPBranchLevel; 1005 parsePPUnknown(); 1006 ++PPBranchLevel; 1007 } 1008 1009 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 1010 1011 void UnwrappedLineParser::parsePPEndIf() { 1012 conditionalCompilationEnd(); 1013 parsePPUnknown(); 1014 // If the #endif of a potential include guard is the last thing in the file, 1015 // then we found an include guard. 
1016 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1017 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1018 IncludeGuard = IG_Found; 1019 } 1020 1021 void UnwrappedLineParser::parsePPDefine() { 1022 nextToken(); 1023 1024 if (!FormatTok->Tok.getIdentifierInfo()) { 1025 IncludeGuard = IG_Rejected; 1026 IncludeGuardToken = nullptr; 1027 parsePPUnknown(); 1028 return; 1029 } 1030 1031 if (IncludeGuard == IG_IfNdefed && 1032 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1033 IncludeGuard = IG_Defined; 1034 IncludeGuardToken = nullptr; 1035 for (auto &Line : Lines) { 1036 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1037 IncludeGuard = IG_Rejected; 1038 break; 1039 } 1040 } 1041 } 1042 1043 nextToken(); 1044 if (FormatTok->Tok.getKind() == tok::l_paren && 1045 !FormatTok->hasWhitespaceBefore()) { 1046 parseParens(); 1047 } 1048 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1049 Line->Level += PPBranchLevel + 1; 1050 addUnwrappedLine(); 1051 ++Line->Level; 1052 1053 // Errors during a preprocessor directive can only affect the layout of the 1054 // preprocessor directive, and thus we ignore them. An alternative approach 1055 // would be to use the same approach we use on the file level (no 1056 // re-indentation if there was a structural error) within the macro 1057 // definition. 1058 parseFile(); 1059 } 1060 1061 void UnwrappedLineParser::parsePPUnknown() { 1062 do { 1063 nextToken(); 1064 } while (!eof()); 1065 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1066 Line->Level += PPBranchLevel + 1; 1067 addUnwrappedLine(); 1068 } 1069 1070 // Here we exclude certain tokens that are not usually the first token in an 1071 // unwrapped line. This is used in attempt to distinguish macro calls without 1072 // trailing semicolons from other constructs split to several lines. 
1073 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1074 // Semicolon can be a null-statement, l_square can be a start of a macro or 1075 // a C++11 attribute, but this doesn't seem to be common. 1076 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 1077 Tok.isNot(TT_AttributeSquare) && 1078 // Tokens that can only be used as binary operators and a part of 1079 // overloaded operator names. 1080 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 1081 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 1082 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 1083 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 1084 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 1085 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 1086 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 1087 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 1088 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 1089 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 1090 Tok.isNot(tok::lesslessequal) && 1091 // Colon is used in labels, base class lists, initializer lists, 1092 // range-based for loops, ternary operator, but should never be the 1093 // first token in an unwrapped line. 1094 Tok.isNot(tok::colon) && 1095 // 'noexcept' is a trailing annotation. 1096 Tok.isNot(tok::kw_noexcept); 1097 } 1098 1099 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1100 const FormatToken *FormatTok) { 1101 // FIXME: This returns true for C/C++ keywords like 'struct'. 
1102 return FormatTok->is(tok::identifier) && 1103 (FormatTok->Tok.getIdentifierInfo() == nullptr || 1104 !FormatTok->isOneOf( 1105 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1106 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1107 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1108 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1109 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1110 Keywords.kw_instanceof, Keywords.kw_interface, 1111 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1112 } 1113 1114 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1115 const FormatToken *FormatTok) { 1116 return FormatTok->Tok.isLiteral() || 1117 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1118 mustBeJSIdent(Keywords, FormatTok); 1119 } 1120 1121 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1122 // when encountered after a value (see mustBeJSIdentOrValue). 1123 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1124 const FormatToken *FormatTok) { 1125 return FormatTok->isOneOf( 1126 tok::kw_return, Keywords.kw_yield, 1127 // conditionals 1128 tok::kw_if, tok::kw_else, 1129 // loops 1130 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1131 // switch/case 1132 tok::kw_switch, tok::kw_case, 1133 // exceptions 1134 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1135 // declaration 1136 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1137 Keywords.kw_async, Keywords.kw_function, 1138 // import/export 1139 Keywords.kw_import, tok::kw_export); 1140 } 1141 1142 // Checks whether a token is a type in K&R C (aka C78). 
1143 static bool isC78Type(const FormatToken &Tok) { 1144 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1145 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1146 tok::identifier); 1147 } 1148 1149 // This function checks whether a token starts the first parameter declaration 1150 // in a K&R C (aka C78) function definition, e.g.: 1151 // int f(a, b) 1152 // short a, b; 1153 // { 1154 // return a + b; 1155 // } 1156 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1157 const FormatToken *FuncName) { 1158 assert(Tok); 1159 assert(Next); 1160 assert(FuncName); 1161 1162 if (FuncName->isNot(tok::identifier)) 1163 return false; 1164 1165 const FormatToken *Prev = FuncName->Previous; 1166 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1167 return false; 1168 1169 if (!isC78Type(*Tok) && 1170 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) 1171 return false; 1172 1173 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1174 return false; 1175 1176 Tok = Tok->Previous; 1177 if (!Tok || Tok->isNot(tok::r_paren)) 1178 return false; 1179 1180 Tok = Tok->Previous; 1181 if (!Tok || Tok->isNot(tok::identifier)) 1182 return false; 1183 1184 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1185 } 1186 1187 void UnwrappedLineParser::parseModuleImport() { 1188 nextToken(); 1189 while (!eof()) { 1190 if (FormatTok->is(tok::colon)) { 1191 FormatTok->setType(TT_ModulePartitionColon); 1192 } 1193 // Handle import <foo/bar.h> as we would an include statement. 1194 else if (FormatTok->is(tok::less)) { 1195 nextToken(); 1196 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1197 // Mark tokens up to the trailing line comments as implicit string 1198 // literals. 
1199 if (FormatTok->isNot(tok::comment) && 1200 !FormatTok->TokenText.startswith("//")) 1201 FormatTok->setType(TT_ImplicitStringLiteral); 1202 nextToken(); 1203 } 1204 } 1205 if (FormatTok->is(tok::semi)) { 1206 nextToken(); 1207 break; 1208 } 1209 nextToken(); 1210 } 1211 1212 addUnwrappedLine(); 1213 } 1214 1215 // readTokenWithJavaScriptASI reads the next token and terminates the current 1216 // line if JavaScript Automatic Semicolon Insertion must 1217 // happen between the current token and the next token. 1218 // 1219 // This method is conservative - it cannot cover all edge cases of JavaScript, 1220 // but only aims to correctly handle certain well known cases. It *must not* 1221 // return true in speculative cases. 1222 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1223 FormatToken *Previous = FormatTok; 1224 readToken(); 1225 FormatToken *Next = FormatTok; 1226 1227 bool IsOnSameLine = 1228 CommentsBeforeNextToken.empty() 1229 ? Next->NewlinesBefore == 0 1230 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 1231 if (IsOnSameLine) 1232 return; 1233 1234 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 1235 bool PreviousStartsTemplateExpr = 1236 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 1237 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 1238 // If the line contains an '@' sign, the previous token might be an 1239 // annotation, which can precede another identifier/value. 
bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
      return LineNode.Tok->is(tok::at);
    });
    if (HasAt)
      return;
  }
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus)))
    return addUnwrappedLine();
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next))
    return addUnwrappedLine();
}

// Parses one structural element starting at the current token.
//
// The first switch dispatches on the leading token to a dedicated parser
// (namespaces, access specifiers, control flow statements, extern blocks,
// import/export, macro-like identifiers, ...). Cases that `break` out of it
// instead of returning fall into the token loop below, which keeps consuming
// tokens until a construct that ends the element is handled (each such case
// returns) or eof() is reached.
//
// \p IfKind is only forwarded to parseIfThenElse(). \p IsTopLevel enables the
// check for K&R C parameter declarations after a parenthesized list.
void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
                                                 bool IsTopLevel) {
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      // Mark every token up to the closing brace as finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse(IfKind);
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        // NOTE(review): this early return does not call addUnwrappedLine(),
        // unlike the path below - confirm that is intended.
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp()) {
        parseModuleImport();
        return;
      }
    }
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic token loop: every case either consumes at least the current token
  // and continues, or finishes the element and returns.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      return;
    case tok::kw_requires:
      parseRequires();
      return;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike()) {
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike()) {
          return;
        }
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;

      FormatToken *PreviousToken = FormatTok;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // Only an identifier that starts the line (optionally after a leading
      // comment) is considered as a label or macro below.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // The identifier must be all upper case and either at least five
        // characters long or followed by a parenthesized list.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          PreviousToken->setType(TT_FunctionLikeOrFreestandingMacro);
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

// Tries to parse a C# property accessor block starting at the current '{'.
//
// Returns false without consuming FormatTok when the style is not C#, the
// token before the '{' is not an identifier, or the leading run of
// accessor-like tokens after the '{' contains neither `get` nor `set`.
// Otherwise parses the accessor block and returns true.
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  if (FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
  // Track these as they do not require line breaks to be introduced.
  bool HasGetOrSet = false;
  bool IsTrivialPropertyAccessor = true;
  // Look ahead through `Tok` only; FormatTok itself is not advanced here.
  while (!eof()) {
    if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
                     tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
                     Keywords.kw_set)) {
      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
        HasGetOrSet = true;
      Tok = Tokens->getNextToken();
      continue;
    }
    // The first token outside that set decides: anything but '}' means the
    // accessor is not trivial.
    if (Tok->isNot(tok::r_brace))
      IsTrivialPropertyAccessor = false;
    break;
  }

  if (!HasGetOrSet) {
    Tokens->setPosition(StoredPosition);
    return false;
  }

  // Try to parse the property accessor:
  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  Tokens->setPosition(StoredPosition);
  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
    addUnwrappedLine();
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_brace:
      nextToken();
      // An '=' after the closing brace: consume through the next ';'.
      if (FormatTok->is(tok::equal)) {
        while (!eof() && FormatTok->isNot(tok::semi))
          nextToken();
        nextToken();
      }
      addUnwrappedLine();
      return true;
    case tok::l_brace:
      ++Line->Level;
      parseBlock(/*MustBeDeclaration=*/true);
      addUnwrappedLine();
      --Line->Level;
      break;
    case tok::equal:
      if (FormatTok->is(TT_FatArrow)) {
        ++Line->Level;
        do {
          nextToken();
        } while (!eof() && FormatTok->isNot(tok::semi));
        nextToken();
        addUnwrappedLine();
        --Line->Level;
        break;
      }
      nextToken();
      break;
    default:
      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
          !IsTrivialPropertyAccessor) {
        // Non-trivial get/set needs to be on its own line.
        addUnwrappedLine();
      }
      nextToken();
    }
  } while (!eof());

  // Unreachable for well-formed code (paired '{' and '}').
  return true;
}

// Tries to parse a C++ lambda starting at the current '['.
//
// For non-C++ styles the '[' is consumed and false is returned. False is also
// returned when tryToParseLambdaIntroducer() fails. Once the introducer has
// been consumed the function returns true, whether or not a lambda body was
// found; only when a '{' is reached are the '[' and '{' tokens typed as
// lambda tokens and the body parsed as a child block.
bool UnwrappedLineParser::tryToParseLambda() {
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  assert(FormatTok->is(tok::l_square));
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  bool SeenArrow = false;

  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_class:
    case tok::kw_mutable:
    case tok::kw_noexcept:
    case tok::kw_template:
    case tok::kw_typename:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as an heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
1901 // 1902 // FIXME: This heuristic is incorrect for C++20 generic lambdas with 1903 // explicit template lists: []<bool b = true && false>(U &&u){} 1904 case tok::plus: 1905 case tok::minus: 1906 case tok::exclaim: 1907 case tok::tilde: 1908 case tok::slash: 1909 case tok::percent: 1910 case tok::lessless: 1911 case tok::pipe: 1912 case tok::pipepipe: 1913 case tok::ampamp: 1914 case tok::caret: 1915 case tok::equalequal: 1916 case tok::exclaimequal: 1917 case tok::greaterequal: 1918 case tok::lessequal: 1919 case tok::question: 1920 case tok::colon: 1921 case tok::ellipsis: 1922 case tok::kw_true: 1923 case tok::kw_false: 1924 if (SeenArrow) { 1925 nextToken(); 1926 break; 1927 } 1928 return true; 1929 case tok::arrow: 1930 // This might or might not actually be a lambda arrow (this could be an 1931 // ObjC method invocation followed by a dereferencing arrow). We might 1932 // reset this back to TT_Unknown in TokenAnnotator. 1933 FormatTok->setType(TT_LambdaArrow); 1934 SeenArrow = true; 1935 nextToken(); 1936 break; 1937 default: 1938 return true; 1939 } 1940 } 1941 FormatTok->setType(TT_LambdaLBrace); 1942 LSquare.setType(TT_LambdaLSquare); 1943 parseChildBlock(); 1944 return true; 1945 } 1946 1947 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1948 const FormatToken *Previous = FormatTok->Previous; 1949 if (Previous && 1950 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1951 tok::kw_delete, tok::l_square) || 1952 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1953 Previous->isSimpleTypeSpecifier())) { 1954 nextToken(); 1955 return false; 1956 } 1957 nextToken(); 1958 if (FormatTok->is(tok::l_square)) { 1959 return false; 1960 } 1961 parseSquare(/*LambdaIntroducer=*/true); 1962 return true; 1963 } 1964 1965 void UnwrappedLineParser::tryToParseJSFunction() { 1966 assert(FormatTok->is(Keywords.kw_function) || 1967 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1968 if 
(FormatTok->is(Keywords.kw_async)) 1969 nextToken(); 1970 // Consume "function". 1971 nextToken(); 1972 1973 // Consume * (generator function). Treat it like C++'s overloaded operators. 1974 if (FormatTok->is(tok::star)) { 1975 FormatTok->setType(TT_OverloadedOperator); 1976 nextToken(); 1977 } 1978 1979 // Consume function name. 1980 if (FormatTok->is(tok::identifier)) 1981 nextToken(); 1982 1983 if (FormatTok->isNot(tok::l_paren)) 1984 return; 1985 1986 // Parse formal parameter list. 1987 parseParens(); 1988 1989 if (FormatTok->is(tok::colon)) { 1990 // Parse a type definition. 1991 nextToken(); 1992 1993 // Eat the type declaration. For braced inline object types, balance braces, 1994 // otherwise just parse until finding an l_brace for the function body. 1995 if (FormatTok->is(tok::l_brace)) 1996 tryToParseBracedList(); 1997 else 1998 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1999 nextToken(); 2000 } 2001 2002 if (FormatTok->is(tok::semi)) 2003 return; 2004 2005 parseChildBlock(); 2006 } 2007 2008 bool UnwrappedLineParser::tryToParseBracedList() { 2009 if (FormatTok->is(BK_Unknown)) 2010 calculateBraceTypes(); 2011 assert(FormatTok->isNot(BK_Unknown)); 2012 if (FormatTok->is(BK_Block)) 2013 return false; 2014 nextToken(); 2015 parseBracedList(); 2016 return true; 2017 } 2018 2019 bool UnwrappedLineParser::tryToParseChildBlock() { 2020 assert(Style.isJavaScript() || Style.isCSharp()); 2021 assert(FormatTok->is(TT_FatArrow)); 2022 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 2023 // They always start an expression or a child block if followed by a curly 2024 // brace. 
2025 nextToken(); 2026 if (FormatTok->isNot(tok::l_brace)) 2027 return false; 2028 parseChildBlock(); 2029 return true; 2030 } 2031 2032 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 2033 bool IsEnum, 2034 tok::TokenKind ClosingBraceKind) { 2035 bool HasError = false; 2036 2037 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 2038 // replace this by using parseAssignmentExpression() inside. 2039 do { 2040 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2041 tryToParseChildBlock()) 2042 continue; 2043 if (Style.isJavaScript()) { 2044 if (FormatTok->is(Keywords.kw_function) || 2045 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 2046 tryToParseJSFunction(); 2047 continue; 2048 } 2049 if (FormatTok->is(tok::l_brace)) { 2050 // Could be a method inside of a braced list `{a() { return 1; }}`. 2051 if (tryToParseBracedList()) 2052 continue; 2053 parseChildBlock(); 2054 } 2055 } 2056 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 2057 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2058 addUnwrappedLine(); 2059 nextToken(); 2060 return !HasError; 2061 } 2062 switch (FormatTok->Tok.getKind()) { 2063 case tok::l_square: 2064 if (Style.isCSharp()) 2065 parseSquare(); 2066 else 2067 tryToParseLambda(); 2068 break; 2069 case tok::l_paren: 2070 parseParens(); 2071 // JavaScript can just have free standing methods and getters/setters in 2072 // object literals. Detect them by a "{" following ")". 2073 if (Style.isJavaScript()) { 2074 if (FormatTok->is(tok::l_brace)) 2075 parseChildBlock(); 2076 break; 2077 } 2078 break; 2079 case tok::l_brace: 2080 // Assume there are no blocks inside a braced init list apart 2081 // from the ones we explicitly parse out (like lambdas). 
2082 FormatTok->setBlockKind(BK_BracedInit); 2083 nextToken(); 2084 parseBracedList(); 2085 break; 2086 case tok::less: 2087 if (Style.Language == FormatStyle::LK_Proto) { 2088 nextToken(); 2089 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2090 /*ClosingBraceKind=*/tok::greater); 2091 } else { 2092 nextToken(); 2093 } 2094 break; 2095 case tok::semi: 2096 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2097 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 2098 // used for error recovery if we have otherwise determined that this is 2099 // a braced list. 2100 if (Style.isJavaScript()) { 2101 nextToken(); 2102 break; 2103 } 2104 HasError = true; 2105 if (!ContinueOnSemicolons) 2106 return !HasError; 2107 nextToken(); 2108 break; 2109 case tok::comma: 2110 nextToken(); 2111 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2112 addUnwrappedLine(); 2113 break; 2114 default: 2115 nextToken(); 2116 break; 2117 } 2118 } while (!eof()); 2119 return false; 2120 } 2121 2122 void UnwrappedLineParser::parseParens() { 2123 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 2124 nextToken(); 2125 do { 2126 switch (FormatTok->Tok.getKind()) { 2127 case tok::l_paren: 2128 parseParens(); 2129 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2130 parseChildBlock(); 2131 break; 2132 case tok::r_paren: 2133 nextToken(); 2134 return; 2135 case tok::r_brace: 2136 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2137 return; 2138 case tok::l_square: 2139 tryToParseLambda(); 2140 break; 2141 case tok::l_brace: 2142 if (!tryToParseBracedList()) 2143 parseChildBlock(); 2144 break; 2145 case tok::at: 2146 nextToken(); 2147 if (FormatTok->Tok.is(tok::l_brace)) { 2148 nextToken(); 2149 parseBracedList(); 2150 } 2151 break; 2152 case tok::equal: 2153 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2154 tryToParseChildBlock(); 2155 else 2156 nextToken(); 2157 break; 2158 case tok::kw_class: 2159 if (Style.isJavaScript()) 2160 parseRecord(/*ParseAsExpr=*/true); 2161 else 2162 nextToken(); 2163 break; 2164 case tok::identifier: 2165 if (Style.isJavaScript() && 2166 (FormatTok->is(Keywords.kw_function) || 2167 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 2168 tryToParseJSFunction(); 2169 else 2170 nextToken(); 2171 break; 2172 default: 2173 nextToken(); 2174 break; 2175 } 2176 } while (!eof()); 2177 } 2178 2179 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2180 if (!LambdaIntroducer) { 2181 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 2182 if (tryToParseLambda()) 2183 return; 2184 } 2185 do { 2186 switch (FormatTok->Tok.getKind()) { 2187 case tok::l_paren: 2188 parseParens(); 2189 break; 2190 case tok::r_square: 2191 nextToken(); 2192 return; 2193 case tok::r_brace: 2194 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2195 return; 2196 case tok::l_square: 2197 parseSquare(); 2198 break; 2199 case tok::l_brace: { 2200 if (!tryToParseBracedList()) 2201 parseChildBlock(); 2202 break; 2203 } 2204 case tok::at: 2205 nextToken(); 2206 if (FormatTok->Tok.is(tok::l_brace)) { 2207 nextToken(); 2208 parseBracedList(); 2209 } 2210 break; 2211 default: 2212 nextToken(); 2213 break; 2214 } 2215 } while (!eof()); 2216 } 2217 2218 void UnwrappedLineParser::keepAncestorBraces() { 2219 if (!Style.RemoveBracesLLVM) 2220 return; 2221 2222 const int MaxNestingLevels = 2; 2223 const int Size = NestedTooDeep.size(); 2224 if (Size >= MaxNestingLevels) 2225 NestedTooDeep[Size - MaxNestingLevels] = true; 2226 NestedTooDeep.push_back(false); 2227 } 2228 2229 static void markOptionalBraces(FormatToken *LeftBrace) { 2230 if (!LeftBrace) 2231 return; 2232 2233 assert(LeftBrace->is(tok::l_brace)); 2234 2235 FormatToken *RightBrace = LeftBrace->MatchingParen; 2236 if (!RightBrace) { 2237 assert(!LeftBrace->Optional); 2238 return; 2239 } 2240 2241 assert(RightBrace->is(tok::r_brace)); 2242 assert(RightBrace->MatchingParen == LeftBrace); 2243 assert(LeftBrace->Optional == RightBrace->Optional); 2244 2245 LeftBrace->Optional = true; 2246 RightBrace->Optional = true; 2247 } 2248 2249 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, 2250 bool KeepBraces) { 2251 auto HandleAttributes = [this]() { 2252 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2253 if (FormatTok->is(TT_AttributeMacro)) 2254 nextToken(); 2255 // Handle [[likely]] / [[unlikely]] attributes. 
2256 if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) 2257 parseSquare(); 2258 }; 2259 2260 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 2261 nextToken(); 2262 if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier)) 2263 nextToken(); 2264 if (FormatTok->Tok.is(tok::l_paren)) 2265 parseParens(); 2266 HandleAttributes(); 2267 2268 bool NeedsUnwrappedLine = false; 2269 keepAncestorBraces(); 2270 2271 FormatToken *IfLeftBrace = nullptr; 2272 IfStmtKind IfBlockKind = IfStmtKind::NotIf; 2273 2274 if (FormatTok->Tok.is(tok::l_brace)) { 2275 IfLeftBrace = FormatTok; 2276 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2277 IfBlockKind = parseBlock(); 2278 if (Style.BraceWrapping.BeforeElse) 2279 addUnwrappedLine(); 2280 else 2281 NeedsUnwrappedLine = true; 2282 } else { 2283 addUnwrappedLine(); 2284 ++Line->Level; 2285 parseStructuralElement(); 2286 --Line->Level; 2287 } 2288 2289 bool KeepIfBraces = false; 2290 if (Style.RemoveBracesLLVM) { 2291 assert(!NestedTooDeep.empty()); 2292 KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) || 2293 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || 2294 IfBlockKind == IfStmtKind::IfElseIf; 2295 } 2296 2297 FormatToken *ElseLeftBrace = nullptr; 2298 IfStmtKind Kind = IfStmtKind::IfOnly; 2299 2300 if (FormatTok->Tok.is(tok::kw_else)) { 2301 if (Style.RemoveBracesLLVM) { 2302 NestedTooDeep.back() = false; 2303 Kind = IfStmtKind::IfElse; 2304 } 2305 nextToken(); 2306 HandleAttributes(); 2307 if (FormatTok->Tok.is(tok::l_brace)) { 2308 ElseLeftBrace = FormatTok; 2309 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2310 if (parseBlock() == IfStmtKind::IfOnly) 2311 Kind = IfStmtKind::IfElseIf; 2312 addUnwrappedLine(); 2313 } else if (FormatTok->Tok.is(tok::kw_if)) { 2314 FormatToken *Previous = Tokens->getPreviousToken(); 2315 const bool IsPrecededByComment = Previous && Previous->is(tok::comment); 2316 if (IsPrecededByComment) { 2317 addUnwrappedLine(); 
2318 ++Line->Level; 2319 } 2320 bool TooDeep = true; 2321 if (Style.RemoveBracesLLVM) { 2322 Kind = IfStmtKind::IfElseIf; 2323 TooDeep = NestedTooDeep.pop_back_val(); 2324 } 2325 ElseLeftBrace = 2326 parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces); 2327 if (Style.RemoveBracesLLVM) 2328 NestedTooDeep.push_back(TooDeep); 2329 if (IsPrecededByComment) 2330 --Line->Level; 2331 } else { 2332 addUnwrappedLine(); 2333 ++Line->Level; 2334 parseStructuralElement(); 2335 if (FormatTok->is(tok::eof)) 2336 addUnwrappedLine(); 2337 --Line->Level; 2338 } 2339 } else { 2340 if (Style.RemoveBracesLLVM) 2341 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; 2342 if (NeedsUnwrappedLine) 2343 addUnwrappedLine(); 2344 } 2345 2346 if (!Style.RemoveBracesLLVM) 2347 return nullptr; 2348 2349 assert(!NestedTooDeep.empty()); 2350 const bool KeepElseBraces = 2351 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back(); 2352 2353 NestedTooDeep.pop_back(); 2354 2355 if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) { 2356 markOptionalBraces(IfLeftBrace); 2357 markOptionalBraces(ElseLeftBrace); 2358 } else if (IfLeftBrace) { 2359 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; 2360 if (IfRightBrace) { 2361 assert(IfRightBrace->MatchingParen == IfLeftBrace); 2362 assert(!IfLeftBrace->Optional); 2363 assert(!IfRightBrace->Optional); 2364 IfLeftBrace->MatchingParen = nullptr; 2365 IfRightBrace->MatchingParen = nullptr; 2366 } 2367 } 2368 2369 if (IfKind) 2370 *IfKind = Kind; 2371 2372 return IfLeftBrace; 2373 } 2374 2375 void UnwrappedLineParser::parseTryCatch() { 2376 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2377 nextToken(); 2378 bool NeedsUnwrappedLine = false; 2379 if (FormatTok->is(tok::colon)) { 2380 // We are in a function try block, what comes is an initializer list. 
2381 nextToken(); 2382 2383 // In case identifiers were removed by clang-tidy, what might follow is 2384 // multiple commas in sequence - before the first identifier. 2385 while (FormatTok->is(tok::comma)) 2386 nextToken(); 2387 2388 while (FormatTok->is(tok::identifier)) { 2389 nextToken(); 2390 if (FormatTok->is(tok::l_paren)) 2391 parseParens(); 2392 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2393 FormatTok->is(tok::l_brace)) { 2394 do { 2395 nextToken(); 2396 } while (!FormatTok->is(tok::r_brace)); 2397 nextToken(); 2398 } 2399 2400 // In case identifiers were removed by clang-tidy, what might follow is 2401 // multiple commas in sequence - after the first identifier. 2402 while (FormatTok->is(tok::comma)) 2403 nextToken(); 2404 } 2405 } 2406 // Parse try with resource. 2407 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 2408 parseParens(); 2409 } 2410 2411 keepAncestorBraces(); 2412 2413 if (FormatTok->is(tok::l_brace)) { 2414 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2415 parseBlock(); 2416 if (Style.BraceWrapping.BeforeCatch) { 2417 addUnwrappedLine(); 2418 } else { 2419 NeedsUnwrappedLine = true; 2420 } 2421 } else if (!FormatTok->is(tok::kw_catch)) { 2422 // The C++ standard requires a compound-statement after a try. 2423 // If there's none, we try to assume there's a structuralElement 2424 // and try to continue. 
2425 addUnwrappedLine(); 2426 ++Line->Level; 2427 parseStructuralElement(); 2428 --Line->Level; 2429 } 2430 while (true) { 2431 if (FormatTok->is(tok::at)) 2432 nextToken(); 2433 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 2434 tok::kw___finally) || 2435 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 2436 FormatTok->is(Keywords.kw_finally)) || 2437 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 2438 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 2439 break; 2440 nextToken(); 2441 while (FormatTok->isNot(tok::l_brace)) { 2442 if (FormatTok->is(tok::l_paren)) { 2443 parseParens(); 2444 continue; 2445 } 2446 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) { 2447 if (Style.RemoveBracesLLVM) 2448 NestedTooDeep.pop_back(); 2449 return; 2450 } 2451 nextToken(); 2452 } 2453 NeedsUnwrappedLine = false; 2454 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2455 parseBlock(); 2456 if (Style.BraceWrapping.BeforeCatch) 2457 addUnwrappedLine(); 2458 else 2459 NeedsUnwrappedLine = true; 2460 } 2461 2462 if (Style.RemoveBracesLLVM) 2463 NestedTooDeep.pop_back(); 2464 2465 if (NeedsUnwrappedLine) 2466 addUnwrappedLine(); 2467 } 2468 2469 void UnwrappedLineParser::parseNamespace() { 2470 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 2471 "'namespace' expected"); 2472 2473 const FormatToken &InitialToken = *FormatTok; 2474 nextToken(); 2475 if (InitialToken.is(TT_NamespaceMacro)) { 2476 parseParens(); 2477 } else { 2478 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, 2479 tok::l_square, tok::period)) { 2480 if (FormatTok->is(tok::l_square)) 2481 parseSquare(); 2482 else 2483 nextToken(); 2484 } 2485 } 2486 if (FormatTok->Tok.is(tok::l_brace)) { 2487 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2488 addUnwrappedLine(); 2489 2490 unsigned AddLevels = 2491 Style.NamespaceIndentation == FormatStyle::NI_All || 2492 (Style.NamespaceIndentation == 
FormatStyle::NI_Inner && 2493 DeclarationScopeStack.size() > 1) 2494 ? 1u 2495 : 0u; 2496 bool ManageWhitesmithsBraces = 2497 AddLevels == 0u && 2498 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 2499 2500 // If we're in Whitesmiths mode, indent the brace if we're not indenting 2501 // the whole block. 2502 if (ManageWhitesmithsBraces) 2503 ++Line->Level; 2504 2505 parseBlock(/*MustBeDeclaration=*/true, AddLevels, 2506 /*MunchSemi=*/true, 2507 /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces); 2508 2509 // Munch the semicolon after a namespace. This is more common than one would 2510 // think. Putting the semicolon into its own line is very ugly. 2511 if (FormatTok->Tok.is(tok::semi)) 2512 nextToken(); 2513 2514 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); 2515 2516 if (ManageWhitesmithsBraces) 2517 --Line->Level; 2518 } 2519 // FIXME: Add error handling. 2520 } 2521 2522 void UnwrappedLineParser::parseNew() { 2523 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 2524 nextToken(); 2525 2526 if (Style.isCSharp()) { 2527 do { 2528 if (FormatTok->is(tok::l_brace)) 2529 parseBracedList(); 2530 2531 if (FormatTok->isOneOf(tok::semi, tok::comma)) 2532 return; 2533 2534 nextToken(); 2535 } while (!eof()); 2536 } 2537 2538 if (Style.Language != FormatStyle::LK_Java) 2539 return; 2540 2541 // In Java, we can parse everything up to the parens, which aren't optional. 2542 do { 2543 // There should not be a ;, { or } before the new's open paren. 2544 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 2545 return; 2546 2547 // Consume the parens. 2548 if (FormatTok->is(tok::l_paren)) { 2549 parseParens(); 2550 2551 // If there is a class body of an anonymous class, consume that as child. 
2552 if (FormatTok->is(tok::l_brace)) 2553 parseChildBlock(); 2554 return; 2555 } 2556 nextToken(); 2557 } while (!eof()); 2558 } 2559 2560 void UnwrappedLineParser::parseForOrWhileLoop() { 2561 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 2562 "'for', 'while' or foreach macro expected"); 2563 nextToken(); 2564 // JS' for await ( ... 2565 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 2566 nextToken(); 2567 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 2568 nextToken(); 2569 if (FormatTok->Tok.is(tok::l_paren)) 2570 parseParens(); 2571 2572 keepAncestorBraces(); 2573 2574 if (FormatTok->Tok.is(tok::l_brace)) { 2575 FormatToken *LeftBrace = FormatTok; 2576 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2577 parseBlock(); 2578 if (Style.RemoveBracesLLVM) { 2579 assert(!NestedTooDeep.empty()); 2580 if (!NestedTooDeep.back()) 2581 markOptionalBraces(LeftBrace); 2582 } 2583 addUnwrappedLine(); 2584 } else { 2585 addUnwrappedLine(); 2586 ++Line->Level; 2587 parseStructuralElement(); 2588 --Line->Level; 2589 } 2590 2591 if (Style.RemoveBracesLLVM) 2592 NestedTooDeep.pop_back(); 2593 } 2594 2595 void UnwrappedLineParser::parseDoWhile() { 2596 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 2597 nextToken(); 2598 2599 keepAncestorBraces(); 2600 2601 if (FormatTok->Tok.is(tok::l_brace)) { 2602 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2603 parseBlock(); 2604 if (Style.BraceWrapping.BeforeWhile) 2605 addUnwrappedLine(); 2606 } else { 2607 addUnwrappedLine(); 2608 ++Line->Level; 2609 parseStructuralElement(); 2610 --Line->Level; 2611 } 2612 2613 if (Style.RemoveBracesLLVM) 2614 NestedTooDeep.pop_back(); 2615 2616 // FIXME: Add error handling. 2617 if (!FormatTok->Tok.is(tok::kw_while)) { 2618 addUnwrappedLine(); 2619 return; 2620 } 2621 2622 // If in Whitesmiths mode, the line with the while() needs to be indented 2623 // to the same level as the block. 
2624 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 2625 ++Line->Level; 2626 2627 nextToken(); 2628 parseStructuralElement(); 2629 } 2630 2631 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 2632 nextToken(); 2633 unsigned OldLineLevel = Line->Level; 2634 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 2635 --Line->Level; 2636 if (LeftAlignLabel) 2637 Line->Level = 0; 2638 2639 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 2640 FormatTok->Tok.is(tok::l_brace)) { 2641 2642 CompoundStatementIndenter Indenter(this, Line->Level, 2643 Style.BraceWrapping.AfterCaseLabel, 2644 Style.BraceWrapping.IndentBraces); 2645 parseBlock(); 2646 if (FormatTok->Tok.is(tok::kw_break)) { 2647 if (Style.BraceWrapping.AfterControlStatement == 2648 FormatStyle::BWACS_Always) { 2649 addUnwrappedLine(); 2650 if (!Style.IndentCaseBlocks && 2651 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 2652 ++Line->Level; 2653 } 2654 } 2655 parseStructuralElement(); 2656 } 2657 addUnwrappedLine(); 2658 } else { 2659 if (FormatTok->is(tok::semi)) 2660 nextToken(); 2661 addUnwrappedLine(); 2662 } 2663 Line->Level = OldLineLevel; 2664 if (FormatTok->isNot(tok::l_brace)) { 2665 parseStructuralElement(); 2666 addUnwrappedLine(); 2667 } 2668 } 2669 2670 void UnwrappedLineParser::parseCaseLabel() { 2671 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 2672 2673 // FIXME: fix handling of complex expressions here. 
2674 do { 2675 nextToken(); 2676 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 2677 parseLabel(); 2678 } 2679 2680 void UnwrappedLineParser::parseSwitch() { 2681 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 2682 nextToken(); 2683 if (FormatTok->Tok.is(tok::l_paren)) 2684 parseParens(); 2685 2686 keepAncestorBraces(); 2687 2688 if (FormatTok->Tok.is(tok::l_brace)) { 2689 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2690 parseBlock(); 2691 addUnwrappedLine(); 2692 } else { 2693 addUnwrappedLine(); 2694 ++Line->Level; 2695 parseStructuralElement(); 2696 --Line->Level; 2697 } 2698 2699 if (Style.RemoveBracesLLVM) 2700 NestedTooDeep.pop_back(); 2701 } 2702 2703 void UnwrappedLineParser::parseAccessSpecifier() { 2704 nextToken(); 2705 // Understand Qt's slots. 2706 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 2707 nextToken(); 2708 // Otherwise, we don't know what it is, and we'd better keep the next token. 2709 if (FormatTok->Tok.is(tok::colon)) 2710 nextToken(); 2711 addUnwrappedLine(); 2712 } 2713 2714 void UnwrappedLineParser::parseConcept() { 2715 assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected"); 2716 nextToken(); 2717 if (!FormatTok->Tok.is(tok::identifier)) 2718 return; 2719 nextToken(); 2720 if (!FormatTok->Tok.is(tok::equal)) 2721 return; 2722 nextToken(); 2723 if (FormatTok->Tok.is(tok::kw_requires)) { 2724 nextToken(); 2725 parseRequiresExpression(Line->Level); 2726 } else { 2727 parseConstraintExpression(Line->Level); 2728 } 2729 } 2730 2731 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) { 2732 // requires (R range) 2733 if (FormatTok->Tok.is(tok::l_paren)) { 2734 parseParens(); 2735 if (Style.IndentRequires && OriginalLevel != Line->Level) { 2736 addUnwrappedLine(); 2737 --Line->Level; 2738 } 2739 } 2740 2741 if (FormatTok->Tok.is(tok::l_brace)) { 2742 if (Style.BraceWrapping.AfterFunction) 2743 addUnwrappedLine(); 2744 
FormatTok->setType(TT_FunctionLBrace); 2745 parseBlock(); 2746 addUnwrappedLine(); 2747 } else { 2748 parseConstraintExpression(OriginalLevel); 2749 } 2750 } 2751 2752 void UnwrappedLineParser::parseConstraintExpression( 2753 unsigned int OriginalLevel) { 2754 // requires Id<T> && Id<T> || Id<T> 2755 while ( 2756 FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) { 2757 nextToken(); 2758 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less, 2759 tok::greater, tok::comma, tok::ellipsis)) { 2760 if (FormatTok->Tok.is(tok::less)) { 2761 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2762 /*ClosingBraceKind=*/tok::greater); 2763 continue; 2764 } 2765 nextToken(); 2766 } 2767 if (FormatTok->Tok.is(tok::kw_requires)) { 2768 parseRequiresExpression(OriginalLevel); 2769 } 2770 if (FormatTok->Tok.is(tok::less)) { 2771 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2772 /*ClosingBraceKind=*/tok::greater); 2773 } 2774 2775 if (FormatTok->Tok.is(tok::l_paren)) { 2776 parseParens(); 2777 } 2778 if (FormatTok->Tok.is(tok::l_brace)) { 2779 if (Style.BraceWrapping.AfterFunction) 2780 addUnwrappedLine(); 2781 FormatTok->setType(TT_FunctionLBrace); 2782 parseBlock(); 2783 } 2784 if (FormatTok->Tok.is(tok::semi)) { 2785 // Eat any trailing semi. 
2786 nextToken(); 2787 addUnwrappedLine(); 2788 } 2789 if (FormatTok->Tok.is(tok::colon)) { 2790 return; 2791 } 2792 if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) { 2793 if (FormatTok->Previous && 2794 !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires, 2795 tok::coloncolon)) { 2796 addUnwrappedLine(); 2797 } 2798 if (Style.IndentRequires && OriginalLevel != Line->Level) { 2799 --Line->Level; 2800 } 2801 break; 2802 } else { 2803 FormatTok->setType(TT_ConstraintJunctions); 2804 } 2805 2806 nextToken(); 2807 } 2808 } 2809 2810 void UnwrappedLineParser::parseRequires() { 2811 assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected"); 2812 2813 unsigned OriginalLevel = Line->Level; 2814 if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) { 2815 addUnwrappedLine(); 2816 if (Style.IndentRequires) { 2817 ++Line->Level; 2818 } 2819 } 2820 nextToken(); 2821 2822 parseRequiresExpression(OriginalLevel); 2823 } 2824 2825 bool UnwrappedLineParser::parseEnum() { 2826 const FormatToken &InitialToken = *FormatTok; 2827 2828 // Won't be 'enum' for NS_ENUMs. 2829 if (FormatTok->Tok.is(tok::kw_enum)) 2830 nextToken(); 2831 2832 // In TypeScript, "enum" can also be used as property name, e.g. in interface 2833 // declarations. An "enum" keyword followed by a colon would be a syntax 2834 // error and thus assume it is just an identifier. 2835 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 2836 return false; 2837 2838 // In protobuf, "enum" can be used as a field name. 2839 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 2840 return false; 2841 2842 // Eat up enum class ... 
2843 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 2844 nextToken(); 2845 2846 while (FormatTok->Tok.getIdentifierInfo() || 2847 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 2848 tok::greater, tok::comma, tok::question)) { 2849 nextToken(); 2850 // We can have macros or attributes in between 'enum' and the enum name. 2851 if (FormatTok->is(tok::l_paren)) 2852 parseParens(); 2853 if (FormatTok->is(tok::identifier)) { 2854 nextToken(); 2855 // If there are two identifiers in a row, this is likely an elaborate 2856 // return type. In Java, this can be "implements", etc. 2857 if (Style.isCpp() && FormatTok->is(tok::identifier)) 2858 return false; 2859 } 2860 } 2861 2862 // Just a declaration or something is wrong. 2863 if (FormatTok->isNot(tok::l_brace)) 2864 return true; 2865 FormatTok->setBlockKind(BK_Block); 2866 2867 if (Style.Language == FormatStyle::LK_Java) { 2868 // Java enums are different. 2869 parseJavaEnumBody(); 2870 return true; 2871 } 2872 if (Style.Language == FormatStyle::LK_Proto) { 2873 parseBlock(/*MustBeDeclaration=*/true); 2874 return true; 2875 } 2876 2877 if (!Style.AllowShortEnumsOnASingleLine && 2878 ShouldBreakBeforeBrace(Style, InitialToken)) 2879 addUnwrappedLine(); 2880 // Parse enum body. 2881 nextToken(); 2882 if (!Style.AllowShortEnumsOnASingleLine) { 2883 addUnwrappedLine(); 2884 Line->Level += 1; 2885 } 2886 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true, 2887 /*IsEnum=*/true); 2888 if (!Style.AllowShortEnumsOnASingleLine) 2889 Line->Level -= 1; 2890 if (HasError) { 2891 if (FormatTok->is(tok::semi)) 2892 nextToken(); 2893 addUnwrappedLine(); 2894 } 2895 return true; 2896 2897 // There is no addUnwrappedLine() here so that we fall through to parsing a 2898 // structural element afterwards. Thus, in "enum A {} n, m;", 2899 // "} n, m;" will end up in one unwrapped line. 
2900 } 2901 2902 bool UnwrappedLineParser::parseStructLike() { 2903 // parseRecord falls through and does not yet add an unwrapped line as a 2904 // record declaration or definition can start a structural element. 2905 parseRecord(); 2906 // This does not apply to Java, JavaScript and C#. 2907 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 2908 Style.isCSharp()) { 2909 if (FormatTok->is(tok::semi)) 2910 nextToken(); 2911 addUnwrappedLine(); 2912 return true; 2913 } 2914 return false; 2915 } 2916 2917 namespace { 2918 // A class used to set and restore the Token position when peeking 2919 // ahead in the token source. 2920 class ScopedTokenPosition { 2921 unsigned StoredPosition; 2922 FormatTokenSource *Tokens; 2923 2924 public: 2925 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 2926 assert(Tokens && "Tokens expected to not be null"); 2927 StoredPosition = Tokens->getPosition(); 2928 } 2929 2930 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 2931 }; 2932 } // namespace 2933 2934 // Look to see if we have [[ by looking ahead, if 2935 // its not then rewind to the original position. 2936 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 2937 ScopedTokenPosition AutoPosition(Tokens); 2938 FormatToken *Tok = Tokens->getNextToken(); 2939 // We already read the first [ check for the second. 2940 if (!Tok->is(tok::l_square)) { 2941 return false; 2942 } 2943 // Double check that the attribute is just something 2944 // fairly simple. 
2945 while (Tok->isNot(tok::eof)) { 2946 if (Tok->is(tok::r_square)) { 2947 break; 2948 } 2949 Tok = Tokens->getNextToken(); 2950 } 2951 if (Tok->is(tok::eof)) 2952 return false; 2953 Tok = Tokens->getNextToken(); 2954 if (!Tok->is(tok::r_square)) { 2955 return false; 2956 } 2957 Tok = Tokens->getNextToken(); 2958 if (Tok->is(tok::semi)) { 2959 return false; 2960 } 2961 return true; 2962 } 2963 2964 void UnwrappedLineParser::parseJavaEnumBody() { 2965 // Determine whether the enum is simple, i.e. does not have a semicolon or 2966 // constants with class bodies. Simple enums can be formatted like braced 2967 // lists, contracted to a single line, etc. 2968 unsigned StoredPosition = Tokens->getPosition(); 2969 bool IsSimple = true; 2970 FormatToken *Tok = Tokens->getNextToken(); 2971 while (!Tok->is(tok::eof)) { 2972 if (Tok->is(tok::r_brace)) 2973 break; 2974 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2975 IsSimple = false; 2976 break; 2977 } 2978 // FIXME: This will also mark enums with braces in the arguments to enum 2979 // constants as "not simple". This is probably fine in practice, though. 2980 Tok = Tokens->getNextToken(); 2981 } 2982 FormatTok = Tokens->setPosition(StoredPosition); 2983 2984 if (IsSimple) { 2985 nextToken(); 2986 parseBracedList(); 2987 addUnwrappedLine(); 2988 return; 2989 } 2990 2991 // Parse the body of a more complex enum. 2992 // First add a line for everything up to the "{". 2993 nextToken(); 2994 addUnwrappedLine(); 2995 ++Line->Level; 2996 2997 // Parse the enum constants. 2998 while (FormatTok) { 2999 if (FormatTok->is(tok::l_brace)) { 3000 // Parse the constant's class body. 
3001 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 3002 /*MunchSemi=*/false); 3003 } else if (FormatTok->is(tok::l_paren)) { 3004 parseParens(); 3005 } else if (FormatTok->is(tok::comma)) { 3006 nextToken(); 3007 addUnwrappedLine(); 3008 } else if (FormatTok->is(tok::semi)) { 3009 nextToken(); 3010 addUnwrappedLine(); 3011 break; 3012 } else if (FormatTok->is(tok::r_brace)) { 3013 addUnwrappedLine(); 3014 break; 3015 } else { 3016 nextToken(); 3017 } 3018 } 3019 3020 // Parse the class body after the enum's ";" if any. 3021 parseLevel(/*HasOpeningBrace=*/true); 3022 nextToken(); 3023 --Line->Level; 3024 addUnwrappedLine(); 3025 } 3026 3027 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3028 const FormatToken &InitialToken = *FormatTok; 3029 nextToken(); 3030 3031 // The actual identifier can be a nested name specifier, and in macros 3032 // it is often token-pasted. 3033 // An [[attribute]] can be before the identifier. 3034 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 3035 tok::kw___attribute, tok::kw___declspec, 3036 tok::kw_alignas, tok::l_square, tok::r_square) || 3037 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 3038 FormatTok->isOneOf(tok::period, tok::comma))) { 3039 if (Style.isJavaScript() && 3040 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 3041 // JavaScript/TypeScript supports inline object types in 3042 // extends/implements positions: 3043 // class Foo implements {bar: number} { } 3044 nextToken(); 3045 if (FormatTok->is(tok::l_brace)) { 3046 tryToParseBracedList(); 3047 continue; 3048 } 3049 } 3050 bool IsNonMacroIdentifier = 3051 FormatTok->is(tok::identifier) && 3052 FormatTok->TokenText != FormatTok->TokenText.upper(); 3053 nextToken(); 3054 // We can have macros or attributes in between 'class' and the class name. 
3055 if (!IsNonMacroIdentifier) { 3056 if (FormatTok->Tok.is(tok::l_paren)) { 3057 parseParens(); 3058 } else if (FormatTok->is(TT_AttributeSquare)) { 3059 parseSquare(); 3060 // Consume the closing TT_AttributeSquare. 3061 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare)) 3062 nextToken(); 3063 } 3064 } 3065 } 3066 3067 // Note that parsing away template declarations here leads to incorrectly 3068 // accepting function declarations as record declarations. 3069 // In general, we cannot solve this problem. Consider: 3070 // class A<int> B() {} 3071 // which can be a function definition or a class definition when B() is a 3072 // macro. If we find enough real-world cases where this is a problem, we 3073 // can parse for the 'template' keyword in the beginning of the statement, 3074 // and thus rule out the record production in case there is no template 3075 // (this would still leave us with an ambiguity between template function 3076 // and class declarations). 3077 if (FormatTok->isOneOf(tok::colon, tok::less)) { 3078 while (!eof()) { 3079 if (FormatTok->is(tok::l_brace)) { 3080 calculateBraceTypes(/*ExpectClassBody=*/true); 3081 if (!tryToParseBracedList()) 3082 break; 3083 } 3084 if (FormatTok->is(tok::l_square) && !tryToParseLambda()) 3085 break; 3086 if (FormatTok->Tok.is(tok::semi)) 3087 return; 3088 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 3089 addUnwrappedLine(); 3090 nextToken(); 3091 parseCSharpGenericTypeConstraint(); 3092 break; 3093 } 3094 nextToken(); 3095 } 3096 } 3097 if (FormatTok->Tok.is(tok::l_brace)) { 3098 if (ParseAsExpr) { 3099 parseChildBlock(); 3100 } else { 3101 if (ShouldBreakBeforeBrace(Style, InitialToken)) 3102 addUnwrappedLine(); 3103 3104 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 3105 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 3106 } 3107 } 3108 // There is no addUnwrappedLine() here so that we fall through to parsing a 3109 // structural element afterwards. 
Thus, in "class A {} n, m;", 3110 // "} n, m;" will end up in one unwrapped line. 3111 } 3112 3113 void UnwrappedLineParser::parseObjCMethod() { 3114 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && 3115 "'(' or identifier expected."); 3116 do { 3117 if (FormatTok->Tok.is(tok::semi)) { 3118 nextToken(); 3119 addUnwrappedLine(); 3120 return; 3121 } else if (FormatTok->Tok.is(tok::l_brace)) { 3122 if (Style.BraceWrapping.AfterFunction) 3123 addUnwrappedLine(); 3124 parseBlock(); 3125 addUnwrappedLine(); 3126 return; 3127 } else { 3128 nextToken(); 3129 } 3130 } while (!eof()); 3131 } 3132 3133 void UnwrappedLineParser::parseObjCProtocolList() { 3134 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 3135 do { 3136 nextToken(); 3137 // Early exit in case someone forgot a close angle. 3138 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3139 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 3140 return; 3141 } while (!eof() && FormatTok->Tok.isNot(tok::greater)); 3142 nextToken(); // Skip '>'. 3143 } 3144 3145 void UnwrappedLineParser::parseObjCUntilAtEnd() { 3146 do { 3147 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 3148 nextToken(); 3149 addUnwrappedLine(); 3150 break; 3151 } 3152 if (FormatTok->is(tok::l_brace)) { 3153 parseBlock(); 3154 // In ObjC interfaces, nothing should be following the "}". 3155 addUnwrappedLine(); 3156 } else if (FormatTok->is(tok::r_brace)) { 3157 // Ignore stray "}". parseStructuralElement doesn't consume them. 
3158 nextToken(); 3159 addUnwrappedLine(); 3160 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 3161 nextToken(); 3162 parseObjCMethod(); 3163 } else { 3164 parseStructuralElement(); 3165 } 3166 } while (!eof()); 3167 } 3168 3169 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 3170 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 3171 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 3172 nextToken(); 3173 nextToken(); // interface name 3174 3175 // @interface can be followed by a lightweight generic 3176 // specialization list, then either a base class or a category. 3177 if (FormatTok->Tok.is(tok::less)) { 3178 parseObjCLightweightGenerics(); 3179 } 3180 if (FormatTok->Tok.is(tok::colon)) { 3181 nextToken(); 3182 nextToken(); // base class name 3183 // The base class can also have lightweight generics applied to it. 3184 if (FormatTok->Tok.is(tok::less)) { 3185 parseObjCLightweightGenerics(); 3186 } 3187 } else if (FormatTok->Tok.is(tok::l_paren)) 3188 // Skip category, if present. 3189 parseParens(); 3190 3191 if (FormatTok->Tok.is(tok::less)) 3192 parseObjCProtocolList(); 3193 3194 if (FormatTok->Tok.is(tok::l_brace)) { 3195 if (Style.BraceWrapping.AfterObjCDeclaration) 3196 addUnwrappedLine(); 3197 parseBlock(/*MustBeDeclaration=*/true); 3198 } 3199 3200 // With instance variables, this puts '}' on its own line. Without instance 3201 // variables, this ends the @interface line. 3202 addUnwrappedLine(); 3203 3204 parseObjCUntilAtEnd(); 3205 } 3206 3207 void UnwrappedLineParser::parseObjCLightweightGenerics() { 3208 assert(FormatTok->Tok.is(tok::less)); 3209 // Unlike protocol lists, generic parameterizations support 3210 // nested angles: 3211 // 3212 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 3213 // NSObject <NSCopying, NSSecureCoding> 3214 // 3215 // so we need to count how many open angles we have left. 
3216 unsigned NumOpenAngles = 1; 3217 do { 3218 nextToken(); 3219 // Early exit in case someone forgot a close angle. 3220 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3221 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 3222 break; 3223 if (FormatTok->Tok.is(tok::less)) 3224 ++NumOpenAngles; 3225 else if (FormatTok->Tok.is(tok::greater)) { 3226 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 3227 --NumOpenAngles; 3228 } 3229 } while (!eof() && NumOpenAngles != 0); 3230 nextToken(); // Skip '>'. 3231 } 3232 3233 // Returns true for the declaration/definition form of @protocol, 3234 // false for the expression form. 3235 bool UnwrappedLineParser::parseObjCProtocol() { 3236 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 3237 nextToken(); 3238 3239 if (FormatTok->is(tok::l_paren)) 3240 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 3241 return false; 3242 3243 // The definition/declaration form, 3244 // @protocol Foo 3245 // - (int)someMethod; 3246 // @end 3247 3248 nextToken(); // protocol name 3249 3250 if (FormatTok->Tok.is(tok::less)) 3251 parseObjCProtocolList(); 3252 3253 // Check for protocol declaration. 3254 if (FormatTok->Tok.is(tok::semi)) { 3255 nextToken(); 3256 addUnwrappedLine(); 3257 return true; 3258 } 3259 3260 addUnwrappedLine(); 3261 parseObjCUntilAtEnd(); 3262 return true; 3263 } 3264 3265 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 3266 bool IsImport = FormatTok->is(Keywords.kw_import); 3267 assert(IsImport || FormatTok->is(tok::kw_export)); 3268 nextToken(); 3269 3270 // Consume the "default" in "export default class/function". 3271 if (FormatTok->is(tok::kw_default)) 3272 nextToken(); 3273 3274 // Consume "async function", "function" and "default function", so that these 3275 // get parsed as free-standing JS functions, i.e. do not require a trailing 3276 // semicolon. 
3277 if (FormatTok->is(Keywords.kw_async)) 3278 nextToken(); 3279 if (FormatTok->is(Keywords.kw_function)) { 3280 nextToken(); 3281 return; 3282 } 3283 3284 // For imports, `export *`, `export {...}`, consume the rest of the line up 3285 // to the terminating `;`. For everything else, just return and continue 3286 // parsing the structural element, i.e. the declaration or expression for 3287 // `export default`. 3288 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 3289 !FormatTok->isStringLiteral()) 3290 return; 3291 3292 while (!eof()) { 3293 if (FormatTok->is(tok::semi)) 3294 return; 3295 if (Line->Tokens.empty()) { 3296 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 3297 // import statement should terminate. 3298 return; 3299 } 3300 if (FormatTok->is(tok::l_brace)) { 3301 FormatTok->setBlockKind(BK_Block); 3302 nextToken(); 3303 parseBracedList(); 3304 } else { 3305 nextToken(); 3306 } 3307 } 3308 } 3309 3310 void UnwrappedLineParser::parseStatementMacro() { 3311 nextToken(); 3312 if (FormatTok->is(tok::l_paren)) 3313 parseParens(); 3314 if (FormatTok->is(tok::semi)) 3315 nextToken(); 3316 addUnwrappedLine(); 3317 } 3318 3319 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 3320 StringRef Prefix = "") { 3321 llvm::dbgs() << Prefix << "Line(" << Line.Level 3322 << ", FSC=" << Line.FirstStartColumn << ")" 3323 << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 3324 for (const auto &Node : Line.Tokens) { 3325 llvm::dbgs() << Node.Tok->Tok.getName() << "[" 3326 << "T=" << static_cast<unsigned>(Node.Tok->getType()) 3327 << ", OC=" << Node.Tok->OriginalColumn << "] "; 3328 } 3329 for (const auto &Node : Line.Tokens) 3330 for (const auto &ChildNode : Node.Children) 3331 printDebugInfo(ChildNode, "\nChild: "); 3332 3333 llvm::dbgs() << "\n"; 3334 } 3335 3336 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 3337 if (Line->Tokens.empty()) 3338 return; 3339 LLVM_DEBUG({ 3340 if (CurrentLines == &Lines) 3341 printDebugInfo(*Line); 3342 }); 3343 3344 // If this line closes a block when in Whitesmiths mode, remember that 3345 // information so that the level can be decreased after the line is added. 3346 // This has to happen after the addition of the line since the line itself 3347 // needs to be indented. 3348 bool ClosesWhitesmithsBlock = 3349 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 3350 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 3351 3352 CurrentLines->push_back(std::move(*Line)); 3353 Line->Tokens.clear(); 3354 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 3355 Line->FirstStartColumn = 0; 3356 3357 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 3358 --Line->Level; 3359 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 3360 CurrentLines->append( 3361 std::make_move_iterator(PreprocessorDirectives.begin()), 3362 std::make_move_iterator(PreprocessorDirectives.end())); 3363 PreprocessorDirectives.clear(); 3364 } 3365 // Disconnect the current token from the last token on the previous line. 
3366 FormatTok->Previous = nullptr; 3367 } 3368 3369 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 3370 3371 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 3372 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 3373 FormatTok.NewlinesBefore > 0; 3374 } 3375 3376 // Checks if \p FormatTok is a line comment that continues the line comment 3377 // section on \p Line. 3378 static bool 3379 continuesLineCommentSection(const FormatToken &FormatTok, 3380 const UnwrappedLine &Line, 3381 const llvm::Regex &CommentPragmasRegex) { 3382 if (Line.Tokens.empty()) 3383 return false; 3384 3385 StringRef IndentContent = FormatTok.TokenText; 3386 if (FormatTok.TokenText.startswith("//") || 3387 FormatTok.TokenText.startswith("/*")) 3388 IndentContent = FormatTok.TokenText.substr(2); 3389 if (CommentPragmasRegex.match(IndentContent)) 3390 return false; 3391 3392 // If Line starts with a line comment, then FormatTok continues the comment 3393 // section if its original column is greater or equal to the original start 3394 // column of the line. 3395 // 3396 // Define the min column token of a line as follows: if a line ends in '{' or 3397 // contains a '{' followed by a line comment, then the min column token is 3398 // that '{'. Otherwise, the min column token of the line is the first token of 3399 // the line. 3400 // 3401 // If Line starts with a token other than a line comment, then FormatTok 3402 // continues the comment section if its original column is greater than the 3403 // original start column of the min column token of the line. 
3404 // 3405 // For example, the second line comment continues the first in these cases: 3406 // 3407 // // first line 3408 // // second line 3409 // 3410 // and: 3411 // 3412 // // first line 3413 // // second line 3414 // 3415 // and: 3416 // 3417 // int i; // first line 3418 // // second line 3419 // 3420 // and: 3421 // 3422 // do { // first line 3423 // // second line 3424 // int i; 3425 // } while (true); 3426 // 3427 // and: 3428 // 3429 // enum { 3430 // a, // first line 3431 // // second line 3432 // b 3433 // }; 3434 // 3435 // The second line comment doesn't continue the first in these cases: 3436 // 3437 // // first line 3438 // // second line 3439 // 3440 // and: 3441 // 3442 // int i; // first line 3443 // // second line 3444 // 3445 // and: 3446 // 3447 // do { // first line 3448 // // second line 3449 // int i; 3450 // } while (true); 3451 // 3452 // and: 3453 // 3454 // enum { 3455 // a, // first line 3456 // // second line 3457 // }; 3458 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 3459 3460 // Scan for '{//'. If found, use the column of '{' as a min column for line 3461 // comment section continuation. 3462 const FormatToken *PreviousToken = nullptr; 3463 for (const UnwrappedLineNode &Node : Line.Tokens) { 3464 if (PreviousToken && PreviousToken->is(tok::l_brace) && 3465 isLineComment(*Node.Tok)) { 3466 MinColumnToken = PreviousToken; 3467 break; 3468 } 3469 PreviousToken = Node.Tok; 3470 3471 // Grab the last newline preceding a token in this unwrapped line. 
3472 if (Node.Tok->NewlinesBefore > 0) { 3473 MinColumnToken = Node.Tok; 3474 } 3475 } 3476 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 3477 MinColumnToken = PreviousToken; 3478 } 3479 3480 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 3481 MinColumnToken); 3482 } 3483 3484 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 3485 bool JustComments = Line->Tokens.empty(); 3486 for (FormatToken *Tok : CommentsBeforeNextToken) { 3487 // Line comments that belong to the same line comment section are put on the 3488 // same line since later we might want to reflow content between them. 3489 // Additional fine-grained breaking of line comment sections is controlled 3490 // by the class BreakableLineCommentSection in case it is desirable to keep 3491 // several line comment sections in the same unwrapped line. 3492 // 3493 // FIXME: Consider putting separate line comment sections as children to the 3494 // unwrapped line instead. 3495 Tok->ContinuesLineCommentSection = 3496 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 3497 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 3498 addUnwrappedLine(); 3499 pushToken(Tok); 3500 } 3501 if (NewlineBeforeNext && JustComments) 3502 addUnwrappedLine(); 3503 CommentsBeforeNextToken.clear(); 3504 } 3505 3506 void UnwrappedLineParser::nextToken(int LevelDifference) { 3507 if (eof()) 3508 return; 3509 flushComments(isOnNewLine(*FormatTok)); 3510 pushToken(FormatTok); 3511 FormatToken *Previous = FormatTok; 3512 if (!Style.isJavaScript()) 3513 readToken(LevelDifference); 3514 else 3515 readTokenWithJavaScriptASI(); 3516 FormatTok->Previous = Previous; 3517 } 3518 3519 void UnwrappedLineParser::distributeComments( 3520 const SmallVectorImpl<FormatToken *> &Comments, 3521 const FormatToken *NextTok) { 3522 // Whether or not a line comment token continues a line is controlled by 3523 // the method continuesLineCommentSection, with the 
following caveat: 3524 // 3525 // Define a trail of Comments to be a nonempty proper postfix of Comments such 3526 // that each comment line from the trail is aligned with the next token, if 3527 // the next token exists. If a trail exists, the beginning of the maximal 3528 // trail is marked as a start of a new comment section. 3529 // 3530 // For example in this code: 3531 // 3532 // int a; // line about a 3533 // // line 1 about b 3534 // // line 2 about b 3535 // int b; 3536 // 3537 // the two lines about b form a maximal trail, so there are two sections, the 3538 // first one consisting of the single comment "// line about a" and the 3539 // second one consisting of the next two comments. 3540 if (Comments.empty()) 3541 return; 3542 bool ShouldPushCommentsInCurrentLine = true; 3543 bool HasTrailAlignedWithNextToken = false; 3544 unsigned StartOfTrailAlignedWithNextToken = 0; 3545 if (NextTok) { 3546 // We are skipping the first element intentionally. 3547 for (unsigned i = Comments.size() - 1; i > 0; --i) { 3548 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 3549 HasTrailAlignedWithNextToken = true; 3550 StartOfTrailAlignedWithNextToken = i; 3551 } 3552 } 3553 } 3554 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 3555 FormatToken *FormatTok = Comments[i]; 3556 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 3557 FormatTok->ContinuesLineCommentSection = false; 3558 } else { 3559 FormatTok->ContinuesLineCommentSection = 3560 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 3561 } 3562 if (!FormatTok->ContinuesLineCommentSection && 3563 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 3564 ShouldPushCommentsInCurrentLine = false; 3565 } 3566 if (ShouldPushCommentsInCurrentLine) { 3567 pushToken(FormatTok); 3568 } else { 3569 CommentsBeforeNextToken.push_back(FormatTok); 3570 } 3571 } 3572 } 3573 3574 void UnwrappedLineParser::readToken(int LevelDifference) { 3575 SmallVector<FormatToken *, 
1> Comments; 3576 do { 3577 FormatTok = Tokens->getNextToken(); 3578 assert(FormatTok); 3579 while (FormatTok->getType() == TT_ConflictStart || 3580 FormatTok->getType() == TT_ConflictEnd || 3581 FormatTok->getType() == TT_ConflictAlternative) { 3582 if (FormatTok->getType() == TT_ConflictStart) { 3583 conditionalCompilationStart(/*Unreachable=*/false); 3584 } else if (FormatTok->getType() == TT_ConflictAlternative) { 3585 conditionalCompilationAlternative(); 3586 } else if (FormatTok->getType() == TT_ConflictEnd) { 3587 conditionalCompilationEnd(); 3588 } 3589 FormatTok = Tokens->getNextToken(); 3590 FormatTok->MustBreakBefore = true; 3591 } 3592 3593 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 3594 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 3595 distributeComments(Comments, FormatTok); 3596 Comments.clear(); 3597 // If there is an unfinished unwrapped line, we flush the preprocessor 3598 // directives only after that unwrapped line was finished later. 3599 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 3600 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 3601 assert((LevelDifference >= 0 || 3602 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 3603 "LevelDifference makes Line->Level negative"); 3604 Line->Level += LevelDifference; 3605 // Comments stored before the preprocessor directive need to be output 3606 // before the preprocessor directive, at the same level as the 3607 // preprocessor directive, as we consider them to apply to the directive. 
3608 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 3609 PPBranchLevel > 0) 3610 Line->Level += PPBranchLevel; 3611 flushComments(isOnNewLine(*FormatTok)); 3612 parsePPDirective(); 3613 } 3614 3615 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 3616 !Line->InPPDirective) { 3617 continue; 3618 } 3619 3620 if (!FormatTok->Tok.is(tok::comment)) { 3621 distributeComments(Comments, FormatTok); 3622 Comments.clear(); 3623 return; 3624 } 3625 3626 Comments.push_back(FormatTok); 3627 } while (!eof()); 3628 3629 distributeComments(Comments, nullptr); 3630 Comments.clear(); 3631 } 3632 3633 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 3634 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 3635 if (MustBreakBeforeNextToken) { 3636 Line->Tokens.back().Tok->MustBreakBefore = true; 3637 MustBreakBeforeNextToken = false; 3638 } 3639 } 3640 3641 } // end namespace format 3642 } // end namespace clang 3643