1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "TokenAnnotator.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/Support/Debug.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 #include <algorithm> 23 24 #define DEBUG_TYPE "format-parser" 25 26 namespace clang { 27 namespace format { 28 29 class FormatTokenSource { 30 public: 31 virtual ~FormatTokenSource() {} 32 33 // Returns the next token in the token stream. 34 virtual FormatToken *getNextToken() = 0; 35 36 // Returns the token preceding the token returned by the last call to 37 // getNextToken() in the token stream, or nullptr if no such token exists. 38 virtual FormatToken *getPreviousToken() = 0; 39 40 // Returns the token that would be returned by the next call to 41 // getNextToken(). 42 virtual FormatToken *peekNextToken() = 0; 43 44 // Returns whether we are at the end of the file. 45 // This can be different from whether getNextToken() returned an eof token 46 // when the FormatTokenSource is a view on a part of the token stream. 47 virtual bool isEOF() = 0; 48 49 // Gets the current position in the token stream, to be used by setPosition(). 50 virtual unsigned getPosition() = 0; 51 52 // Resets the token stream to the state it was in when getPosition() returned 53 // Position, and return the token at that position in the stream. 54 virtual FormatToken *setPosition(unsigned Position) = 0; 55 }; 56 57 namespace { 58 59 class ScopedDeclarationState { 60 public: 61 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, 62 bool MustBeDeclaration) 63 : Line(Line), Stack(Stack) { 64 Line.MustBeDeclaration = MustBeDeclaration; 65 Stack.push_back(MustBeDeclaration); 66 } 67 ~ScopedDeclarationState() { 68 Stack.pop_back(); 69 if (!Stack.empty()) 70 Line.MustBeDeclaration = Stack.back(); 71 else 72 Line.MustBeDeclaration = true; 73 } 74 75 private: 76 UnwrappedLine &Line; 77 llvm::BitVector &Stack; 78 }; 79 80 static bool isLineComment(const FormatToken &FormatTok) { 81 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 82 } 83 84 // Checks if \p FormatTok is a line comment that continues the line comment 85 // \p Previous. The original column of \p MinColumnToken is used to determine 86 // whether \p FormatTok is indented enough to the right to continue \p Previous. 87 static bool continuesLineComment(const FormatToken &FormatTok, 88 const FormatToken *Previous, 89 const FormatToken *MinColumnToken) { 90 if (!Previous || !MinColumnToken) 91 return false; 92 unsigned MinContinueColumn = 93 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 94 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 95 isLineComment(*Previous) && 96 FormatTok.OriginalColumn >= MinContinueColumn; 97 } 98 99 class ScopedMacroState : public FormatTokenSource { 100 public: 101 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 102 FormatToken *&ResetToken) 103 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 104 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 105 Token(nullptr), PreviousToken(nullptr) { 106 FakeEOF.Tok.startToken(); 107 FakeEOF.Tok.setKind(tok::eof); 108 TokenSource = this; 109 Line.Level = 0; 110 Line.InPPDirective = true; 111 } 112 113 ~ScopedMacroState() override { 114 TokenSource = PreviousTokenSource; 115 ResetToken = Token; 116 Line.InPPDirective = false; 117 Line.Level = PreviousLineLevel; 118 } 119 120 FormatToken *getNextToken() override { 121 // The \c UnwrappedLineParser guards against this by never calling 122 // \c getNextToken() after it has encountered the first eof token. 123 assert(!eof()); 124 PreviousToken = Token; 125 Token = PreviousTokenSource->getNextToken(); 126 if (eof()) 127 return &FakeEOF; 128 return Token; 129 } 130 131 FormatToken *getPreviousToken() override { 132 return PreviousTokenSource->getPreviousToken(); 133 } 134 135 FormatToken *peekNextToken() override { 136 if (eof()) 137 return &FakeEOF; 138 return PreviousTokenSource->peekNextToken(); 139 } 140 141 bool isEOF() override { return PreviousTokenSource->isEOF(); } 142 143 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 144 145 FormatToken *setPosition(unsigned Position) override { 146 PreviousToken = nullptr; 147 Token = PreviousTokenSource->setPosition(Position); 148 return Token; 149 } 150 151 private: 152 bool eof() { 153 return Token && Token->HasUnescapedNewline && 154 !continuesLineComment(*Token, PreviousToken, 155 /*MinColumnToken=*/PreviousToken); 156 } 157 158 FormatToken FakeEOF; 159 UnwrappedLine &Line; 160 FormatTokenSource *&TokenSource; 161 FormatToken *&ResetToken; 162 unsigned PreviousLineLevel; 163 FormatTokenSource *PreviousTokenSource; 164 165 FormatToken *Token; 166 FormatToken *PreviousToken; 167 }; 168 169 } // end anonymous namespace 170 171 class ScopedLineState { 172 public: 173 ScopedLineState(UnwrappedLineParser &Parser, 174 bool SwitchToPreprocessorLines = false) 175 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 176 if (SwitchToPreprocessorLines) 177 Parser.CurrentLines = &Parser.PreprocessorDirectives; 178 else if (!Parser.Line->Tokens.empty()) 179 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 180 PreBlockLine = std::move(Parser.Line); 181 Parser.Line = std::make_unique<UnwrappedLine>(); 182 Parser.Line->Level = PreBlockLine->Level; 183 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 184 } 185 186 ~ScopedLineState() { 187 if (!Parser.Line->Tokens.empty()) { 188 Parser.addUnwrappedLine(); 189 } 190 assert(Parser.Line->Tokens.empty()); 191 Parser.Line = std::move(PreBlockLine); 192 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 193 Parser.MustBreakBeforeNextToken = true; 194 Parser.CurrentLines = OriginalLines; 195 } 196 197 private: 198 UnwrappedLineParser &Parser; 199 200 std::unique_ptr<UnwrappedLine> PreBlockLine; 201 SmallVectorImpl<UnwrappedLine> *OriginalLines; 202 }; 203 204 class CompoundStatementIndenter { 205 public: 206 CompoundStatementIndenter(UnwrappedLineParser *Parser, 207 const FormatStyle &Style, unsigned &LineLevel) 208 : CompoundStatementIndenter(Parser, LineLevel, 209 Style.BraceWrapping.AfterControlStatement, 210 Style.BraceWrapping.IndentBraces) {} 211 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 212 bool WrapBrace, bool IndentBrace) 213 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 214 if (WrapBrace) 215 Parser->addUnwrappedLine(); 216 if (IndentBrace) 217 ++LineLevel; 218 } 219 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 220 221 private: 222 unsigned &LineLevel; 223 unsigned OldLineLevel; 224 }; 225 226 namespace { 227 228 class IndexedTokenSource : public FormatTokenSource { 229 public: 230 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 231 : Tokens(Tokens), Position(-1) {} 232 233 FormatToken *getNextToken() override { 234 if (Position >= 0 && Tokens[Position]->is(tok::eof)) { 235 LLVM_DEBUG({ 236 llvm::dbgs() << "Next "; 237 dbgToken(Position); 238 }); 239 return Tokens[Position]; 240 } 241 ++Position; 242 LLVM_DEBUG({ 243 llvm::dbgs() << "Next "; 244 dbgToken(Position); 245 }); 246 return Tokens[Position]; 247 } 248 249 FormatToken *getPreviousToken() override { 250 return Position > 0 ? Tokens[Position - 1] : nullptr; 251 } 252 253 FormatToken *peekNextToken() override { 254 int Next = Position + 1; 255 LLVM_DEBUG({ 256 llvm::dbgs() << "Peeking "; 257 dbgToken(Next); 258 }); 259 return Tokens[Next]; 260 } 261 262 bool isEOF() override { return Tokens[Position]->is(tok::eof); } 263 264 unsigned getPosition() override { 265 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 266 assert(Position >= 0); 267 return Position; 268 } 269 270 FormatToken *setPosition(unsigned P) override { 271 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 272 Position = P; 273 return Tokens[Position]; 274 } 275 276 void reset() { Position = -1; } 277 278 private: 279 void dbgToken(int Position, llvm::StringRef Indent = "") { 280 FormatToken *Tok = Tokens[Position]; 281 llvm::dbgs() << Indent << "[" << Position 282 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 283 << ", Macro: " << !!Tok->MacroCtx << "\n"; 284 } 285 286 ArrayRef<FormatToken *> Tokens; 287 int Position; 288 }; 289 290 } // end anonymous namespace 291 292 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 293 const AdditionalKeywords &Keywords, 294 unsigned FirstStartColumn, 295 ArrayRef<FormatToken *> Tokens, 296 UnwrappedLineConsumer &Callback) 297 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 298 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 299 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 300 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 301 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 302 ? IG_Rejected 303 : IG_Inited), 304 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 305 306 void UnwrappedLineParser::reset() { 307 PPBranchLevel = -1; 308 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 309 ? IG_Rejected 310 : IG_Inited; 311 IncludeGuardToken = nullptr; 312 Line.reset(new UnwrappedLine); 313 CommentsBeforeNextToken.clear(); 314 FormatTok = nullptr; 315 MustBreakBeforeNextToken = false; 316 PreprocessorDirectives.clear(); 317 CurrentLines = &Lines; 318 DeclarationScopeStack.clear(); 319 NestedTooDeep.clear(); 320 PPStack.clear(); 321 Line->FirstStartColumn = FirstStartColumn; 322 } 323 324 void UnwrappedLineParser::parse() { 325 IndexedTokenSource TokenSource(AllTokens); 326 Line->FirstStartColumn = FirstStartColumn; 327 do { 328 LLVM_DEBUG(llvm::dbgs() << "----\n"); 329 reset(); 330 Tokens = &TokenSource; 331 TokenSource.reset(); 332 333 readToken(); 334 parseFile(); 335 336 // If we found an include guard then all preprocessor directives (other than 337 // the guard) are over-indented by one. 338 if (IncludeGuard == IG_Found) 339 for (auto &Line : Lines) 340 if (Line.InPPDirective && Line.Level > 0) 341 --Line.Level; 342 343 // Create line with eof token. 344 pushToken(FormatTok); 345 addUnwrappedLine(); 346 347 for (const UnwrappedLine &Line : Lines) 348 Callback.consumeUnwrappedLine(Line); 349 350 Callback.finishRun(); 351 Lines.clear(); 352 while (!PPLevelBranchIndex.empty() && 353 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 354 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 355 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 356 } 357 if (!PPLevelBranchIndex.empty()) { 358 ++PPLevelBranchIndex.back(); 359 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 360 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 361 } 362 } while (!PPLevelBranchIndex.empty()); 363 } 364 365 void UnwrappedLineParser::parseFile() { 366 // The top-level context in a file always has declarations, except for pre- 367 // processor directives and JavaScript files. 368 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 369 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 370 MustBeDeclaration); 371 if (Style.Language == FormatStyle::LK_TextProto) 372 parseBracedList(); 373 else 374 parseLevel(/*HasOpeningBrace=*/false); 375 // Make sure to format the remaining tokens. 376 // 377 // LK_TextProto is special since its top-level is parsed as the body of a 378 // braced list, which does not necessarily have natural line separators such 379 // as a semicolon. Comments after the last entry that have been determined to 380 // not belong to that line, as in: 381 // key: value 382 // // endfile comment 383 // do not have a chance to be put on a line of their own until this point. 384 // Here we add this newline before end-of-file comments. 385 if (Style.Language == FormatStyle::LK_TextProto && 386 !CommentsBeforeNextToken.empty()) 387 addUnwrappedLine(); 388 flushComments(true); 389 addUnwrappedLine(); 390 } 391 392 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 393 do { 394 switch (FormatTok->Tok.getKind()) { 395 case tok::l_brace: 396 return; 397 default: 398 if (FormatTok->is(Keywords.kw_where)) { 399 addUnwrappedLine(); 400 nextToken(); 401 parseCSharpGenericTypeConstraint(); 402 break; 403 } 404 nextToken(); 405 break; 406 } 407 } while (!eof()); 408 } 409 410 void UnwrappedLineParser::parseCSharpAttribute() { 411 int UnpairedSquareBrackets = 1; 412 do { 413 switch (FormatTok->Tok.getKind()) { 414 case tok::r_square: 415 nextToken(); 416 --UnpairedSquareBrackets; 417 if (UnpairedSquareBrackets == 0) { 418 addUnwrappedLine(); 419 return; 420 } 421 break; 422 case tok::l_square: 423 ++UnpairedSquareBrackets; 424 nextToken(); 425 break; 426 default: 427 nextToken(); 428 break; 429 } 430 } while (!eof()); 431 } 432 433 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 434 if (!Lines.empty() && Lines.back().InPPDirective) 435 return true; 436 437 const FormatToken *Previous = Tokens->getPreviousToken(); 438 return Previous && Previous->is(tok::comment) && 439 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 440 } 441 442 bool UnwrappedLineParser::mightFitOnOneLine() const { 443 const auto ColumnLimit = Style.ColumnLimit; 444 if (ColumnLimit == 0) 445 return true; 446 447 if (Lines.empty()) 448 return true; 449 450 const auto &PreviousLine = Lines.back(); 451 const auto &Tokens = PreviousLine.Tokens; 452 assert(!Tokens.empty()); 453 const auto *LastToken = Tokens.back().Tok; 454 assert(LastToken); 455 if (!LastToken->isOneOf(tok::semi, tok::comment)) 456 return true; 457 458 AnnotatedLine Line(PreviousLine); 459 assert(Line.Last == LastToken); 460 461 TokenAnnotator Annotator(Style, Keywords); 462 Annotator.annotate(Line); 463 Annotator.calculateFormattingInformation(Line); 464 465 return Line.Level * Style.IndentWidth + LastToken->TotalLength <= ColumnLimit; 466 } 467 468 // Returns true if a simple block, or false otherwise. (A simple block has a 469 // single statement that fits on a single line.) 470 bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace, IfStmtKind *IfKind) { 471 const bool IsPrecededByCommentOrPPDirective = 472 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective(); 473 unsigned StatementCount = 0; 474 bool SwitchLabelEncountered = false; 475 do { 476 tok::TokenKind kind = FormatTok->Tok.getKind(); 477 if (FormatTok->getType() == TT_MacroBlockBegin) { 478 kind = tok::l_brace; 479 } else if (FormatTok->getType() == TT_MacroBlockEnd) { 480 kind = tok::r_brace; 481 } 482 483 switch (kind) { 484 case tok::comment: 485 nextToken(); 486 addUnwrappedLine(); 487 break; 488 case tok::l_brace: 489 // FIXME: Add parameter whether this can happen - if this happens, we must 490 // be in a non-declaration context. 491 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 492 continue; 493 parseBlock(); 494 ++StatementCount; 495 assert(StatementCount > 0 && "StatementCount overflow!"); 496 addUnwrappedLine(); 497 break; 498 case tok::r_brace: 499 if (HasOpeningBrace) { 500 if (!Style.RemoveBracesLLVM) 501 return false; 502 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || 503 IsPrecededByCommentOrPPDirective || 504 precededByCommentOrPPDirective()) { 505 return false; 506 } 507 const FormatToken *Next = Tokens->peekNextToken(); 508 if (Next->is(tok::comment) && Next->NewlinesBefore == 0) 509 return false; 510 return mightFitOnOneLine(); 511 } 512 nextToken(); 513 addUnwrappedLine(); 514 break; 515 case tok::kw_default: { 516 unsigned StoredPosition = Tokens->getPosition(); 517 FormatToken *Next; 518 do { 519 Next = Tokens->getNextToken(); 520 } while (Next->is(tok::comment)); 521 FormatTok = Tokens->setPosition(StoredPosition); 522 if (Next && Next->isNot(tok::colon)) { 523 // default not followed by ':' is not a case label; treat it like 524 // an identifier. 525 parseStructuralElement(); 526 break; 527 } 528 // Else, if it is 'default:', fall through to the case handling. 529 LLVM_FALLTHROUGH; 530 } 531 case tok::kw_case: 532 if (Style.isJavaScript() && Line->MustBeDeclaration) { 533 // A 'case: string' style field declaration. 534 parseStructuralElement(); 535 break; 536 } 537 if (!SwitchLabelEncountered && 538 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 539 ++Line->Level; 540 SwitchLabelEncountered = true; 541 parseStructuralElement(); 542 break; 543 case tok::l_square: 544 if (Style.isCSharp()) { 545 nextToken(); 546 parseCSharpAttribute(); 547 break; 548 } 549 LLVM_FALLTHROUGH; 550 default: 551 parseStructuralElement(IfKind, !HasOpeningBrace); 552 ++StatementCount; 553 assert(StatementCount > 0 && "StatementCount overflow!"); 554 break; 555 } 556 } while (!eof()); 557 return false; 558 } 559 560 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 561 // We'll parse forward through the tokens until we hit 562 // a closing brace or eof - note that getNextToken() will 563 // parse macros, so this will magically work inside macro 564 // definitions, too. 565 unsigned StoredPosition = Tokens->getPosition(); 566 FormatToken *Tok = FormatTok; 567 const FormatToken *PrevTok = Tok->Previous; 568 // Keep a stack of positions of lbrace tokens. We will 569 // update information about whether an lbrace starts a 570 // braced init list or a different block during the loop. 571 SmallVector<FormatToken *, 8> LBraceStack; 572 assert(Tok->Tok.is(tok::l_brace)); 573 do { 574 // Get next non-comment token. 575 FormatToken *NextTok; 576 unsigned ReadTokens = 0; 577 do { 578 NextTok = Tokens->getNextToken(); 579 ++ReadTokens; 580 } while (NextTok->is(tok::comment)); 581 582 switch (Tok->Tok.getKind()) { 583 case tok::l_brace: 584 if (Style.isJavaScript() && PrevTok) { 585 if (PrevTok->isOneOf(tok::colon, tok::less)) 586 // A ':' indicates this code is in a type, or a braced list 587 // following a label in an object literal ({a: {b: 1}}). 588 // A '<' could be an object used in a comparison, but that is nonsense 589 // code (can never return true), so more likely it is a generic type 590 // argument (`X<{a: string; b: number}>`). 591 // The code below could be confused by semicolons between the 592 // individual members in a type member list, which would normally 593 // trigger BK_Block. In both cases, this must be parsed as an inline 594 // braced init. 595 Tok->setBlockKind(BK_BracedInit); 596 else if (PrevTok->is(tok::r_paren)) 597 // `) { }` can only occur in function or method declarations in JS. 598 Tok->setBlockKind(BK_Block); 599 } else { 600 Tok->setBlockKind(BK_Unknown); 601 } 602 LBraceStack.push_back(Tok); 603 break; 604 case tok::r_brace: 605 if (LBraceStack.empty()) 606 break; 607 if (LBraceStack.back()->is(BK_Unknown)) { 608 bool ProbablyBracedList = false; 609 if (Style.Language == FormatStyle::LK_Proto) { 610 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 611 } else { 612 // Skip NextTok over preprocessor lines, otherwise we may not 613 // properly diagnose the block as a braced intializer 614 // if the comma separator appears after the pp directive. 615 while (NextTok->is(tok::hash)) { 616 ScopedMacroState MacroState(*Line, Tokens, NextTok); 617 do { 618 NextTok = Tokens->getNextToken(); 619 ++ReadTokens; 620 } while (NextTok->isNot(tok::eof)); 621 } 622 623 // Using OriginalColumn to distinguish between ObjC methods and 624 // binary operators is a bit hacky. 625 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 626 NextTok->OriginalColumn == 0; 627 628 // If there is a comma, semicolon or right paren after the closing 629 // brace, we assume this is a braced initializer list. Note that 630 // regardless how we mark inner braces here, we will overwrite the 631 // BlockKind later if we parse a braced list (where all blocks 632 // inside are by default braced lists), or when we explicitly detect 633 // blocks (for example while parsing lambdas). 634 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 635 // braced list in JS. 636 ProbablyBracedList = 637 (Style.isJavaScript() && 638 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 639 Keywords.kw_as)) || 640 (Style.isCpp() && NextTok->is(tok::l_paren)) || 641 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 642 tok::r_paren, tok::r_square, tok::l_brace, 643 tok::ellipsis) || 644 (NextTok->is(tok::identifier) && 645 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 646 (NextTok->is(tok::semi) && 647 (!ExpectClassBody || LBraceStack.size() != 1)) || 648 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 649 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 650 // We can have an array subscript after a braced init 651 // list, but C++11 attributes are expected after blocks. 652 NextTok = Tokens->getNextToken(); 653 ++ReadTokens; 654 ProbablyBracedList = NextTok->isNot(tok::l_square); 655 } 656 } 657 if (ProbablyBracedList) { 658 Tok->setBlockKind(BK_BracedInit); 659 LBraceStack.back()->setBlockKind(BK_BracedInit); 660 } else { 661 Tok->setBlockKind(BK_Block); 662 LBraceStack.back()->setBlockKind(BK_Block); 663 } 664 } 665 LBraceStack.pop_back(); 666 break; 667 case tok::identifier: 668 if (!Tok->is(TT_StatementMacro)) 669 break; 670 LLVM_FALLTHROUGH; 671 case tok::at: 672 case tok::semi: 673 case tok::kw_if: 674 case tok::kw_while: 675 case tok::kw_for: 676 case tok::kw_switch: 677 case tok::kw_try: 678 case tok::kw___try: 679 if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown)) 680 LBraceStack.back()->setBlockKind(BK_Block); 681 break; 682 default: 683 break; 684 } 685 PrevTok = Tok; 686 Tok = NextTok; 687 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 688 689 // Assume other blocks for all unclosed opening braces. 690 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 691 if (LBraceStack[i]->is(BK_Unknown)) 692 LBraceStack[i]->setBlockKind(BK_Block); 693 } 694 695 FormatTok = Tokens->setPosition(StoredPosition); 696 } 697 698 template <class T> 699 static inline void hash_combine(std::size_t &seed, const T &v) { 700 std::hash<T> hasher; 701 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 702 } 703 704 size_t UnwrappedLineParser::computePPHash() const { 705 size_t h = 0; 706 for (const auto &i : PPStack) { 707 hash_combine(h, size_t(i.Kind)); 708 hash_combine(h, i.Line); 709 } 710 return h; 711 } 712 713 UnwrappedLineParser::IfStmtKind 714 UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels, 715 bool MunchSemi, 716 bool UnindentWhitesmithsBraces) { 717 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 718 "'{' or macro block token expected"); 719 FormatToken *Tok = FormatTok; 720 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 721 FormatTok->setBlockKind(BK_Block); 722 723 // For Whitesmiths mode, jump to the next level prior to skipping over the 724 // braces. 725 if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 726 ++Line->Level; 727 728 size_t PPStartHash = computePPHash(); 729 730 unsigned InitialLevel = Line->Level; 731 nextToken(/*LevelDifference=*/AddLevels); 732 733 if (MacroBlock && FormatTok->is(tok::l_paren)) 734 parseParens(); 735 736 size_t NbPreprocessorDirectives = 737 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 738 addUnwrappedLine(); 739 size_t OpeningLineIndex = 740 CurrentLines->empty() 741 ? (UnwrappedLine::kInvalidIndex) 742 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 743 744 // Whitesmiths is weird here. The brace needs to be indented for the namespace 745 // block, but the block itself may not be indented depending on the style 746 // settings. This allows the format to back up one level in those cases. 747 if (UnindentWhitesmithsBraces) 748 --Line->Level; 749 750 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 751 MustBeDeclaration); 752 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 753 Line->Level += AddLevels; 754 755 IfStmtKind IfKind = IfStmtKind::NotIf; 756 const bool SimpleBlock = parseLevel(/*HasOpeningBrace=*/true, &IfKind); 757 758 if (eof()) 759 return IfKind; 760 761 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 762 : !FormatTok->is(tok::r_brace)) { 763 Line->Level = InitialLevel; 764 FormatTok->setBlockKind(BK_Block); 765 return IfKind; 766 } 767 768 if (SimpleBlock && Tok->is(tok::l_brace)) { 769 assert(FormatTok->is(tok::r_brace)); 770 const FormatToken *Previous = Tokens->getPreviousToken(); 771 assert(Previous); 772 if (Previous->isNot(tok::r_brace) || Previous->Optional) { 773 Tok->MatchingParen = FormatTok; 774 FormatTok->MatchingParen = Tok; 775 } 776 } 777 778 size_t PPEndHash = computePPHash(); 779 780 // Munch the closing brace. 781 nextToken(/*LevelDifference=*/-AddLevels); 782 783 if (MacroBlock && FormatTok->is(tok::l_paren)) 784 parseParens(); 785 786 if (FormatTok->is(tok::arrow)) { 787 // Following the } we can find a trailing return type arrow 788 // as part of an implicit conversion constraint. 789 nextToken(); 790 parseStructuralElement(); 791 } 792 793 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 794 nextToken(); 795 796 Line->Level = InitialLevel; 797 798 if (PPStartHash == PPEndHash) { 799 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 800 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 801 // Update the opening line to add the forward reference as well 802 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 803 CurrentLines->size() - 1; 804 } 805 } 806 807 return IfKind; 808 } 809 810 static bool isGoogScope(const UnwrappedLine &Line) { 811 // FIXME: Closure-library specific stuff should not be hard-coded but be 812 // configurable. 813 if (Line.Tokens.size() < 4) 814 return false; 815 auto I = Line.Tokens.begin(); 816 if (I->Tok->TokenText != "goog") 817 return false; 818 ++I; 819 if (I->Tok->isNot(tok::period)) 820 return false; 821 ++I; 822 if (I->Tok->TokenText != "scope") 823 return false; 824 ++I; 825 return I->Tok->is(tok::l_paren); 826 } 827 828 static bool isIIFE(const UnwrappedLine &Line, 829 const AdditionalKeywords &Keywords) { 830 // Look for the start of an immediately invoked anonymous function. 831 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 832 // This is commonly done in JavaScript to create a new, anonymous scope. 833 // Example: (function() { ... })() 834 if (Line.Tokens.size() < 3) 835 return false; 836 auto I = Line.Tokens.begin(); 837 if (I->Tok->isNot(tok::l_paren)) 838 return false; 839 ++I; 840 if (I->Tok->isNot(Keywords.kw_function)) 841 return false; 842 ++I; 843 return I->Tok->is(tok::l_paren); 844 } 845 846 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 847 const FormatToken &InitialToken) { 848 if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro)) 849 return Style.BraceWrapping.AfterNamespace; 850 if (InitialToken.is(tok::kw_class)) 851 return Style.BraceWrapping.AfterClass; 852 if (InitialToken.is(tok::kw_union)) 853 return Style.BraceWrapping.AfterUnion; 854 if (InitialToken.is(tok::kw_struct)) 855 return Style.BraceWrapping.AfterStruct; 856 if (InitialToken.is(tok::kw_enum)) 857 return Style.BraceWrapping.AfterEnum; 858 return false; 859 } 860 861 void UnwrappedLineParser::parseChildBlock() { 862 FormatTok->setBlockKind(BK_Block); 863 nextToken(); 864 { 865 bool SkipIndent = (Style.isJavaScript() && 866 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 867 ScopedLineState LineState(*this); 868 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 869 /*MustBeDeclaration=*/false); 870 Line->Level += SkipIndent ? 0 : 1; 871 parseLevel(/*HasOpeningBrace=*/true); 872 flushComments(isOnNewLine(*FormatTok)); 873 Line->Level -= SkipIndent ? 0 : 1; 874 } 875 nextToken(); 876 } 877 878 void UnwrappedLineParser::parsePPDirective() { 879 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 880 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 881 882 nextToken(); 883 884 if (!FormatTok->Tok.getIdentifierInfo()) { 885 parsePPUnknown(); 886 return; 887 } 888 889 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 890 case tok::pp_define: 891 parsePPDefine(); 892 return; 893 case tok::pp_if: 894 parsePPIf(/*IfDef=*/false); 895 break; 896 case tok::pp_ifdef: 897 case tok::pp_ifndef: 898 parsePPIf(/*IfDef=*/true); 899 break; 900 case tok::pp_else: 901 parsePPElse(); 902 break; 903 case tok::pp_elifdef: 904 case tok::pp_elifndef: 905 case tok::pp_elif: 906 parsePPElIf(); 907 break; 908 case tok::pp_endif: 909 parsePPEndIf(); 910 break; 911 default: 912 parsePPUnknown(); 913 break; 914 } 915 } 916 917 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 918 size_t Line = CurrentLines->size(); 919 if (CurrentLines == &PreprocessorDirectives) 920 Line += Lines.size(); 921 922 if (Unreachable || 923 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 924 PPStack.push_back({PP_Unreachable, Line}); 925 else 926 PPStack.push_back({PP_Conditional, Line}); 927 } 928 929 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 930 ++PPBranchLevel; 931 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 932 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 933 PPLevelBranchIndex.push_back(0); 934 PPLevelBranchCount.push_back(0); 935 } 936 PPChainBranchIndex.push(0); 937 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 938 conditionalCompilationCondition(Unreachable || Skip); 939 } 940 941 void UnwrappedLineParser::conditionalCompilationAlternative() { 942 if (!PPStack.empty()) 943 PPStack.pop_back(); 944 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 945 if (!PPChainBranchIndex.empty()) 946 ++PPChainBranchIndex.top(); 947 conditionalCompilationCondition( 948 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 949 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 950 } 951 952 void UnwrappedLineParser::conditionalCompilationEnd() { 953 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 954 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 955 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 956 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 957 } 958 } 959 // Guard against #endif's without #if. 960 if (PPBranchLevel > -1) 961 --PPBranchLevel; 962 if (!PPChainBranchIndex.empty()) 963 PPChainBranchIndex.pop(); 964 if (!PPStack.empty()) 965 PPStack.pop_back(); 966 } 967 968 void UnwrappedLineParser::parsePPIf(bool IfDef) { 969 bool IfNDef = FormatTok->is(tok::pp_ifndef); 970 nextToken(); 971 bool Unreachable = false; 972 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 973 Unreachable = true; 974 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 975 Unreachable = true; 976 conditionalCompilationStart(Unreachable); 977 FormatToken *IfCondition = FormatTok; 978 // If there's a #ifndef on the first line, and the only lines before it are 979 // comments, it could be an include guard. 980 bool MaybeIncludeGuard = IfNDef; 981 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 982 for (auto &Line : Lines) { 983 if (!Line.Tokens.front().Tok->is(tok::comment)) { 984 MaybeIncludeGuard = false; 985 IncludeGuard = IG_Rejected; 986 break; 987 } 988 } 989 --PPBranchLevel; 990 parsePPUnknown(); 991 ++PPBranchLevel; 992 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 993 IncludeGuard = IG_IfNdefed; 994 IncludeGuardToken = IfCondition; 995 } 996 } 997 998 void UnwrappedLineParser::parsePPElse() { 999 // If a potential include guard has an #else, it's not an include guard. 1000 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1001 IncludeGuard = IG_Rejected; 1002 conditionalCompilationAlternative(); 1003 if (PPBranchLevel > -1) 1004 --PPBranchLevel; 1005 parsePPUnknown(); 1006 ++PPBranchLevel; 1007 } 1008 1009 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 1010 1011 void UnwrappedLineParser::parsePPEndIf() { 1012 conditionalCompilationEnd(); 1013 parsePPUnknown(); 1014 // If the #endif of a potential include guard is the last thing in the file, 1015 // then we found an include guard. 1016 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1017 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1018 IncludeGuard = IG_Found; 1019 } 1020 1021 void UnwrappedLineParser::parsePPDefine() { 1022 nextToken(); 1023 1024 if (!FormatTok->Tok.getIdentifierInfo()) { 1025 IncludeGuard = IG_Rejected; 1026 IncludeGuardToken = nullptr; 1027 parsePPUnknown(); 1028 return; 1029 } 1030 1031 if (IncludeGuard == IG_IfNdefed && 1032 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1033 IncludeGuard = IG_Defined; 1034 IncludeGuardToken = nullptr; 1035 for (auto &Line : Lines) { 1036 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1037 IncludeGuard = IG_Rejected; 1038 break; 1039 } 1040 } 1041 } 1042 1043 nextToken(); 1044 if (FormatTok->Tok.getKind() == tok::l_paren && 1045 !FormatTok->hasWhitespaceBefore()) { 1046 parseParens(); 1047 } 1048 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1049 Line->Level += PPBranchLevel + 1; 1050 addUnwrappedLine(); 1051 ++Line->Level; 1052 1053 // Errors during a preprocessor directive can only affect the layout of the 1054 // preprocessor directive, and thus we ignore them. An alternative approach 1055 // would be to use the same approach we use on the file level (no 1056 // re-indentation if there was a structural error) within the macro 1057 // definition. 1058 parseFile(); 1059 } 1060 1061 void UnwrappedLineParser::parsePPUnknown() { 1062 do { 1063 nextToken(); 1064 } while (!eof()); 1065 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1066 Line->Level += PPBranchLevel + 1; 1067 addUnwrappedLine(); 1068 } 1069 1070 // Here we exclude certain tokens that are not usually the first token in an 1071 // unwrapped line. This is used in attempt to distinguish macro calls without 1072 // trailing semicolons from other constructs split to several lines. 1073 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1074 // Semicolon can be a null-statement, l_square can be a start of a macro or 1075 // a C++11 attribute, but this doesn't seem to be common. 1076 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 1077 Tok.isNot(TT_AttributeSquare) && 1078 // Tokens that can only be used as binary operators and a part of 1079 // overloaded operator names. 1080 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 1081 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 1082 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 1083 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 1084 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 1085 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 1086 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 1087 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 1088 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 1089 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 1090 Tok.isNot(tok::lesslessequal) && 1091 // Colon is used in labels, base class lists, initializer lists, 1092 // range-based for loops, ternary operator, but should never be the 1093 // first token in an unwrapped line. 1094 Tok.isNot(tok::colon) && 1095 // 'noexcept' is a trailing annotation. 1096 Tok.isNot(tok::kw_noexcept); 1097 } 1098 1099 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1100 const FormatToken *FormatTok) { 1101 // FIXME: This returns true for C/C++ keywords like 'struct'. 1102 return FormatTok->is(tok::identifier) && 1103 (FormatTok->Tok.getIdentifierInfo() == nullptr || 1104 !FormatTok->isOneOf( 1105 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1106 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1107 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1108 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1109 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1110 Keywords.kw_instanceof, Keywords.kw_interface, 1111 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1112 } 1113 1114 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1115 const FormatToken *FormatTok) { 1116 return FormatTok->Tok.isLiteral() || 1117 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1118 mustBeJSIdent(Keywords, FormatTok); 1119 } 1120 1121 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1122 // when encountered after a value (see mustBeJSIdentOrValue). 1123 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1124 const FormatToken *FormatTok) { 1125 return FormatTok->isOneOf( 1126 tok::kw_return, Keywords.kw_yield, 1127 // conditionals 1128 tok::kw_if, tok::kw_else, 1129 // loops 1130 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1131 // switch/case 1132 tok::kw_switch, tok::kw_case, 1133 // exceptions 1134 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1135 // declaration 1136 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1137 Keywords.kw_async, Keywords.kw_function, 1138 // import/export 1139 Keywords.kw_import, tok::kw_export); 1140 } 1141 1142 // Checks whether a token is a type in K&R C (aka C78). 1143 static bool isC78Type(const FormatToken &Tok) { 1144 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1145 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1146 tok::identifier); 1147 } 1148 1149 // This function checks whether a token starts the first parameter declaration 1150 // in a K&R C (aka C78) function definition, e.g.: 1151 // int f(a, b) 1152 // short a, b; 1153 // { 1154 // return a + b; 1155 // } 1156 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1157 const FormatToken *FuncName) { 1158 assert(Tok); 1159 assert(Next); 1160 assert(FuncName); 1161 1162 if (FuncName->isNot(tok::identifier)) 1163 return false; 1164 1165 const FormatToken *Prev = FuncName->Previous; 1166 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1167 return false; 1168 1169 if (!isC78Type(*Tok) && 1170 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) 1171 return false; 1172 1173 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1174 return false; 1175 1176 Tok = Tok->Previous; 1177 if (!Tok || Tok->isNot(tok::r_paren)) 1178 return false; 1179 1180 Tok = Tok->Previous; 1181 if (!Tok || Tok->isNot(tok::identifier)) 1182 return false; 1183 1184 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1185 } 1186 1187 void UnwrappedLineParser::parseModuleImport() { 1188 nextToken(); 1189 while (!eof()) { 1190 if (FormatTok->is(tok::colon)) { 1191 FormatTok->setType(TT_ModulePartitionColon); 1192 } 1193 // Handle import <foo/bar.h> as we would an include statement. 1194 else if (FormatTok->is(tok::less)) { 1195 nextToken(); 1196 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1197 // Mark tokens up to the trailing line comments as implicit string 1198 // literals. 1199 if (FormatTok->isNot(tok::comment) && 1200 !FormatTok->TokenText.startswith("//")) 1201 FormatTok->setType(TT_ImplicitStringLiteral); 1202 nextToken(); 1203 } 1204 } 1205 if (FormatTok->is(tok::semi)) { 1206 nextToken(); 1207 break; 1208 } 1209 nextToken(); 1210 } 1211 1212 addUnwrappedLine(); 1213 } 1214 1215 // readTokenWithJavaScriptASI reads the next token and terminates the current 1216 // line if JavaScript Automatic Semicolon Insertion must 1217 // happen between the current token and the next token. 1218 // 1219 // This method is conservative - it cannot cover all edge cases of JavaScript, 1220 // but only aims to correctly handle certain well known cases. It *must not* 1221 // return true in speculative cases. 1222 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1223 FormatToken *Previous = FormatTok; 1224 readToken(); 1225 FormatToken *Next = FormatTok; 1226 1227 bool IsOnSameLine = 1228 CommentsBeforeNextToken.empty() 1229 ? Next->NewlinesBefore == 0 1230 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 1231 if (IsOnSameLine) 1232 return; 1233 1234 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 1235 bool PreviousStartsTemplateExpr = 1236 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 1237 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 1238 // If the line contains an '@' sign, the previous token might be an 1239 // annotation, which can precede another identifier/value. 1240 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) { 1241 return LineNode.Tok->is(tok::at); 1242 }); 1243 if (HasAt) 1244 return; 1245 } 1246 if (Next->is(tok::exclaim) && PreviousMustBeValue) 1247 return addUnwrappedLine(); 1248 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 1249 bool NextEndsTemplateExpr = 1250 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 1251 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 1252 (PreviousMustBeValue || 1253 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 1254 tok::minusminus))) 1255 return addUnwrappedLine(); 1256 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 1257 isJSDeclOrStmt(Keywords, Next)) 1258 return addUnwrappedLine(); 1259 } 1260 1261 void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind, 1262 bool IsTopLevel) { 1263 if (Style.Language == FormatStyle::LK_TableGen && 1264 FormatTok->is(tok::pp_include)) { 1265 nextToken(); 1266 if (FormatTok->is(tok::string_literal)) 1267 nextToken(); 1268 addUnwrappedLine(); 1269 return; 1270 } 1271 switch (FormatTok->Tok.getKind()) { 1272 case tok::kw_asm: 1273 nextToken(); 1274 if (FormatTok->is(tok::l_brace)) { 1275 FormatTok->setType(TT_InlineASMBrace); 1276 nextToken(); 1277 while (FormatTok && FormatTok->isNot(tok::eof)) { 1278 if (FormatTok->is(tok::r_brace)) { 1279 FormatTok->setType(TT_InlineASMBrace); 1280 nextToken(); 1281 addUnwrappedLine(); 1282 break; 1283 } 1284 FormatTok->Finalized = true; 1285 nextToken(); 1286 } 1287 } 1288 break; 1289 case tok::kw_namespace: 1290 parseNamespace(); 1291 return; 1292 case tok::kw_public: 1293 case tok::kw_protected: 1294 case tok::kw_private: 1295 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 1296 Style.isCSharp()) 1297 nextToken(); 1298 else 1299 parseAccessSpecifier(); 1300 return; 1301 case tok::kw_if: 1302 if (Style.isJavaScript() && Line->MustBeDeclaration) 1303 // field/method declaration. 1304 break; 1305 parseIfThenElse(IfKind); 1306 return; 1307 case tok::kw_for: 1308 case tok::kw_while: 1309 if (Style.isJavaScript() && Line->MustBeDeclaration) 1310 // field/method declaration. 1311 break; 1312 parseForOrWhileLoop(); 1313 return; 1314 case tok::kw_do: 1315 if (Style.isJavaScript() && Line->MustBeDeclaration) 1316 // field/method declaration. 1317 break; 1318 parseDoWhile(); 1319 return; 1320 case tok::kw_switch: 1321 if (Style.isJavaScript() && Line->MustBeDeclaration) 1322 // 'switch: string' field declaration. 1323 break; 1324 parseSwitch(); 1325 return; 1326 case tok::kw_default: 1327 if (Style.isJavaScript() && Line->MustBeDeclaration) 1328 // 'default: string' field declaration. 1329 break; 1330 nextToken(); 1331 if (FormatTok->is(tok::colon)) { 1332 parseLabel(); 1333 return; 1334 } 1335 // e.g. "default void f() {}" in a Java interface. 1336 break; 1337 case tok::kw_case: 1338 if (Style.isJavaScript() && Line->MustBeDeclaration) 1339 // 'case: string' field declaration. 1340 break; 1341 parseCaseLabel(); 1342 return; 1343 case tok::kw_try: 1344 case tok::kw___try: 1345 if (Style.isJavaScript() && Line->MustBeDeclaration) 1346 // field/method declaration. 1347 break; 1348 parseTryCatch(); 1349 return; 1350 case tok::kw_extern: 1351 nextToken(); 1352 if (FormatTok->Tok.is(tok::string_literal)) { 1353 nextToken(); 1354 if (FormatTok->Tok.is(tok::l_brace)) { 1355 if (Style.BraceWrapping.AfterExternBlock) 1356 addUnwrappedLine(); 1357 // Either we indent or for backwards compatibility we follow the 1358 // AfterExternBlock style. 1359 unsigned AddLevels = 1360 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) || 1361 (Style.BraceWrapping.AfterExternBlock && 1362 Style.IndentExternBlock == 1363 FormatStyle::IEBS_AfterExternBlock) 1364 ? 1u 1365 : 0u; 1366 parseBlock(/*MustBeDeclaration=*/true, AddLevels); 1367 addUnwrappedLine(); 1368 return; 1369 } 1370 } 1371 break; 1372 case tok::kw_export: 1373 if (Style.isJavaScript()) { 1374 parseJavaScriptEs6ImportExport(); 1375 return; 1376 } 1377 if (!Style.isCpp()) 1378 break; 1379 // Handle C++ "(inline|export) namespace". 1380 LLVM_FALLTHROUGH; 1381 case tok::kw_inline: 1382 nextToken(); 1383 if (FormatTok->Tok.is(tok::kw_namespace)) { 1384 parseNamespace(); 1385 return; 1386 } 1387 break; 1388 case tok::identifier: 1389 if (FormatTok->is(TT_ForEachMacro)) { 1390 parseForOrWhileLoop(); 1391 return; 1392 } 1393 if (FormatTok->is(TT_MacroBlockBegin)) { 1394 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 1395 /*MunchSemi=*/false); 1396 return; 1397 } 1398 if (FormatTok->is(Keywords.kw_import)) { 1399 if (Style.isJavaScript()) { 1400 parseJavaScriptEs6ImportExport(); 1401 return; 1402 } 1403 if (Style.Language == FormatStyle::LK_Proto) { 1404 nextToken(); 1405 if (FormatTok->is(tok::kw_public)) 1406 nextToken(); 1407 if (!FormatTok->is(tok::string_literal)) 1408 return; 1409 nextToken(); 1410 if (FormatTok->is(tok::semi)) 1411 nextToken(); 1412 addUnwrappedLine(); 1413 return; 1414 } 1415 if (Style.isCpp()) { 1416 parseModuleImport(); 1417 return; 1418 } 1419 } 1420 if (Style.isCpp() && 1421 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1422 Keywords.kw_slots, Keywords.kw_qslots)) { 1423 nextToken(); 1424 if (FormatTok->is(tok::colon)) { 1425 nextToken(); 1426 addUnwrappedLine(); 1427 return; 1428 } 1429 } 1430 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1431 parseStatementMacro(); 1432 return; 1433 } 1434 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) { 1435 parseNamespace(); 1436 return; 1437 } 1438 // In all other cases, parse the declaration. 1439 break; 1440 default: 1441 break; 1442 } 1443 do { 1444 const FormatToken *Previous = FormatTok->Previous; 1445 switch (FormatTok->Tok.getKind()) { 1446 case tok::at: 1447 nextToken(); 1448 if (FormatTok->Tok.is(tok::l_brace)) { 1449 nextToken(); 1450 parseBracedList(); 1451 break; 1452 } else if (Style.Language == FormatStyle::LK_Java && 1453 FormatTok->is(Keywords.kw_interface)) { 1454 nextToken(); 1455 break; 1456 } 1457 switch (FormatTok->Tok.getObjCKeywordID()) { 1458 case tok::objc_public: 1459 case tok::objc_protected: 1460 case tok::objc_package: 1461 case tok::objc_private: 1462 return parseAccessSpecifier(); 1463 case tok::objc_interface: 1464 case tok::objc_implementation: 1465 return parseObjCInterfaceOrImplementation(); 1466 case tok::objc_protocol: 1467 if (parseObjCProtocol()) 1468 return; 1469 break; 1470 case tok::objc_end: 1471 return; // Handled by the caller. 1472 case tok::objc_optional: 1473 case tok::objc_required: 1474 nextToken(); 1475 addUnwrappedLine(); 1476 return; 1477 case tok::objc_autoreleasepool: 1478 nextToken(); 1479 if (FormatTok->Tok.is(tok::l_brace)) { 1480 if (Style.BraceWrapping.AfterControlStatement == 1481 FormatStyle::BWACS_Always) 1482 addUnwrappedLine(); 1483 parseBlock(); 1484 } 1485 addUnwrappedLine(); 1486 return; 1487 case tok::objc_synchronized: 1488 nextToken(); 1489 if (FormatTok->Tok.is(tok::l_paren)) 1490 // Skip synchronization object 1491 parseParens(); 1492 if (FormatTok->Tok.is(tok::l_brace)) { 1493 if (Style.BraceWrapping.AfterControlStatement == 1494 FormatStyle::BWACS_Always) 1495 addUnwrappedLine(); 1496 parseBlock(); 1497 } 1498 addUnwrappedLine(); 1499 return; 1500 case tok::objc_try: 1501 // This branch isn't strictly necessary (the kw_try case below would 1502 // do this too after the tok::at is parsed above). But be explicit. 1503 parseTryCatch(); 1504 return; 1505 default: 1506 break; 1507 } 1508 break; 1509 case tok::kw_concept: 1510 parseConcept(); 1511 return; 1512 case tok::kw_requires: 1513 parseRequires(); 1514 return; 1515 case tok::kw_enum: 1516 // Ignore if this is part of "template <enum ...". 1517 if (Previous && Previous->is(tok::less)) { 1518 nextToken(); 1519 break; 1520 } 1521 1522 // parseEnum falls through and does not yet add an unwrapped line as an 1523 // enum definition can start a structural element. 1524 if (!parseEnum()) 1525 break; 1526 // This only applies for C++. 1527 if (!Style.isCpp()) { 1528 addUnwrappedLine(); 1529 return; 1530 } 1531 break; 1532 case tok::kw_typedef: 1533 nextToken(); 1534 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1535 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS, 1536 Keywords.kw_CF_CLOSED_ENUM, 1537 Keywords.kw_NS_CLOSED_ENUM)) 1538 parseEnum(); 1539 break; 1540 case tok::kw_struct: 1541 case tok::kw_union: 1542 case tok::kw_class: 1543 if (parseStructLike()) { 1544 return; 1545 } 1546 break; 1547 case tok::period: 1548 nextToken(); 1549 // In Java, classes have an implicit static member "class". 1550 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1551 FormatTok->is(tok::kw_class)) 1552 nextToken(); 1553 if (Style.isJavaScript() && FormatTok && 1554 FormatTok->Tok.getIdentifierInfo()) 1555 // JavaScript only has pseudo keywords, all keywords are allowed to 1556 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1557 nextToken(); 1558 break; 1559 case tok::semi: 1560 nextToken(); 1561 addUnwrappedLine(); 1562 return; 1563 case tok::r_brace: 1564 addUnwrappedLine(); 1565 return; 1566 case tok::l_paren: { 1567 parseParens(); 1568 // Break the unwrapped line if a K&R C function definition has a parameter 1569 // declaration. 1570 if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof)) 1571 break; 1572 if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) { 1573 addUnwrappedLine(); 1574 return; 1575 } 1576 break; 1577 } 1578 case tok::kw_operator: 1579 nextToken(); 1580 if (FormatTok->isBinaryOperator()) 1581 nextToken(); 1582 break; 1583 case tok::caret: 1584 nextToken(); 1585 if (FormatTok->Tok.isAnyIdentifier() || 1586 FormatTok->isSimpleTypeSpecifier()) 1587 nextToken(); 1588 if (FormatTok->is(tok::l_paren)) 1589 parseParens(); 1590 if (FormatTok->is(tok::l_brace)) 1591 parseChildBlock(); 1592 break; 1593 case tok::l_brace: 1594 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { 1595 // A block outside of parentheses must be the last part of a 1596 // structural element. 1597 // FIXME: Figure out cases where this is not true, and add projections 1598 // for them (the one we know is missing are lambdas). 1599 if (Style.Language == FormatStyle::LK_Java && 1600 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) { 1601 // If necessary, we could set the type to something different than 1602 // TT_FunctionLBrace. 1603 if (Style.BraceWrapping.AfterControlStatement == 1604 FormatStyle::BWACS_Always) 1605 addUnwrappedLine(); 1606 } else if (Style.BraceWrapping.AfterFunction) { 1607 addUnwrappedLine(); 1608 } 1609 FormatTok->setType(TT_FunctionLBrace); 1610 parseBlock(); 1611 addUnwrappedLine(); 1612 return; 1613 } 1614 // Otherwise this was a braced init list, and the structural 1615 // element continues. 1616 break; 1617 case tok::kw_try: 1618 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1619 // field/method declaration. 1620 nextToken(); 1621 break; 1622 } 1623 // We arrive here when parsing function-try blocks. 1624 if (Style.BraceWrapping.AfterFunction) 1625 addUnwrappedLine(); 1626 parseTryCatch(); 1627 return; 1628 case tok::identifier: { 1629 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) && 1630 Line->MustBeDeclaration) { 1631 addUnwrappedLine(); 1632 parseCSharpGenericTypeConstraint(); 1633 break; 1634 } 1635 if (FormatTok->is(TT_MacroBlockEnd)) { 1636 addUnwrappedLine(); 1637 return; 1638 } 1639 1640 // Function declarations (as opposed to function expressions) are parsed 1641 // on their own unwrapped line by continuing this loop. Function 1642 // expressions (functions that are not on their own line) must not create 1643 // a new unwrapped line, so they are special cased below. 1644 size_t TokenCount = Line->Tokens.size(); 1645 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) && 1646 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1647 Keywords.kw_async)))) { 1648 tryToParseJSFunction(); 1649 break; 1650 } 1651 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) && 1652 FormatTok->is(Keywords.kw_interface)) { 1653 if (Style.isJavaScript()) { 1654 // In JavaScript/TypeScript, "interface" can be used as a standalone 1655 // identifier, e.g. in `var interface = 1;`. If "interface" is 1656 // followed by another identifier, it is very like to be an actual 1657 // interface declaration. 1658 unsigned StoredPosition = Tokens->getPosition(); 1659 FormatToken *Next = Tokens->getNextToken(); 1660 FormatTok = Tokens->setPosition(StoredPosition); 1661 if (!mustBeJSIdent(Keywords, Next)) { 1662 nextToken(); 1663 break; 1664 } 1665 } 1666 parseRecord(); 1667 addUnwrappedLine(); 1668 return; 1669 } 1670 1671 if (FormatTok->is(Keywords.kw_interface)) { 1672 if (parseStructLike()) { 1673 return; 1674 } 1675 break; 1676 } 1677 1678 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1679 parseStatementMacro(); 1680 return; 1681 } 1682 1683 // See if the following token should start a new unwrapped line. 1684 StringRef Text = FormatTok->TokenText; 1685 nextToken(); 1686 1687 // JS doesn't have macros, and within classes colons indicate fields, not 1688 // labels. 1689 if (Style.isJavaScript()) 1690 break; 1691 1692 TokenCount = Line->Tokens.size(); 1693 if (TokenCount == 1 || 1694 (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) { 1695 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1696 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1697 parseLabel(!Style.IndentGotoLabels); 1698 return; 1699 } 1700 // Recognize function-like macro usages without trailing semicolon as 1701 // well as free-standing macros like Q_OBJECT. 1702 bool FunctionLike = FormatTok->is(tok::l_paren); 1703 if (FunctionLike) 1704 parseParens(); 1705 1706 bool FollowedByNewline = 1707 CommentsBeforeNextToken.empty() 1708 ? FormatTok->NewlinesBefore > 0 1709 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1710 1711 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1712 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { 1713 addUnwrappedLine(); 1714 return; 1715 } 1716 } 1717 break; 1718 } 1719 case tok::equal: 1720 if ((Style.isJavaScript() || Style.isCSharp()) && 1721 FormatTok->is(TT_FatArrow)) { 1722 tryToParseChildBlock(); 1723 break; 1724 } 1725 1726 nextToken(); 1727 if (FormatTok->Tok.is(tok::l_brace)) { 1728 // Block kind should probably be set to BK_BracedInit for any language. 1729 // C# needs this change to ensure that array initialisers and object 1730 // initialisers are indented the same way. 1731 if (Style.isCSharp()) 1732 FormatTok->setBlockKind(BK_BracedInit); 1733 nextToken(); 1734 parseBracedList(); 1735 } else if (Style.Language == FormatStyle::LK_Proto && 1736 FormatTok->Tok.is(tok::less)) { 1737 nextToken(); 1738 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 1739 /*ClosingBraceKind=*/tok::greater); 1740 } 1741 break; 1742 case tok::l_square: 1743 parseSquare(); 1744 break; 1745 case tok::kw_new: 1746 parseNew(); 1747 break; 1748 default: 1749 nextToken(); 1750 break; 1751 } 1752 } while (!eof()); 1753 } 1754 1755 bool UnwrappedLineParser::tryToParsePropertyAccessor() { 1756 assert(FormatTok->is(tok::l_brace)); 1757 if (!Style.isCSharp()) 1758 return false; 1759 // See if it's a property accessor. 1760 if (FormatTok->Previous->isNot(tok::identifier)) 1761 return false; 1762 1763 // See if we are inside a property accessor. 1764 // 1765 // Record the current tokenPosition so that we can advance and 1766 // reset the current token. `Next` is not set yet so we need 1767 // another way to advance along the token stream. 1768 unsigned int StoredPosition = Tokens->getPosition(); 1769 FormatToken *Tok = Tokens->getNextToken(); 1770 1771 // A trivial property accessor is of the form: 1772 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] } 1773 // Track these as they do not require line breaks to be introduced. 1774 bool HasGetOrSet = false; 1775 bool IsTrivialPropertyAccessor = true; 1776 while (!eof()) { 1777 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private, 1778 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get, 1779 Keywords.kw_set)) { 1780 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set)) 1781 HasGetOrSet = true; 1782 Tok = Tokens->getNextToken(); 1783 continue; 1784 } 1785 if (Tok->isNot(tok::r_brace)) 1786 IsTrivialPropertyAccessor = false; 1787 break; 1788 } 1789 1790 if (!HasGetOrSet) { 1791 Tokens->setPosition(StoredPosition); 1792 return false; 1793 } 1794 1795 // Try to parse the property accessor: 1796 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties 1797 Tokens->setPosition(StoredPosition); 1798 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction) 1799 addUnwrappedLine(); 1800 nextToken(); 1801 do { 1802 switch (FormatTok->Tok.getKind()) { 1803 case tok::r_brace: 1804 nextToken(); 1805 if (FormatTok->is(tok::equal)) { 1806 while (!eof() && FormatTok->isNot(tok::semi)) 1807 nextToken(); 1808 nextToken(); 1809 } 1810 addUnwrappedLine(); 1811 return true; 1812 case tok::l_brace: 1813 ++Line->Level; 1814 parseBlock(/*MustBeDeclaration=*/true); 1815 addUnwrappedLine(); 1816 --Line->Level; 1817 break; 1818 case tok::equal: 1819 if (FormatTok->is(TT_FatArrow)) { 1820 ++Line->Level; 1821 do { 1822 nextToken(); 1823 } while (!eof() && FormatTok->isNot(tok::semi)); 1824 nextToken(); 1825 addUnwrappedLine(); 1826 --Line->Level; 1827 break; 1828 } 1829 nextToken(); 1830 break; 1831 default: 1832 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) && 1833 !IsTrivialPropertyAccessor) { 1834 // Non-trivial get/set needs to be on its own line. 1835 addUnwrappedLine(); 1836 } 1837 nextToken(); 1838 } 1839 } while (!eof()); 1840 1841 // Unreachable for well-formed code (paired '{' and '}'). 1842 return true; 1843 } 1844 1845 bool UnwrappedLineParser::tryToParseLambda() { 1846 if (!Style.isCpp()) { 1847 nextToken(); 1848 return false; 1849 } 1850 assert(FormatTok->is(tok::l_square)); 1851 FormatToken &LSquare = *FormatTok; 1852 if (!tryToParseLambdaIntroducer()) 1853 return false; 1854 1855 bool SeenArrow = false; 1856 1857 while (FormatTok->isNot(tok::l_brace)) { 1858 if (FormatTok->isSimpleTypeSpecifier()) { 1859 nextToken(); 1860 continue; 1861 } 1862 switch (FormatTok->Tok.getKind()) { 1863 case tok::l_brace: 1864 break; 1865 case tok::l_paren: 1866 parseParens(); 1867 break; 1868 case tok::l_square: 1869 parseSquare(); 1870 break; 1871 case tok::amp: 1872 case tok::star: 1873 case tok::kw_const: 1874 case tok::comma: 1875 case tok::less: 1876 case tok::greater: 1877 case tok::identifier: 1878 case tok::numeric_constant: 1879 case tok::coloncolon: 1880 case tok::kw_class: 1881 case tok::kw_mutable: 1882 case tok::kw_noexcept: 1883 case tok::kw_template: 1884 case tok::kw_typename: 1885 nextToken(); 1886 break; 1887 // Specialization of a template with an integer parameter can contain 1888 // arithmetic, logical, comparison and ternary operators. 1889 // 1890 // FIXME: This also accepts sequences of operators that are not in the scope 1891 // of a template argument list. 1892 // 1893 // In a C++ lambda a template type can only occur after an arrow. We use 1894 // this as an heuristic to distinguish between Objective-C expressions 1895 // followed by an `a->b` expression, such as: 1896 // ([obj func:arg] + a->b) 1897 // Otherwise the code below would parse as a lambda. 1898 // 1899 // FIXME: This heuristic is incorrect for C++20 generic lambdas with 1900 // explicit template lists: []<bool b = true && false>(U &&u){} 1901 case tok::plus: 1902 case tok::minus: 1903 case tok::exclaim: 1904 case tok::tilde: 1905 case tok::slash: 1906 case tok::percent: 1907 case tok::lessless: 1908 case tok::pipe: 1909 case tok::pipepipe: 1910 case tok::ampamp: 1911 case tok::caret: 1912 case tok::equalequal: 1913 case tok::exclaimequal: 1914 case tok::greaterequal: 1915 case tok::lessequal: 1916 case tok::question: 1917 case tok::colon: 1918 case tok::ellipsis: 1919 case tok::kw_true: 1920 case tok::kw_false: 1921 if (SeenArrow) { 1922 nextToken(); 1923 break; 1924 } 1925 return true; 1926 case tok::arrow: 1927 // This might or might not actually be a lambda arrow (this could be an 1928 // ObjC method invocation followed by a dereferencing arrow). We might 1929 // reset this back to TT_Unknown in TokenAnnotator. 1930 FormatTok->setType(TT_LambdaArrow); 1931 SeenArrow = true; 1932 nextToken(); 1933 break; 1934 default: 1935 return true; 1936 } 1937 } 1938 FormatTok->setType(TT_LambdaLBrace); 1939 LSquare.setType(TT_LambdaLSquare); 1940 parseChildBlock(); 1941 return true; 1942 } 1943 1944 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1945 const FormatToken *Previous = FormatTok->Previous; 1946 if (Previous && 1947 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1948 tok::kw_delete, tok::l_square) || 1949 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1950 Previous->isSimpleTypeSpecifier())) { 1951 nextToken(); 1952 return false; 1953 } 1954 nextToken(); 1955 if (FormatTok->is(tok::l_square)) { 1956 return false; 1957 } 1958 parseSquare(/*LambdaIntroducer=*/true); 1959 return true; 1960 } 1961 1962 void UnwrappedLineParser::tryToParseJSFunction() { 1963 assert(FormatTok->is(Keywords.kw_function) || 1964 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1965 if (FormatTok->is(Keywords.kw_async)) 1966 nextToken(); 1967 // Consume "function". 1968 nextToken(); 1969 1970 // Consume * (generator function). Treat it like C++'s overloaded operators. 1971 if (FormatTok->is(tok::star)) { 1972 FormatTok->setType(TT_OverloadedOperator); 1973 nextToken(); 1974 } 1975 1976 // Consume function name. 1977 if (FormatTok->is(tok::identifier)) 1978 nextToken(); 1979 1980 if (FormatTok->isNot(tok::l_paren)) 1981 return; 1982 1983 // Parse formal parameter list. 1984 parseParens(); 1985 1986 if (FormatTok->is(tok::colon)) { 1987 // Parse a type definition. 1988 nextToken(); 1989 1990 // Eat the type declaration. For braced inline object types, balance braces, 1991 // otherwise just parse until finding an l_brace for the function body. 1992 if (FormatTok->is(tok::l_brace)) 1993 tryToParseBracedList(); 1994 else 1995 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1996 nextToken(); 1997 } 1998 1999 if (FormatTok->is(tok::semi)) 2000 return; 2001 2002 parseChildBlock(); 2003 } 2004 2005 bool UnwrappedLineParser::tryToParseBracedList() { 2006 if (FormatTok->is(BK_Unknown)) 2007 calculateBraceTypes(); 2008 assert(FormatTok->isNot(BK_Unknown)); 2009 if (FormatTok->is(BK_Block)) 2010 return false; 2011 nextToken(); 2012 parseBracedList(); 2013 return true; 2014 } 2015 2016 bool UnwrappedLineParser::tryToParseChildBlock() { 2017 assert(Style.isJavaScript() || Style.isCSharp()); 2018 assert(FormatTok->is(TT_FatArrow)); 2019 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 2020 // They always start an expression or a child block if followed by a curly 2021 // brace. 2022 nextToken(); 2023 if (FormatTok->isNot(tok::l_brace)) 2024 return false; 2025 parseChildBlock(); 2026 return true; 2027 } 2028 2029 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 2030 bool IsEnum, 2031 tok::TokenKind ClosingBraceKind) { 2032 bool HasError = false; 2033 2034 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 2035 // replace this by using parseAssignmentExpression() inside. 2036 do { 2037 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2038 tryToParseChildBlock()) 2039 continue; 2040 if (Style.isJavaScript()) { 2041 if (FormatTok->is(Keywords.kw_function) || 2042 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 2043 tryToParseJSFunction(); 2044 continue; 2045 } 2046 if (FormatTok->is(tok::l_brace)) { 2047 // Could be a method inside of a braced list `{a() { return 1; }}`. 2048 if (tryToParseBracedList()) 2049 continue; 2050 parseChildBlock(); 2051 } 2052 } 2053 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 2054 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2055 addUnwrappedLine(); 2056 nextToken(); 2057 return !HasError; 2058 } 2059 switch (FormatTok->Tok.getKind()) { 2060 case tok::l_square: 2061 if (Style.isCSharp()) 2062 parseSquare(); 2063 else 2064 tryToParseLambda(); 2065 break; 2066 case tok::l_paren: 2067 parseParens(); 2068 // JavaScript can just have free standing methods and getters/setters in 2069 // object literals. Detect them by a "{" following ")". 2070 if (Style.isJavaScript()) { 2071 if (FormatTok->is(tok::l_brace)) 2072 parseChildBlock(); 2073 break; 2074 } 2075 break; 2076 case tok::l_brace: 2077 // Assume there are no blocks inside a braced init list apart 2078 // from the ones we explicitly parse out (like lambdas). 2079 FormatTok->setBlockKind(BK_BracedInit); 2080 nextToken(); 2081 parseBracedList(); 2082 break; 2083 case tok::less: 2084 if (Style.Language == FormatStyle::LK_Proto) { 2085 nextToken(); 2086 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2087 /*ClosingBraceKind=*/tok::greater); 2088 } else { 2089 nextToken(); 2090 } 2091 break; 2092 case tok::semi: 2093 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2094 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 2095 // used for error recovery if we have otherwise determined that this is 2096 // a braced list. 2097 if (Style.isJavaScript()) { 2098 nextToken(); 2099 break; 2100 } 2101 HasError = true; 2102 if (!ContinueOnSemicolons) 2103 return !HasError; 2104 nextToken(); 2105 break; 2106 case tok::comma: 2107 nextToken(); 2108 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2109 addUnwrappedLine(); 2110 break; 2111 default: 2112 nextToken(); 2113 break; 2114 } 2115 } while (!eof()); 2116 return false; 2117 } 2118 2119 void UnwrappedLineParser::parseParens() { 2120 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 2121 nextToken(); 2122 do { 2123 switch (FormatTok->Tok.getKind()) { 2124 case tok::l_paren: 2125 parseParens(); 2126 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2127 parseChildBlock(); 2128 break; 2129 case tok::r_paren: 2130 nextToken(); 2131 return; 2132 case tok::r_brace: 2133 // A "}" inside parenthesis is an error if there wasn't a matching "{". 2134 return; 2135 case tok::l_square: 2136 tryToParseLambda(); 2137 break; 2138 case tok::l_brace: 2139 if (!tryToParseBracedList()) 2140 parseChildBlock(); 2141 break; 2142 case tok::at: 2143 nextToken(); 2144 if (FormatTok->Tok.is(tok::l_brace)) { 2145 nextToken(); 2146 parseBracedList(); 2147 } 2148 break; 2149 case tok::equal: 2150 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2151 tryToParseChildBlock(); 2152 else 2153 nextToken(); 2154 break; 2155 case tok::kw_class: 2156 if (Style.isJavaScript()) 2157 parseRecord(/*ParseAsExpr=*/true); 2158 else 2159 nextToken(); 2160 break; 2161 case tok::identifier: 2162 if (Style.isJavaScript() && 2163 (FormatTok->is(Keywords.kw_function) || 2164 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 2165 tryToParseJSFunction(); 2166 else 2167 nextToken(); 2168 break; 2169 default: 2170 nextToken(); 2171 break; 2172 } 2173 } while (!eof()); 2174 } 2175 2176 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2177 if (!LambdaIntroducer) { 2178 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 2179 if (tryToParseLambda()) 2180 return; 2181 } 2182 do { 2183 switch (FormatTok->Tok.getKind()) { 2184 case tok::l_paren: 2185 parseParens(); 2186 break; 2187 case tok::r_square: 2188 nextToken(); 2189 return; 2190 case tok::r_brace: 2191 // A "}" inside parenthesis is an error if there wasn't a matching "{". 2192 return; 2193 case tok::l_square: 2194 parseSquare(); 2195 break; 2196 case tok::l_brace: { 2197 if (!tryToParseBracedList()) 2198 parseChildBlock(); 2199 break; 2200 } 2201 case tok::at: 2202 nextToken(); 2203 if (FormatTok->Tok.is(tok::l_brace)) { 2204 nextToken(); 2205 parseBracedList(); 2206 } 2207 break; 2208 default: 2209 nextToken(); 2210 break; 2211 } 2212 } while (!eof()); 2213 } 2214 2215 void UnwrappedLineParser::keepAncestorBraces() { 2216 if (!Style.RemoveBracesLLVM) 2217 return; 2218 2219 const int MaxNestingLevels = 2; 2220 const int Size = NestedTooDeep.size(); 2221 if (Size >= MaxNestingLevels) 2222 NestedTooDeep[Size - MaxNestingLevels] = true; 2223 NestedTooDeep.push_back(false); 2224 } 2225 2226 static void markOptionalBraces(FormatToken *LeftBrace) { 2227 if (!LeftBrace) 2228 return; 2229 2230 assert(LeftBrace->is(tok::l_brace)); 2231 2232 FormatToken *RightBrace = LeftBrace->MatchingParen; 2233 if (!RightBrace) { 2234 assert(!LeftBrace->Optional); 2235 return; 2236 } 2237 2238 assert(RightBrace->is(tok::r_brace)); 2239 assert(RightBrace->MatchingParen == LeftBrace); 2240 assert(LeftBrace->Optional == RightBrace->Optional); 2241 2242 LeftBrace->Optional = true; 2243 RightBrace->Optional = true; 2244 } 2245 2246 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, 2247 bool KeepBraces) { 2248 auto HandleAttributes = [this]() { 2249 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2250 if (FormatTok->is(TT_AttributeMacro)) 2251 nextToken(); 2252 // Handle [[likely]] / [[unlikely]] attributes. 2253 if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) 2254 parseSquare(); 2255 }; 2256 2257 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 2258 nextToken(); 2259 if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier)) 2260 nextToken(); 2261 if (FormatTok->Tok.is(tok::l_paren)) 2262 parseParens(); 2263 HandleAttributes(); 2264 2265 bool NeedsUnwrappedLine = false; 2266 keepAncestorBraces(); 2267 2268 FormatToken *IfLeftBrace = nullptr; 2269 IfStmtKind IfBlockKind = IfStmtKind::NotIf; 2270 2271 if (FormatTok->Tok.is(tok::l_brace)) { 2272 IfLeftBrace = FormatTok; 2273 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2274 IfBlockKind = parseBlock(); 2275 if (Style.BraceWrapping.BeforeElse) 2276 addUnwrappedLine(); 2277 else 2278 NeedsUnwrappedLine = true; 2279 } else { 2280 addUnwrappedLine(); 2281 ++Line->Level; 2282 parseStructuralElement(); 2283 --Line->Level; 2284 } 2285 2286 bool KeepIfBraces = false; 2287 if (Style.RemoveBracesLLVM) { 2288 assert(!NestedTooDeep.empty()); 2289 KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) || 2290 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || 2291 IfBlockKind == IfStmtKind::IfElseIf; 2292 } 2293 2294 FormatToken *ElseLeftBrace = nullptr; 2295 IfStmtKind Kind = IfStmtKind::IfOnly; 2296 2297 if (FormatTok->Tok.is(tok::kw_else)) { 2298 if (Style.RemoveBracesLLVM) { 2299 NestedTooDeep.back() = false; 2300 Kind = IfStmtKind::IfElse; 2301 } 2302 nextToken(); 2303 HandleAttributes(); 2304 if (FormatTok->Tok.is(tok::l_brace)) { 2305 ElseLeftBrace = FormatTok; 2306 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2307 if (parseBlock() == IfStmtKind::IfOnly) 2308 Kind = IfStmtKind::IfElseIf; 2309 addUnwrappedLine(); 2310 } else if (FormatTok->Tok.is(tok::kw_if)) { 2311 FormatToken *Previous = Tokens->getPreviousToken(); 2312 const bool IsPrecededByComment = Previous && Previous->is(tok::comment); 2313 if (IsPrecededByComment) { 2314 addUnwrappedLine(); 2315 ++Line->Level; 2316 } 2317 bool TooDeep = true; 2318 if (Style.RemoveBracesLLVM) { 2319 Kind = IfStmtKind::IfElseIf; 2320 TooDeep = NestedTooDeep.pop_back_val(); 2321 } 2322 ElseLeftBrace = 2323 parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces); 2324 if (Style.RemoveBracesLLVM) 2325 NestedTooDeep.push_back(TooDeep); 2326 if (IsPrecededByComment) 2327 --Line->Level; 2328 } else { 2329 addUnwrappedLine(); 2330 ++Line->Level; 2331 parseStructuralElement(); 2332 if (FormatTok->is(tok::eof)) 2333 addUnwrappedLine(); 2334 --Line->Level; 2335 } 2336 } else { 2337 if (Style.RemoveBracesLLVM) 2338 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; 2339 if (NeedsUnwrappedLine) 2340 addUnwrappedLine(); 2341 } 2342 2343 if (!Style.RemoveBracesLLVM) 2344 return nullptr; 2345 2346 assert(!NestedTooDeep.empty()); 2347 const bool KeepElseBraces = 2348 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back(); 2349 2350 NestedTooDeep.pop_back(); 2351 2352 if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) { 2353 markOptionalBraces(IfLeftBrace); 2354 markOptionalBraces(ElseLeftBrace); 2355 } else if (IfLeftBrace) { 2356 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; 2357 if (IfRightBrace) { 2358 assert(IfRightBrace->MatchingParen == IfLeftBrace); 2359 assert(!IfLeftBrace->Optional); 2360 assert(!IfRightBrace->Optional); 2361 IfLeftBrace->MatchingParen = nullptr; 2362 IfRightBrace->MatchingParen = nullptr; 2363 } 2364 } 2365 2366 if (IfKind) 2367 *IfKind = Kind; 2368 2369 return IfLeftBrace; 2370 } 2371 2372 void UnwrappedLineParser::parseTryCatch() { 2373 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2374 nextToken(); 2375 bool NeedsUnwrappedLine = false; 2376 if (FormatTok->is(tok::colon)) { 2377 // We are in a function try block, what comes is an initializer list. 2378 nextToken(); 2379 2380 // In case identifiers were removed by clang-tidy, what might follow is 2381 // multiple commas in sequence - before the first identifier. 2382 while (FormatTok->is(tok::comma)) 2383 nextToken(); 2384 2385 while (FormatTok->is(tok::identifier)) { 2386 nextToken(); 2387 if (FormatTok->is(tok::l_paren)) 2388 parseParens(); 2389 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2390 FormatTok->is(tok::l_brace)) { 2391 do { 2392 nextToken(); 2393 } while (!FormatTok->is(tok::r_brace)); 2394 nextToken(); 2395 } 2396 2397 // In case identifiers were removed by clang-tidy, what might follow is 2398 // multiple commas in sequence - after the first identifier. 2399 while (FormatTok->is(tok::comma)) 2400 nextToken(); 2401 } 2402 } 2403 // Parse try with resource. 2404 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 2405 parseParens(); 2406 } 2407 2408 keepAncestorBraces(); 2409 2410 if (FormatTok->is(tok::l_brace)) { 2411 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2412 parseBlock(); 2413 if (Style.BraceWrapping.BeforeCatch) { 2414 addUnwrappedLine(); 2415 } else { 2416 NeedsUnwrappedLine = true; 2417 } 2418 } else if (!FormatTok->is(tok::kw_catch)) { 2419 // The C++ standard requires a compound-statement after a try. 2420 // If there's none, we try to assume there's a structuralElement 2421 // and try to continue. 2422 addUnwrappedLine(); 2423 ++Line->Level; 2424 parseStructuralElement(); 2425 --Line->Level; 2426 } 2427 while (true) { 2428 if (FormatTok->is(tok::at)) 2429 nextToken(); 2430 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 2431 tok::kw___finally) || 2432 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 2433 FormatTok->is(Keywords.kw_finally)) || 2434 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 2435 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 2436 break; 2437 nextToken(); 2438 while (FormatTok->isNot(tok::l_brace)) { 2439 if (FormatTok->is(tok::l_paren)) { 2440 parseParens(); 2441 continue; 2442 } 2443 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) { 2444 if (Style.RemoveBracesLLVM) 2445 NestedTooDeep.pop_back(); 2446 return; 2447 } 2448 nextToken(); 2449 } 2450 NeedsUnwrappedLine = false; 2451 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2452 parseBlock(); 2453 if (Style.BraceWrapping.BeforeCatch) 2454 addUnwrappedLine(); 2455 else 2456 NeedsUnwrappedLine = true; 2457 } 2458 2459 if (Style.RemoveBracesLLVM) 2460 NestedTooDeep.pop_back(); 2461 2462 if (NeedsUnwrappedLine) 2463 addUnwrappedLine(); 2464 } 2465 2466 void UnwrappedLineParser::parseNamespace() { 2467 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 2468 "'namespace' expected"); 2469 2470 const FormatToken &InitialToken = *FormatTok; 2471 nextToken(); 2472 if (InitialToken.is(TT_NamespaceMacro)) { 2473 parseParens(); 2474 } else { 2475 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, 2476 tok::l_square, tok::period)) { 2477 if (FormatTok->is(tok::l_square)) 2478 parseSquare(); 2479 else 2480 nextToken(); 2481 } 2482 } 2483 if (FormatTok->Tok.is(tok::l_brace)) { 2484 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2485 addUnwrappedLine(); 2486 2487 unsigned AddLevels = 2488 Style.NamespaceIndentation == FormatStyle::NI_All || 2489 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 2490 DeclarationScopeStack.size() > 1) 2491 ? 1u 2492 : 0u; 2493 bool ManageWhitesmithsBraces = 2494 AddLevels == 0u && 2495 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 2496 2497 // If we're in Whitesmiths mode, indent the brace if we're not indenting 2498 // the whole block. 2499 if (ManageWhitesmithsBraces) 2500 ++Line->Level; 2501 2502 parseBlock(/*MustBeDeclaration=*/true, AddLevels, 2503 /*MunchSemi=*/true, 2504 /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces); 2505 2506 // Munch the semicolon after a namespace. This is more common than one would 2507 // think. Putting the semicolon into its own line is very ugly. 2508 if (FormatTok->Tok.is(tok::semi)) 2509 nextToken(); 2510 2511 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); 2512 2513 if (ManageWhitesmithsBraces) 2514 --Line->Level; 2515 } 2516 // FIXME: Add error handling. 2517 } 2518 2519 void UnwrappedLineParser::parseNew() { 2520 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 2521 nextToken(); 2522 2523 if (Style.isCSharp()) { 2524 do { 2525 if (FormatTok->is(tok::l_brace)) 2526 parseBracedList(); 2527 2528 if (FormatTok->isOneOf(tok::semi, tok::comma)) 2529 return; 2530 2531 nextToken(); 2532 } while (!eof()); 2533 } 2534 2535 if (Style.Language != FormatStyle::LK_Java) 2536 return; 2537 2538 // In Java, we can parse everything up to the parens, which aren't optional. 2539 do { 2540 // There should not be a ;, { or } before the new's open paren. 2541 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 2542 return; 2543 2544 // Consume the parens. 2545 if (FormatTok->is(tok::l_paren)) { 2546 parseParens(); 2547 2548 // If there is a class body of an anonymous class, consume that as child. 2549 if (FormatTok->is(tok::l_brace)) 2550 parseChildBlock(); 2551 return; 2552 } 2553 nextToken(); 2554 } while (!eof()); 2555 } 2556 2557 void UnwrappedLineParser::parseForOrWhileLoop() { 2558 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 2559 "'for', 'while' or foreach macro expected"); 2560 nextToken(); 2561 // JS' for await ( ... 2562 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 2563 nextToken(); 2564 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 2565 nextToken(); 2566 if (FormatTok->Tok.is(tok::l_paren)) 2567 parseParens(); 2568 2569 keepAncestorBraces(); 2570 2571 if (FormatTok->Tok.is(tok::l_brace)) { 2572 FormatToken *LeftBrace = FormatTok; 2573 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2574 parseBlock(); 2575 if (Style.RemoveBracesLLVM) { 2576 assert(!NestedTooDeep.empty()); 2577 if (!NestedTooDeep.back()) 2578 markOptionalBraces(LeftBrace); 2579 } 2580 addUnwrappedLine(); 2581 } else { 2582 addUnwrappedLine(); 2583 ++Line->Level; 2584 parseStructuralElement(); 2585 --Line->Level; 2586 } 2587 2588 if (Style.RemoveBracesLLVM) 2589 NestedTooDeep.pop_back(); 2590 } 2591 2592 void UnwrappedLineParser::parseDoWhile() { 2593 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 2594 nextToken(); 2595 2596 keepAncestorBraces(); 2597 2598 if (FormatTok->Tok.is(tok::l_brace)) { 2599 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2600 parseBlock(); 2601 if (Style.BraceWrapping.BeforeWhile) 2602 addUnwrappedLine(); 2603 } else { 2604 addUnwrappedLine(); 2605 ++Line->Level; 2606 parseStructuralElement(); 2607 --Line->Level; 2608 } 2609 2610 if (Style.RemoveBracesLLVM) 2611 NestedTooDeep.pop_back(); 2612 2613 // FIXME: Add error handling. 2614 if (!FormatTok->Tok.is(tok::kw_while)) { 2615 addUnwrappedLine(); 2616 return; 2617 } 2618 2619 // If in Whitesmiths mode, the line with the while() needs to be indented 2620 // to the same level as the block. 2621 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 2622 ++Line->Level; 2623 2624 nextToken(); 2625 parseStructuralElement(); 2626 } 2627 2628 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 2629 nextToken(); 2630 unsigned OldLineLevel = Line->Level; 2631 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 2632 --Line->Level; 2633 if (LeftAlignLabel) 2634 Line->Level = 0; 2635 2636 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 2637 FormatTok->Tok.is(tok::l_brace)) { 2638 2639 CompoundStatementIndenter Indenter(this, Line->Level, 2640 Style.BraceWrapping.AfterCaseLabel, 2641 Style.BraceWrapping.IndentBraces); 2642 parseBlock(); 2643 if (FormatTok->Tok.is(tok::kw_break)) { 2644 if (Style.BraceWrapping.AfterControlStatement == 2645 FormatStyle::BWACS_Always) { 2646 addUnwrappedLine(); 2647 if (!Style.IndentCaseBlocks && 2648 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 2649 ++Line->Level; 2650 } 2651 } 2652 parseStructuralElement(); 2653 } 2654 addUnwrappedLine(); 2655 } else { 2656 if (FormatTok->is(tok::semi)) 2657 nextToken(); 2658 addUnwrappedLine(); 2659 } 2660 Line->Level = OldLineLevel; 2661 if (FormatTok->isNot(tok::l_brace)) { 2662 parseStructuralElement(); 2663 addUnwrappedLine(); 2664 } 2665 } 2666 2667 void UnwrappedLineParser::parseCaseLabel() { 2668 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 2669 2670 // FIXME: fix handling of complex expressions here. 2671 do { 2672 nextToken(); 2673 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 2674 parseLabel(); 2675 } 2676 2677 void UnwrappedLineParser::parseSwitch() { 2678 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 2679 nextToken(); 2680 if (FormatTok->Tok.is(tok::l_paren)) 2681 parseParens(); 2682 2683 keepAncestorBraces(); 2684 2685 if (FormatTok->Tok.is(tok::l_brace)) { 2686 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2687 parseBlock(); 2688 addUnwrappedLine(); 2689 } else { 2690 addUnwrappedLine(); 2691 ++Line->Level; 2692 parseStructuralElement(); 2693 --Line->Level; 2694 } 2695 2696 if (Style.RemoveBracesLLVM) 2697 NestedTooDeep.pop_back(); 2698 } 2699 2700 void UnwrappedLineParser::parseAccessSpecifier() { 2701 nextToken(); 2702 // Understand Qt's slots. 2703 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 2704 nextToken(); 2705 // Otherwise, we don't know what it is, and we'd better keep the next token. 2706 if (FormatTok->Tok.is(tok::colon)) 2707 nextToken(); 2708 addUnwrappedLine(); 2709 } 2710 2711 void UnwrappedLineParser::parseConcept() { 2712 assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected"); 2713 nextToken(); 2714 if (!FormatTok->Tok.is(tok::identifier)) 2715 return; 2716 nextToken(); 2717 if (!FormatTok->Tok.is(tok::equal)) 2718 return; 2719 nextToken(); 2720 if (FormatTok->Tok.is(tok::kw_requires)) { 2721 nextToken(); 2722 parseRequiresExpression(Line->Level); 2723 } else { 2724 parseConstraintExpression(Line->Level); 2725 } 2726 } 2727 2728 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) { 2729 // requires (R range) 2730 if (FormatTok->Tok.is(tok::l_paren)) { 2731 parseParens(); 2732 if (Style.IndentRequires && OriginalLevel != Line->Level) { 2733 addUnwrappedLine(); 2734 --Line->Level; 2735 } 2736 } 2737 2738 if (FormatTok->Tok.is(tok::l_brace)) { 2739 if (Style.BraceWrapping.AfterFunction) 2740 addUnwrappedLine(); 2741 FormatTok->setType(TT_FunctionLBrace); 2742 parseBlock(); 2743 addUnwrappedLine(); 2744 } else { 2745 parseConstraintExpression(OriginalLevel); 2746 } 2747 } 2748 2749 void UnwrappedLineParser::parseConstraintExpression( 2750 unsigned int OriginalLevel) { 2751 // requires Id<T> && Id<T> || Id<T> 2752 while ( 2753 FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) { 2754 nextToken(); 2755 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less, 2756 tok::greater, tok::comma, tok::ellipsis)) { 2757 if (FormatTok->Tok.is(tok::less)) { 2758 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2759 /*ClosingBraceKind=*/tok::greater); 2760 continue; 2761 } 2762 nextToken(); 2763 } 2764 if (FormatTok->Tok.is(tok::kw_requires)) { 2765 parseRequiresExpression(OriginalLevel); 2766 } 2767 if (FormatTok->Tok.is(tok::less)) { 2768 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2769 /*ClosingBraceKind=*/tok::greater); 2770 } 2771 2772 if (FormatTok->Tok.is(tok::l_paren)) { 2773 parseParens(); 2774 } 2775 if (FormatTok->Tok.is(tok::l_brace)) { 2776 if (Style.BraceWrapping.AfterFunction) 2777 addUnwrappedLine(); 2778 FormatTok->setType(TT_FunctionLBrace); 2779 parseBlock(); 2780 } 2781 if (FormatTok->Tok.is(tok::semi)) { 2782 // Eat any trailing semi. 2783 nextToken(); 2784 addUnwrappedLine(); 2785 } 2786 if (FormatTok->Tok.is(tok::colon)) { 2787 return; 2788 } 2789 if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) { 2790 if (FormatTok->Previous && 2791 !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires, 2792 tok::coloncolon)) { 2793 addUnwrappedLine(); 2794 } 2795 if (Style.IndentRequires && OriginalLevel != Line->Level) { 2796 --Line->Level; 2797 } 2798 break; 2799 } else { 2800 FormatTok->setType(TT_ConstraintJunctions); 2801 } 2802 2803 nextToken(); 2804 } 2805 } 2806 2807 void UnwrappedLineParser::parseRequires() { 2808 assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected"); 2809 2810 unsigned OriginalLevel = Line->Level; 2811 if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) { 2812 addUnwrappedLine(); 2813 if (Style.IndentRequires) { 2814 ++Line->Level; 2815 } 2816 } 2817 nextToken(); 2818 2819 parseRequiresExpression(OriginalLevel); 2820 } 2821 2822 bool UnwrappedLineParser::parseEnum() { 2823 const FormatToken &InitialToken = *FormatTok; 2824 2825 // Won't be 'enum' for NS_ENUMs. 2826 if (FormatTok->Tok.is(tok::kw_enum)) 2827 nextToken(); 2828 2829 // In TypeScript, "enum" can also be used as property name, e.g. in interface 2830 // declarations. An "enum" keyword followed by a colon would be a syntax 2831 // error and thus assume it is just an identifier. 2832 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 2833 return false; 2834 2835 // In protobuf, "enum" can be used as a field name. 2836 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 2837 return false; 2838 2839 // Eat up enum class ... 2840 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 2841 nextToken(); 2842 2843 while (FormatTok->Tok.getIdentifierInfo() || 2844 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 2845 tok::greater, tok::comma, tok::question)) { 2846 nextToken(); 2847 // We can have macros or attributes in between 'enum' and the enum name. 2848 if (FormatTok->is(tok::l_paren)) 2849 parseParens(); 2850 if (FormatTok->is(tok::identifier)) { 2851 nextToken(); 2852 // If there are two identifiers in a row, this is likely an elaborate 2853 // return type. In Java, this can be "implements", etc. 2854 if (Style.isCpp() && FormatTok->is(tok::identifier)) 2855 return false; 2856 } 2857 } 2858 2859 // Just a declaration or something is wrong. 2860 if (FormatTok->isNot(tok::l_brace)) 2861 return true; 2862 FormatTok->setBlockKind(BK_Block); 2863 2864 if (Style.Language == FormatStyle::LK_Java) { 2865 // Java enums are different. 2866 parseJavaEnumBody(); 2867 return true; 2868 } 2869 if (Style.Language == FormatStyle::LK_Proto) { 2870 parseBlock(/*MustBeDeclaration=*/true); 2871 return true; 2872 } 2873 2874 if (!Style.AllowShortEnumsOnASingleLine && 2875 ShouldBreakBeforeBrace(Style, InitialToken)) 2876 addUnwrappedLine(); 2877 // Parse enum body. 2878 nextToken(); 2879 if (!Style.AllowShortEnumsOnASingleLine) { 2880 addUnwrappedLine(); 2881 Line->Level += 1; 2882 } 2883 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true, 2884 /*IsEnum=*/true); 2885 if (!Style.AllowShortEnumsOnASingleLine) 2886 Line->Level -= 1; 2887 if (HasError) { 2888 if (FormatTok->is(tok::semi)) 2889 nextToken(); 2890 addUnwrappedLine(); 2891 } 2892 return true; 2893 2894 // There is no addUnwrappedLine() here so that we fall through to parsing a 2895 // structural element afterwards. Thus, in "enum A {} n, m;", 2896 // "} n, m;" will end up in one unwrapped line. 2897 } 2898 2899 bool UnwrappedLineParser::parseStructLike() { 2900 // parseRecord falls through and does not yet add an unwrapped line as a 2901 // record declaration or definition can start a structural element. 2902 parseRecord(); 2903 // This does not apply to Java, JavaScript and C#. 2904 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 2905 Style.isCSharp()) { 2906 if (FormatTok->is(tok::semi)) 2907 nextToken(); 2908 addUnwrappedLine(); 2909 return true; 2910 } 2911 return false; 2912 } 2913 2914 namespace { 2915 // A class used to set and restore the Token position when peeking 2916 // ahead in the token source. 2917 class ScopedTokenPosition { 2918 unsigned StoredPosition; 2919 FormatTokenSource *Tokens; 2920 2921 public: 2922 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 2923 assert(Tokens && "Tokens expected to not be null"); 2924 StoredPosition = Tokens->getPosition(); 2925 } 2926 2927 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 2928 }; 2929 } // namespace 2930 2931 // Look to see if we have [[ by looking ahead, if 2932 // its not then rewind to the original position. 2933 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 2934 ScopedTokenPosition AutoPosition(Tokens); 2935 FormatToken *Tok = Tokens->getNextToken(); 2936 // We already read the first [ check for the second. 2937 if (!Tok->is(tok::l_square)) { 2938 return false; 2939 } 2940 // Double check that the attribute is just something 2941 // fairly simple. 2942 while (Tok->isNot(tok::eof)) { 2943 if (Tok->is(tok::r_square)) { 2944 break; 2945 } 2946 Tok = Tokens->getNextToken(); 2947 } 2948 if (Tok->is(tok::eof)) 2949 return false; 2950 Tok = Tokens->getNextToken(); 2951 if (!Tok->is(tok::r_square)) { 2952 return false; 2953 } 2954 Tok = Tokens->getNextToken(); 2955 if (Tok->is(tok::semi)) { 2956 return false; 2957 } 2958 return true; 2959 } 2960 2961 void UnwrappedLineParser::parseJavaEnumBody() { 2962 // Determine whether the enum is simple, i.e. does not have a semicolon or 2963 // constants with class bodies. Simple enums can be formatted like braced 2964 // lists, contracted to a single line, etc. 2965 unsigned StoredPosition = Tokens->getPosition(); 2966 bool IsSimple = true; 2967 FormatToken *Tok = Tokens->getNextToken(); 2968 while (!Tok->is(tok::eof)) { 2969 if (Tok->is(tok::r_brace)) 2970 break; 2971 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2972 IsSimple = false; 2973 break; 2974 } 2975 // FIXME: This will also mark enums with braces in the arguments to enum 2976 // constants as "not simple". This is probably fine in practice, though. 2977 Tok = Tokens->getNextToken(); 2978 } 2979 FormatTok = Tokens->setPosition(StoredPosition); 2980 2981 if (IsSimple) { 2982 nextToken(); 2983 parseBracedList(); 2984 addUnwrappedLine(); 2985 return; 2986 } 2987 2988 // Parse the body of a more complex enum. 2989 // First add a line for everything up to the "{". 2990 nextToken(); 2991 addUnwrappedLine(); 2992 ++Line->Level; 2993 2994 // Parse the enum constants. 2995 while (FormatTok) { 2996 if (FormatTok->is(tok::l_brace)) { 2997 // Parse the constant's class body. 2998 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 2999 /*MunchSemi=*/false); 3000 } else if (FormatTok->is(tok::l_paren)) { 3001 parseParens(); 3002 } else if (FormatTok->is(tok::comma)) { 3003 nextToken(); 3004 addUnwrappedLine(); 3005 } else if (FormatTok->is(tok::semi)) { 3006 nextToken(); 3007 addUnwrappedLine(); 3008 break; 3009 } else if (FormatTok->is(tok::r_brace)) { 3010 addUnwrappedLine(); 3011 break; 3012 } else { 3013 nextToken(); 3014 } 3015 } 3016 3017 // Parse the class body after the enum's ";" if any. 3018 parseLevel(/*HasOpeningBrace=*/true); 3019 nextToken(); 3020 --Line->Level; 3021 addUnwrappedLine(); 3022 } 3023 3024 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3025 const FormatToken &InitialToken = *FormatTok; 3026 nextToken(); 3027 3028 // The actual identifier can be a nested name specifier, and in macros 3029 // it is often token-pasted. 3030 // An [[attribute]] can be before the identifier. 3031 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 3032 tok::kw___attribute, tok::kw___declspec, 3033 tok::kw_alignas, tok::l_square, tok::r_square) || 3034 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 3035 FormatTok->isOneOf(tok::period, tok::comma))) { 3036 if (Style.isJavaScript() && 3037 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 3038 // JavaScript/TypeScript supports inline object types in 3039 // extends/implements positions: 3040 // class Foo implements {bar: number} { } 3041 nextToken(); 3042 if (FormatTok->is(tok::l_brace)) { 3043 tryToParseBracedList(); 3044 continue; 3045 } 3046 } 3047 bool IsNonMacroIdentifier = 3048 FormatTok->is(tok::identifier) && 3049 FormatTok->TokenText != FormatTok->TokenText.upper(); 3050 nextToken(); 3051 // We can have macros or attributes in between 'class' and the class name. 3052 if (!IsNonMacroIdentifier) { 3053 if (FormatTok->Tok.is(tok::l_paren)) { 3054 parseParens(); 3055 } else if (FormatTok->is(TT_AttributeSquare)) { 3056 parseSquare(); 3057 // Consume the closing TT_AttributeSquare. 3058 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare)) 3059 nextToken(); 3060 } 3061 } 3062 } 3063 3064 // Note that parsing away template declarations here leads to incorrectly 3065 // accepting function declarations as record declarations. 3066 // In general, we cannot solve this problem. Consider: 3067 // class A<int> B() {} 3068 // which can be a function definition or a class definition when B() is a 3069 // macro. If we find enough real-world cases where this is a problem, we 3070 // can parse for the 'template' keyword in the beginning of the statement, 3071 // and thus rule out the record production in case there is no template 3072 // (this would still leave us with an ambiguity between template function 3073 // and class declarations). 3074 if (FormatTok->isOneOf(tok::colon, tok::less)) { 3075 while (!eof()) { 3076 if (FormatTok->is(tok::l_brace)) { 3077 calculateBraceTypes(/*ExpectClassBody=*/true); 3078 if (!tryToParseBracedList()) 3079 break; 3080 } 3081 if (FormatTok->is(tok::l_square) && !tryToParseLambda()) 3082 break; 3083 if (FormatTok->Tok.is(tok::semi)) 3084 return; 3085 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 3086 addUnwrappedLine(); 3087 nextToken(); 3088 parseCSharpGenericTypeConstraint(); 3089 break; 3090 } 3091 nextToken(); 3092 } 3093 } 3094 if (FormatTok->Tok.is(tok::l_brace)) { 3095 if (ParseAsExpr) { 3096 parseChildBlock(); 3097 } else { 3098 if (ShouldBreakBeforeBrace(Style, InitialToken)) 3099 addUnwrappedLine(); 3100 3101 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 3102 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 3103 } 3104 } 3105 // There is no addUnwrappedLine() here so that we fall through to parsing a 3106 // structural element afterwards. Thus, in "class A {} n, m;", 3107 // "} n, m;" will end up in one unwrapped line. 3108 } 3109 3110 void UnwrappedLineParser::parseObjCMethod() { 3111 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && 3112 "'(' or identifier expected."); 3113 do { 3114 if (FormatTok->Tok.is(tok::semi)) { 3115 nextToken(); 3116 addUnwrappedLine(); 3117 return; 3118 } else if (FormatTok->Tok.is(tok::l_brace)) { 3119 if (Style.BraceWrapping.AfterFunction) 3120 addUnwrappedLine(); 3121 parseBlock(); 3122 addUnwrappedLine(); 3123 return; 3124 } else { 3125 nextToken(); 3126 } 3127 } while (!eof()); 3128 } 3129 3130 void UnwrappedLineParser::parseObjCProtocolList() { 3131 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 3132 do { 3133 nextToken(); 3134 // Early exit in case someone forgot a close angle. 3135 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3136 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 3137 return; 3138 } while (!eof() && FormatTok->Tok.isNot(tok::greater)); 3139 nextToken(); // Skip '>'. 3140 } 3141 3142 void UnwrappedLineParser::parseObjCUntilAtEnd() { 3143 do { 3144 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 3145 nextToken(); 3146 addUnwrappedLine(); 3147 break; 3148 } 3149 if (FormatTok->is(tok::l_brace)) { 3150 parseBlock(); 3151 // In ObjC interfaces, nothing should be following the "}". 3152 addUnwrappedLine(); 3153 } else if (FormatTok->is(tok::r_brace)) { 3154 // Ignore stray "}". parseStructuralElement doesn't consume them. 3155 nextToken(); 3156 addUnwrappedLine(); 3157 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 3158 nextToken(); 3159 parseObjCMethod(); 3160 } else { 3161 parseStructuralElement(); 3162 } 3163 } while (!eof()); 3164 } 3165 3166 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 3167 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 3168 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 3169 nextToken(); 3170 nextToken(); // interface name 3171 3172 // @interface can be followed by a lightweight generic 3173 // specialization list, then either a base class or a category. 3174 if (FormatTok->Tok.is(tok::less)) { 3175 parseObjCLightweightGenerics(); 3176 } 3177 if (FormatTok->Tok.is(tok::colon)) { 3178 nextToken(); 3179 nextToken(); // base class name 3180 // The base class can also have lightweight generics applied to it. 3181 if (FormatTok->Tok.is(tok::less)) { 3182 parseObjCLightweightGenerics(); 3183 } 3184 } else if (FormatTok->Tok.is(tok::l_paren)) 3185 // Skip category, if present. 3186 parseParens(); 3187 3188 if (FormatTok->Tok.is(tok::less)) 3189 parseObjCProtocolList(); 3190 3191 if (FormatTok->Tok.is(tok::l_brace)) { 3192 if (Style.BraceWrapping.AfterObjCDeclaration) 3193 addUnwrappedLine(); 3194 parseBlock(/*MustBeDeclaration=*/true); 3195 } 3196 3197 // With instance variables, this puts '}' on its own line. Without instance 3198 // variables, this ends the @interface line. 3199 addUnwrappedLine(); 3200 3201 parseObjCUntilAtEnd(); 3202 } 3203 3204 void UnwrappedLineParser::parseObjCLightweightGenerics() { 3205 assert(FormatTok->Tok.is(tok::less)); 3206 // Unlike protocol lists, generic parameterizations support 3207 // nested angles: 3208 // 3209 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 3210 // NSObject <NSCopying, NSSecureCoding> 3211 // 3212 // so we need to count how many open angles we have left. 3213 unsigned NumOpenAngles = 1; 3214 do { 3215 nextToken(); 3216 // Early exit in case someone forgot a close angle. 3217 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3218 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 3219 break; 3220 if (FormatTok->Tok.is(tok::less)) 3221 ++NumOpenAngles; 3222 else if (FormatTok->Tok.is(tok::greater)) { 3223 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 3224 --NumOpenAngles; 3225 } 3226 } while (!eof() && NumOpenAngles != 0); 3227 nextToken(); // Skip '>'. 3228 } 3229 3230 // Returns true for the declaration/definition form of @protocol, 3231 // false for the expression form. 3232 bool UnwrappedLineParser::parseObjCProtocol() { 3233 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 3234 nextToken(); 3235 3236 if (FormatTok->is(tok::l_paren)) 3237 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 3238 return false; 3239 3240 // The definition/declaration form, 3241 // @protocol Foo 3242 // - (int)someMethod; 3243 // @end 3244 3245 nextToken(); // protocol name 3246 3247 if (FormatTok->Tok.is(tok::less)) 3248 parseObjCProtocolList(); 3249 3250 // Check for protocol declaration. 3251 if (FormatTok->Tok.is(tok::semi)) { 3252 nextToken(); 3253 addUnwrappedLine(); 3254 return true; 3255 } 3256 3257 addUnwrappedLine(); 3258 parseObjCUntilAtEnd(); 3259 return true; 3260 } 3261 3262 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 3263 bool IsImport = FormatTok->is(Keywords.kw_import); 3264 assert(IsImport || FormatTok->is(tok::kw_export)); 3265 nextToken(); 3266 3267 // Consume the "default" in "export default class/function". 3268 if (FormatTok->is(tok::kw_default)) 3269 nextToken(); 3270 3271 // Consume "async function", "function" and "default function", so that these 3272 // get parsed as free-standing JS functions, i.e. do not require a trailing 3273 // semicolon. 3274 if (FormatTok->is(Keywords.kw_async)) 3275 nextToken(); 3276 if (FormatTok->is(Keywords.kw_function)) { 3277 nextToken(); 3278 return; 3279 } 3280 3281 // For imports, `export *`, `export {...}`, consume the rest of the line up 3282 // to the terminating `;`. For everything else, just return and continue 3283 // parsing the structural element, i.e. the declaration or expression for 3284 // `export default`. 3285 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 3286 !FormatTok->isStringLiteral()) 3287 return; 3288 3289 while (!eof()) { 3290 if (FormatTok->is(tok::semi)) 3291 return; 3292 if (Line->Tokens.empty()) { 3293 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 3294 // import statement should terminate. 3295 return; 3296 } 3297 if (FormatTok->is(tok::l_brace)) { 3298 FormatTok->setBlockKind(BK_Block); 3299 nextToken(); 3300 parseBracedList(); 3301 } else { 3302 nextToken(); 3303 } 3304 } 3305 } 3306 3307 void UnwrappedLineParser::parseStatementMacro() { 3308 nextToken(); 3309 if (FormatTok->is(tok::l_paren)) 3310 parseParens(); 3311 if (FormatTok->is(tok::semi)) 3312 nextToken(); 3313 addUnwrappedLine(); 3314 } 3315 3316 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 3317 StringRef Prefix = "") { 3318 llvm::dbgs() << Prefix << "Line(" << Line.Level 3319 << ", FSC=" << Line.FirstStartColumn << ")" 3320 << (Line.InPPDirective ? " MACRO" : "") << ": "; 3321 for (const auto &Node : Line.Tokens) { 3322 llvm::dbgs() << Node.Tok->Tok.getName() << "[" 3323 << "T=" << static_cast<unsigned>(Node.Tok->getType()) 3324 << ", OC=" << Node.Tok->OriginalColumn << "] "; 3325 } 3326 for (const auto &Node : Line.Tokens) 3327 for (const auto &ChildNode : Node.Children) 3328 printDebugInfo(ChildNode, "\nChild: "); 3329 3330 llvm::dbgs() << "\n"; 3331 } 3332 3333 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 3334 if (Line->Tokens.empty()) 3335 return; 3336 LLVM_DEBUG({ 3337 if (CurrentLines == &Lines) 3338 printDebugInfo(*Line); 3339 }); 3340 3341 // If this line closes a block when in Whitesmiths mode, remember that 3342 // information so that the level can be decreased after the line is added. 3343 // This has to happen after the addition of the line since the line itself 3344 // needs to be indented. 3345 bool ClosesWhitesmithsBlock = 3346 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 3347 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 3348 3349 CurrentLines->push_back(std::move(*Line)); 3350 Line->Tokens.clear(); 3351 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 3352 Line->FirstStartColumn = 0; 3353 3354 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 3355 --Line->Level; 3356 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 3357 CurrentLines->append( 3358 std::make_move_iterator(PreprocessorDirectives.begin()), 3359 std::make_move_iterator(PreprocessorDirectives.end())); 3360 PreprocessorDirectives.clear(); 3361 } 3362 // Disconnect the current token from the last token on the previous line. 3363 FormatTok->Previous = nullptr; 3364 } 3365 3366 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 3367 3368 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 3369 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 3370 FormatTok.NewlinesBefore > 0; 3371 } 3372 3373 // Checks if \p FormatTok is a line comment that continues the line comment 3374 // section on \p Line. 3375 static bool 3376 continuesLineCommentSection(const FormatToken &FormatTok, 3377 const UnwrappedLine &Line, 3378 const llvm::Regex &CommentPragmasRegex) { 3379 if (Line.Tokens.empty()) 3380 return false; 3381 3382 StringRef IndentContent = FormatTok.TokenText; 3383 if (FormatTok.TokenText.startswith("//") || 3384 FormatTok.TokenText.startswith("/*")) 3385 IndentContent = FormatTok.TokenText.substr(2); 3386 if (CommentPragmasRegex.match(IndentContent)) 3387 return false; 3388 3389 // If Line starts with a line comment, then FormatTok continues the comment 3390 // section if its original column is greater or equal to the original start 3391 // column of the line. 3392 // 3393 // Define the min column token of a line as follows: if a line ends in '{' or 3394 // contains a '{' followed by a line comment, then the min column token is 3395 // that '{'. Otherwise, the min column token of the line is the first token of 3396 // the line. 3397 // 3398 // If Line starts with a token other than a line comment, then FormatTok 3399 // continues the comment section if its original column is greater than the 3400 // original start column of the min column token of the line. 3401 // 3402 // For example, the second line comment continues the first in these cases: 3403 // 3404 // // first line 3405 // // second line 3406 // 3407 // and: 3408 // 3409 // // first line 3410 // // second line 3411 // 3412 // and: 3413 // 3414 // int i; // first line 3415 // // second line 3416 // 3417 // and: 3418 // 3419 // do { // first line 3420 // // second line 3421 // int i; 3422 // } while (true); 3423 // 3424 // and: 3425 // 3426 // enum { 3427 // a, // first line 3428 // // second line 3429 // b 3430 // }; 3431 // 3432 // The second line comment doesn't continue the first in these cases: 3433 // 3434 // // first line 3435 // // second line 3436 // 3437 // and: 3438 // 3439 // int i; // first line 3440 // // second line 3441 // 3442 // and: 3443 // 3444 // do { // first line 3445 // // second line 3446 // int i; 3447 // } while (true); 3448 // 3449 // and: 3450 // 3451 // enum { 3452 // a, // first line 3453 // // second line 3454 // }; 3455 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 3456 3457 // Scan for '{//'. If found, use the column of '{' as a min column for line 3458 // comment section continuation. 3459 const FormatToken *PreviousToken = nullptr; 3460 for (const UnwrappedLineNode &Node : Line.Tokens) { 3461 if (PreviousToken && PreviousToken->is(tok::l_brace) && 3462 isLineComment(*Node.Tok)) { 3463 MinColumnToken = PreviousToken; 3464 break; 3465 } 3466 PreviousToken = Node.Tok; 3467 3468 // Grab the last newline preceding a token in this unwrapped line. 3469 if (Node.Tok->NewlinesBefore > 0) { 3470 MinColumnToken = Node.Tok; 3471 } 3472 } 3473 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 3474 MinColumnToken = PreviousToken; 3475 } 3476 3477 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 3478 MinColumnToken); 3479 } 3480 3481 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 3482 bool JustComments = Line->Tokens.empty(); 3483 for (FormatToken *Tok : CommentsBeforeNextToken) { 3484 // Line comments that belong to the same line comment section are put on the 3485 // same line since later we might want to reflow content between them. 3486 // Additional fine-grained breaking of line comment sections is controlled 3487 // by the class BreakableLineCommentSection in case it is desirable to keep 3488 // several line comment sections in the same unwrapped line. 3489 // 3490 // FIXME: Consider putting separate line comment sections as children to the 3491 // unwrapped line instead. 3492 Tok->ContinuesLineCommentSection = 3493 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 3494 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 3495 addUnwrappedLine(); 3496 pushToken(Tok); 3497 } 3498 if (NewlineBeforeNext && JustComments) 3499 addUnwrappedLine(); 3500 CommentsBeforeNextToken.clear(); 3501 } 3502 3503 void UnwrappedLineParser::nextToken(int LevelDifference) { 3504 if (eof()) 3505 return; 3506 flushComments(isOnNewLine(*FormatTok)); 3507 pushToken(FormatTok); 3508 FormatToken *Previous = FormatTok; 3509 if (!Style.isJavaScript()) 3510 readToken(LevelDifference); 3511 else 3512 readTokenWithJavaScriptASI(); 3513 FormatTok->Previous = Previous; 3514 } 3515 3516 void UnwrappedLineParser::distributeComments( 3517 const SmallVectorImpl<FormatToken *> &Comments, 3518 const FormatToken *NextTok) { 3519 // Whether or not a line comment token continues a line is controlled by 3520 // the method continuesLineCommentSection, with the following caveat: 3521 // 3522 // Define a trail of Comments to be a nonempty proper postfix of Comments such 3523 // that each comment line from the trail is aligned with the next token, if 3524 // the next token exists. If a trail exists, the beginning of the maximal 3525 // trail is marked as a start of a new comment section. 3526 // 3527 // For example in this code: 3528 // 3529 // int a; // line about a 3530 // // line 1 about b 3531 // // line 2 about b 3532 // int b; 3533 // 3534 // the two lines about b form a maximal trail, so there are two sections, the 3535 // first one consisting of the single comment "// line about a" and the 3536 // second one consisting of the next two comments. 3537 if (Comments.empty()) 3538 return; 3539 bool ShouldPushCommentsInCurrentLine = true; 3540 bool HasTrailAlignedWithNextToken = false; 3541 unsigned StartOfTrailAlignedWithNextToken = 0; 3542 if (NextTok) { 3543 // We are skipping the first element intentionally. 3544 for (unsigned i = Comments.size() - 1; i > 0; --i) { 3545 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 3546 HasTrailAlignedWithNextToken = true; 3547 StartOfTrailAlignedWithNextToken = i; 3548 } 3549 } 3550 } 3551 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 3552 FormatToken *FormatTok = Comments[i]; 3553 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 3554 FormatTok->ContinuesLineCommentSection = false; 3555 } else { 3556 FormatTok->ContinuesLineCommentSection = 3557 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 3558 } 3559 if (!FormatTok->ContinuesLineCommentSection && 3560 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 3561 ShouldPushCommentsInCurrentLine = false; 3562 } 3563 if (ShouldPushCommentsInCurrentLine) { 3564 pushToken(FormatTok); 3565 } else { 3566 CommentsBeforeNextToken.push_back(FormatTok); 3567 } 3568 } 3569 } 3570 3571 void UnwrappedLineParser::readToken(int LevelDifference) { 3572 SmallVector<FormatToken *, 1> Comments; 3573 do { 3574 FormatTok = Tokens->getNextToken(); 3575 assert(FormatTok); 3576 while (FormatTok->getType() == TT_ConflictStart || 3577 FormatTok->getType() == TT_ConflictEnd || 3578 FormatTok->getType() == TT_ConflictAlternative) { 3579 if (FormatTok->getType() == TT_ConflictStart) { 3580 conditionalCompilationStart(/*Unreachable=*/false); 3581 } else if (FormatTok->getType() == TT_ConflictAlternative) { 3582 conditionalCompilationAlternative(); 3583 } else if (FormatTok->getType() == TT_ConflictEnd) { 3584 conditionalCompilationEnd(); 3585 } 3586 FormatTok = Tokens->getNextToken(); 3587 FormatTok->MustBreakBefore = true; 3588 } 3589 3590 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 3591 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 3592 distributeComments(Comments, FormatTok); 3593 Comments.clear(); 3594 // If there is an unfinished unwrapped line, we flush the preprocessor 3595 // directives only after that unwrapped line was finished later. 3596 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 3597 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 3598 assert((LevelDifference >= 0 || 3599 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 3600 "LevelDifference makes Line->Level negative"); 3601 Line->Level += LevelDifference; 3602 // Comments stored before the preprocessor directive need to be output 3603 // before the preprocessor directive, at the same level as the 3604 // preprocessor directive, as we consider them to apply to the directive. 3605 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 3606 PPBranchLevel > 0) 3607 Line->Level += PPBranchLevel; 3608 flushComments(isOnNewLine(*FormatTok)); 3609 parsePPDirective(); 3610 } 3611 3612 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 3613 !Line->InPPDirective) { 3614 continue; 3615 } 3616 3617 if (!FormatTok->Tok.is(tok::comment)) { 3618 distributeComments(Comments, FormatTok); 3619 Comments.clear(); 3620 return; 3621 } 3622 3623 Comments.push_back(FormatTok); 3624 } while (!eof()); 3625 3626 distributeComments(Comments, nullptr); 3627 Comments.clear(); 3628 } 3629 3630 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 3631 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 3632 if (MustBreakBeforeNextToken) { 3633 Line->Tokens.back().Tok->MustBreakBefore = true; 3634 MustBreakBeforeNextToken = false; 3635 } 3636 } 3637 3638 } // end namespace format 3639 } // end namespace clang 3640