1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "TokenAnnotator.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/Support/Debug.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 #include <algorithm> 23 24 #define DEBUG_TYPE "format-parser" 25 26 namespace clang { 27 namespace format { 28 29 class FormatTokenSource { 30 public: 31 virtual ~FormatTokenSource() {} 32 33 // Returns the next token in the token stream. 34 virtual FormatToken *getNextToken() = 0; 35 36 // Returns the token preceding the token returned by the last call to 37 // getNextToken() in the token stream, or nullptr if no such token exists. 38 virtual FormatToken *getPreviousToken() = 0; 39 40 // Returns the token that would be returned by the next call to 41 // getNextToken(). 42 virtual FormatToken *peekNextToken() = 0; 43 44 // Returns whether we are at the end of the file. 45 // This can be different from whether getNextToken() returned an eof token 46 // when the FormatTokenSource is a view on a part of the token stream. 47 virtual bool isEOF() = 0; 48 49 // Gets the current position in the token stream, to be used by setPosition(). 50 virtual unsigned getPosition() = 0; 51 52 // Resets the token stream to the state it was in when getPosition() returned 53 // Position, and return the token at that position in the stream. 
54 virtual FormatToken *setPosition(unsigned Position) = 0; 55 }; 56 57 namespace { 58 59 class ScopedDeclarationState { 60 public: 61 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, 62 bool MustBeDeclaration) 63 : Line(Line), Stack(Stack) { 64 Line.MustBeDeclaration = MustBeDeclaration; 65 Stack.push_back(MustBeDeclaration); 66 } 67 ~ScopedDeclarationState() { 68 Stack.pop_back(); 69 if (!Stack.empty()) 70 Line.MustBeDeclaration = Stack.back(); 71 else 72 Line.MustBeDeclaration = true; 73 } 74 75 private: 76 UnwrappedLine &Line; 77 llvm::BitVector &Stack; 78 }; 79 80 static bool isLineComment(const FormatToken &FormatTok) { 81 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 82 } 83 84 // Checks if \p FormatTok is a line comment that continues the line comment 85 // \p Previous. The original column of \p MinColumnToken is used to determine 86 // whether \p FormatTok is indented enough to the right to continue \p Previous. 87 static bool continuesLineComment(const FormatToken &FormatTok, 88 const FormatToken *Previous, 89 const FormatToken *MinColumnToken) { 90 if (!Previous || !MinColumnToken) 91 return false; 92 unsigned MinContinueColumn = 93 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 
0 : 1); 94 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 95 isLineComment(*Previous) && 96 FormatTok.OriginalColumn >= MinContinueColumn; 97 } 98 99 class ScopedMacroState : public FormatTokenSource { 100 public: 101 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 102 FormatToken *&ResetToken) 103 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 104 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 105 Token(nullptr), PreviousToken(nullptr) { 106 FakeEOF.Tok.startToken(); 107 FakeEOF.Tok.setKind(tok::eof); 108 TokenSource = this; 109 Line.Level = 0; 110 Line.InPPDirective = true; 111 } 112 113 ~ScopedMacroState() override { 114 TokenSource = PreviousTokenSource; 115 ResetToken = Token; 116 Line.InPPDirective = false; 117 Line.Level = PreviousLineLevel; 118 } 119 120 FormatToken *getNextToken() override { 121 // The \c UnwrappedLineParser guards against this by never calling 122 // \c getNextToken() after it has encountered the first eof token. 
123 assert(!eof()); 124 PreviousToken = Token; 125 Token = PreviousTokenSource->getNextToken(); 126 if (eof()) 127 return &FakeEOF; 128 return Token; 129 } 130 131 FormatToken *getPreviousToken() override { 132 return PreviousTokenSource->getPreviousToken(); 133 } 134 135 FormatToken *peekNextToken() override { 136 if (eof()) 137 return &FakeEOF; 138 return PreviousTokenSource->peekNextToken(); 139 } 140 141 bool isEOF() override { return PreviousTokenSource->isEOF(); } 142 143 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 144 145 FormatToken *setPosition(unsigned Position) override { 146 PreviousToken = nullptr; 147 Token = PreviousTokenSource->setPosition(Position); 148 return Token; 149 } 150 151 private: 152 bool eof() { 153 return Token && Token->HasUnescapedNewline && 154 !continuesLineComment(*Token, PreviousToken, 155 /*MinColumnToken=*/PreviousToken); 156 } 157 158 FormatToken FakeEOF; 159 UnwrappedLine &Line; 160 FormatTokenSource *&TokenSource; 161 FormatToken *&ResetToken; 162 unsigned PreviousLineLevel; 163 FormatTokenSource *PreviousTokenSource; 164 165 FormatToken *Token; 166 FormatToken *PreviousToken; 167 }; 168 169 } // end anonymous namespace 170 171 class ScopedLineState { 172 public: 173 ScopedLineState(UnwrappedLineParser &Parser, 174 bool SwitchToPreprocessorLines = false) 175 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 176 if (SwitchToPreprocessorLines) 177 Parser.CurrentLines = &Parser.PreprocessorDirectives; 178 else if (!Parser.Line->Tokens.empty()) 179 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 180 PreBlockLine = std::move(Parser.Line); 181 Parser.Line = std::make_unique<UnwrappedLine>(); 182 Parser.Line->Level = PreBlockLine->Level; 183 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 184 } 185 186 ~ScopedLineState() { 187 if (!Parser.Line->Tokens.empty()) 188 Parser.addUnwrappedLine(); 189 assert(Parser.Line->Tokens.empty()); 190 Parser.Line = 
std::move(PreBlockLine); 191 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 192 Parser.MustBreakBeforeNextToken = true; 193 Parser.CurrentLines = OriginalLines; 194 } 195 196 private: 197 UnwrappedLineParser &Parser; 198 199 std::unique_ptr<UnwrappedLine> PreBlockLine; 200 SmallVectorImpl<UnwrappedLine> *OriginalLines; 201 }; 202 203 class CompoundStatementIndenter { 204 public: 205 CompoundStatementIndenter(UnwrappedLineParser *Parser, 206 const FormatStyle &Style, unsigned &LineLevel) 207 : CompoundStatementIndenter(Parser, LineLevel, 208 Style.BraceWrapping.AfterControlStatement, 209 Style.BraceWrapping.IndentBraces) {} 210 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 211 bool WrapBrace, bool IndentBrace) 212 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 213 if (WrapBrace) 214 Parser->addUnwrappedLine(); 215 if (IndentBrace) 216 ++LineLevel; 217 } 218 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 219 220 private: 221 unsigned &LineLevel; 222 unsigned OldLineLevel; 223 }; 224 225 namespace { 226 227 class IndexedTokenSource : public FormatTokenSource { 228 public: 229 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 230 : Tokens(Tokens), Position(-1) {} 231 232 FormatToken *getNextToken() override { 233 if (Position >= 0 && Tokens[Position]->is(tok::eof)) { 234 LLVM_DEBUG({ 235 llvm::dbgs() << "Next "; 236 dbgToken(Position); 237 }); 238 return Tokens[Position]; 239 } 240 ++Position; 241 LLVM_DEBUG({ 242 llvm::dbgs() << "Next "; 243 dbgToken(Position); 244 }); 245 return Tokens[Position]; 246 } 247 248 FormatToken *getPreviousToken() override { 249 return Position > 0 ? 
Tokens[Position - 1] : nullptr; 250 } 251 252 FormatToken *peekNextToken() override { 253 int Next = Position + 1; 254 LLVM_DEBUG({ 255 llvm::dbgs() << "Peeking "; 256 dbgToken(Next); 257 }); 258 return Tokens[Next]; 259 } 260 261 bool isEOF() override { return Tokens[Position]->is(tok::eof); } 262 263 unsigned getPosition() override { 264 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 265 assert(Position >= 0); 266 return Position; 267 } 268 269 FormatToken *setPosition(unsigned P) override { 270 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 271 Position = P; 272 return Tokens[Position]; 273 } 274 275 void reset() { Position = -1; } 276 277 private: 278 void dbgToken(int Position, llvm::StringRef Indent = "") { 279 FormatToken *Tok = Tokens[Position]; 280 llvm::dbgs() << Indent << "[" << Position 281 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 282 << ", Macro: " << !!Tok->MacroCtx << "\n"; 283 } 284 285 ArrayRef<FormatToken *> Tokens; 286 int Position; 287 }; 288 289 } // end anonymous namespace 290 291 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 292 const AdditionalKeywords &Keywords, 293 unsigned FirstStartColumn, 294 ArrayRef<FormatToken *> Tokens, 295 UnwrappedLineConsumer &Callback) 296 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 297 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 298 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 299 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 300 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 301 ? IG_Rejected 302 : IG_Inited), 303 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 304 305 void UnwrappedLineParser::reset() { 306 PPBranchLevel = -1; 307 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 308 ? 
IG_Rejected 309 : IG_Inited; 310 IncludeGuardToken = nullptr; 311 Line.reset(new UnwrappedLine); 312 CommentsBeforeNextToken.clear(); 313 FormatTok = nullptr; 314 MustBreakBeforeNextToken = false; 315 PreprocessorDirectives.clear(); 316 CurrentLines = &Lines; 317 DeclarationScopeStack.clear(); 318 NestedTooDeep.clear(); 319 PPStack.clear(); 320 Line->FirstStartColumn = FirstStartColumn; 321 } 322 323 void UnwrappedLineParser::parse() { 324 IndexedTokenSource TokenSource(AllTokens); 325 Line->FirstStartColumn = FirstStartColumn; 326 do { 327 LLVM_DEBUG(llvm::dbgs() << "----\n"); 328 reset(); 329 Tokens = &TokenSource; 330 TokenSource.reset(); 331 332 readToken(); 333 parseFile(); 334 335 // If we found an include guard then all preprocessor directives (other than 336 // the guard) are over-indented by one. 337 if (IncludeGuard == IG_Found) 338 for (auto &Line : Lines) 339 if (Line.InPPDirective && Line.Level > 0) 340 --Line.Level; 341 342 // Create line with eof token. 343 pushToken(FormatTok); 344 addUnwrappedLine(); 345 346 for (const UnwrappedLine &Line : Lines) 347 Callback.consumeUnwrappedLine(Line); 348 349 Callback.finishRun(); 350 Lines.clear(); 351 while (!PPLevelBranchIndex.empty() && 352 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 353 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 354 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 355 } 356 if (!PPLevelBranchIndex.empty()) { 357 ++PPLevelBranchIndex.back(); 358 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 359 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 360 } 361 } while (!PPLevelBranchIndex.empty()); 362 } 363 364 void UnwrappedLineParser::parseFile() { 365 // The top-level context in a file always has declarations, except for pre- 366 // processor directives and JavaScript files. 
367 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 368 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 369 MustBeDeclaration); 370 if (Style.Language == FormatStyle::LK_TextProto) 371 parseBracedList(); 372 else 373 parseLevel(/*HasOpeningBrace=*/false); 374 // Make sure to format the remaining tokens. 375 // 376 // LK_TextProto is special since its top-level is parsed as the body of a 377 // braced list, which does not necessarily have natural line separators such 378 // as a semicolon. Comments after the last entry that have been determined to 379 // not belong to that line, as in: 380 // key: value 381 // // endfile comment 382 // do not have a chance to be put on a line of their own until this point. 383 // Here we add this newline before end-of-file comments. 384 if (Style.Language == FormatStyle::LK_TextProto && 385 !CommentsBeforeNextToken.empty()) 386 addUnwrappedLine(); 387 flushComments(true); 388 addUnwrappedLine(); 389 } 390 391 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 392 do { 393 switch (FormatTok->Tok.getKind()) { 394 case tok::l_brace: 395 return; 396 default: 397 if (FormatTok->is(Keywords.kw_where)) { 398 addUnwrappedLine(); 399 nextToken(); 400 parseCSharpGenericTypeConstraint(); 401 break; 402 } 403 nextToken(); 404 break; 405 } 406 } while (!eof()); 407 } 408 409 void UnwrappedLineParser::parseCSharpAttribute() { 410 int UnpairedSquareBrackets = 1; 411 do { 412 switch (FormatTok->Tok.getKind()) { 413 case tok::r_square: 414 nextToken(); 415 --UnpairedSquareBrackets; 416 if (UnpairedSquareBrackets == 0) { 417 addUnwrappedLine(); 418 return; 419 } 420 break; 421 case tok::l_square: 422 ++UnpairedSquareBrackets; 423 nextToken(); 424 break; 425 default: 426 nextToken(); 427 break; 428 } 429 } while (!eof()); 430 } 431 432 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 433 if (!Lines.empty() && Lines.back().InPPDirective) 434 return true; 435 436 const 
FormatToken *Previous = Tokens->getPreviousToken(); 437 return Previous && Previous->is(tok::comment) && 438 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 439 } 440 441 bool UnwrappedLineParser::mightFitOnOneLine() const { 442 const auto ColumnLimit = Style.ColumnLimit; 443 if (ColumnLimit == 0) 444 return true; 445 446 if (Lines.empty()) 447 return true; 448 449 const auto &PreviousLine = Lines.back(); 450 const auto &Tokens = PreviousLine.Tokens; 451 assert(!Tokens.empty()); 452 const auto *LastToken = Tokens.back().Tok; 453 assert(LastToken); 454 if (!LastToken->isOneOf(tok::semi, tok::comment)) 455 return true; 456 457 AnnotatedLine Line(PreviousLine); 458 assert(Line.Last == LastToken); 459 460 TokenAnnotator Annotator(Style, Keywords); 461 Annotator.annotate(Line); 462 Annotator.calculateFormattingInformation(Line); 463 464 return Line.Level * Style.IndentWidth + LastToken->TotalLength <= ColumnLimit; 465 } 466 467 // Returns true if a simple block, or false otherwise. (A simple block has a 468 // single statement that fits on a single line.) 469 bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace, IfStmtKind *IfKind) { 470 const bool IsPrecededByCommentOrPPDirective = 471 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective(); 472 unsigned StatementCount = 0; 473 bool SwitchLabelEncountered = false; 474 do { 475 tok::TokenKind kind = FormatTok->Tok.getKind(); 476 if (FormatTok->getType() == TT_MacroBlockBegin) 477 kind = tok::l_brace; 478 else if (FormatTok->getType() == TT_MacroBlockEnd) 479 kind = tok::r_brace; 480 481 switch (kind) { 482 case tok::comment: 483 nextToken(); 484 addUnwrappedLine(); 485 break; 486 case tok::l_brace: 487 // FIXME: Add parameter whether this can happen - if this happens, we must 488 // be in a non-declaration context. 
489 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 490 continue; 491 parseBlock(); 492 ++StatementCount; 493 assert(StatementCount > 0 && "StatementCount overflow!"); 494 addUnwrappedLine(); 495 break; 496 case tok::r_brace: 497 if (HasOpeningBrace) { 498 if (!Style.RemoveBracesLLVM) 499 return false; 500 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || 501 IsPrecededByCommentOrPPDirective || 502 precededByCommentOrPPDirective()) 503 return false; 504 const FormatToken *Next = Tokens->peekNextToken(); 505 if (Next->is(tok::comment) && Next->NewlinesBefore == 0) 506 return false; 507 return mightFitOnOneLine(); 508 } 509 nextToken(); 510 addUnwrappedLine(); 511 break; 512 case tok::kw_default: { 513 unsigned StoredPosition = Tokens->getPosition(); 514 FormatToken *Next; 515 do { 516 Next = Tokens->getNextToken(); 517 } while (Next->is(tok::comment)); 518 FormatTok = Tokens->setPosition(StoredPosition); 519 if (Next && Next->isNot(tok::colon)) { 520 // default not followed by ':' is not a case label; treat it like 521 // an identifier. 522 parseStructuralElement(); 523 break; 524 } 525 // Else, if it is 'default:', fall through to the case handling. 526 LLVM_FALLTHROUGH; 527 } 528 case tok::kw_case: 529 if (Style.isJavaScript() && Line->MustBeDeclaration) { 530 // A 'case: string' style field declaration. 
531 parseStructuralElement(); 532 break; 533 } 534 if (!SwitchLabelEncountered && 535 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 536 ++Line->Level; 537 SwitchLabelEncountered = true; 538 parseStructuralElement(); 539 break; 540 case tok::l_square: 541 if (Style.isCSharp()) { 542 nextToken(); 543 parseCSharpAttribute(); 544 break; 545 } 546 LLVM_FALLTHROUGH; 547 default: 548 parseStructuralElement(IfKind, !HasOpeningBrace); 549 ++StatementCount; 550 assert(StatementCount > 0 && "StatementCount overflow!"); 551 break; 552 } 553 } while (!eof()); 554 return false; 555 } 556 557 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 558 // We'll parse forward through the tokens until we hit 559 // a closing brace or eof - note that getNextToken() will 560 // parse macros, so this will magically work inside macro 561 // definitions, too. 562 unsigned StoredPosition = Tokens->getPosition(); 563 FormatToken *Tok = FormatTok; 564 const FormatToken *PrevTok = Tok->Previous; 565 // Keep a stack of positions of lbrace tokens. We will 566 // update information about whether an lbrace starts a 567 // braced init list or a different block during the loop. 568 SmallVector<FormatToken *, 8> LBraceStack; 569 assert(Tok->Tok.is(tok::l_brace)); 570 do { 571 // Get next non-comment token. 572 FormatToken *NextTok; 573 unsigned ReadTokens = 0; 574 do { 575 NextTok = Tokens->getNextToken(); 576 ++ReadTokens; 577 } while (NextTok->is(tok::comment)); 578 579 switch (Tok->Tok.getKind()) { 580 case tok::l_brace: 581 if (Style.isJavaScript() && PrevTok) { 582 if (PrevTok->isOneOf(tok::colon, tok::less)) 583 // A ':' indicates this code is in a type, or a braced list 584 // following a label in an object literal ({a: {b: 1}}). 585 // A '<' could be an object used in a comparison, but that is nonsense 586 // code (can never return true), so more likely it is a generic type 587 // argument (`X<{a: string; b: number}>`). 
588 // The code below could be confused by semicolons between the 589 // individual members in a type member list, which would normally 590 // trigger BK_Block. In both cases, this must be parsed as an inline 591 // braced init. 592 Tok->setBlockKind(BK_BracedInit); 593 else if (PrevTok->is(tok::r_paren)) 594 // `) { }` can only occur in function or method declarations in JS. 595 Tok->setBlockKind(BK_Block); 596 } else { 597 Tok->setBlockKind(BK_Unknown); 598 } 599 LBraceStack.push_back(Tok); 600 break; 601 case tok::r_brace: 602 if (LBraceStack.empty()) 603 break; 604 if (LBraceStack.back()->is(BK_Unknown)) { 605 bool ProbablyBracedList = false; 606 if (Style.Language == FormatStyle::LK_Proto) { 607 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 608 } else { 609 // Skip NextTok over preprocessor lines, otherwise we may not 610 // properly diagnose the block as a braced intializer 611 // if the comma separator appears after the pp directive. 612 while (NextTok->is(tok::hash)) { 613 ScopedMacroState MacroState(*Line, Tokens, NextTok); 614 do { 615 NextTok = Tokens->getNextToken(); 616 ++ReadTokens; 617 } while (NextTok->isNot(tok::eof)); 618 } 619 620 // Using OriginalColumn to distinguish between ObjC methods and 621 // binary operators is a bit hacky. 622 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 623 NextTok->OriginalColumn == 0; 624 625 // If there is a comma, semicolon or right paren after the closing 626 // brace, we assume this is a braced initializer list. Note that 627 // regardless how we mark inner braces here, we will overwrite the 628 // BlockKind later if we parse a braced list (where all blocks 629 // inside are by default braced lists), or when we explicitly detect 630 // blocks (for example while parsing lambdas). 631 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 632 // braced list in JS. 
633 ProbablyBracedList = 634 (Style.isJavaScript() && 635 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 636 Keywords.kw_as)) || 637 (Style.isCpp() && NextTok->is(tok::l_paren)) || 638 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 639 tok::r_paren, tok::r_square, tok::l_brace, 640 tok::ellipsis) || 641 (NextTok->is(tok::identifier) && 642 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 643 (NextTok->is(tok::semi) && 644 (!ExpectClassBody || LBraceStack.size() != 1)) || 645 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 646 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 647 // We can have an array subscript after a braced init 648 // list, but C++11 attributes are expected after blocks. 649 NextTok = Tokens->getNextToken(); 650 ++ReadTokens; 651 ProbablyBracedList = NextTok->isNot(tok::l_square); 652 } 653 } 654 if (ProbablyBracedList) { 655 Tok->setBlockKind(BK_BracedInit); 656 LBraceStack.back()->setBlockKind(BK_BracedInit); 657 } else { 658 Tok->setBlockKind(BK_Block); 659 LBraceStack.back()->setBlockKind(BK_Block); 660 } 661 } 662 LBraceStack.pop_back(); 663 break; 664 case tok::identifier: 665 if (!Tok->is(TT_StatementMacro)) 666 break; 667 LLVM_FALLTHROUGH; 668 case tok::at: 669 case tok::semi: 670 case tok::kw_if: 671 case tok::kw_while: 672 case tok::kw_for: 673 case tok::kw_switch: 674 case tok::kw_try: 675 case tok::kw___try: 676 if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown)) 677 LBraceStack.back()->setBlockKind(BK_Block); 678 break; 679 default: 680 break; 681 } 682 PrevTok = Tok; 683 Tok = NextTok; 684 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 685 686 // Assume other blocks for all unclosed opening braces. 
687 for (FormatToken *LBrace : LBraceStack) 688 if (LBrace->is(BK_Unknown)) 689 LBrace->setBlockKind(BK_Block); 690 691 FormatTok = Tokens->setPosition(StoredPosition); 692 } 693 694 template <class T> 695 static inline void hash_combine(std::size_t &seed, const T &v) { 696 std::hash<T> hasher; 697 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 698 } 699 700 size_t UnwrappedLineParser::computePPHash() const { 701 size_t h = 0; 702 for (const auto &i : PPStack) { 703 hash_combine(h, size_t(i.Kind)); 704 hash_combine(h, i.Line); 705 } 706 return h; 707 } 708 709 UnwrappedLineParser::IfStmtKind 710 UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels, 711 bool MunchSemi, 712 bool UnindentWhitesmithsBraces) { 713 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 714 "'{' or macro block token expected"); 715 FormatToken *Tok = FormatTok; 716 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 717 FormatTok->setBlockKind(BK_Block); 718 719 // For Whitesmiths mode, jump to the next level prior to skipping over the 720 // braces. 721 if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 722 ++Line->Level; 723 724 size_t PPStartHash = computePPHash(); 725 726 unsigned InitialLevel = Line->Level; 727 nextToken(/*LevelDifference=*/AddLevels); 728 729 if (MacroBlock && FormatTok->is(tok::l_paren)) 730 parseParens(); 731 732 size_t NbPreprocessorDirectives = 733 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 734 addUnwrappedLine(); 735 size_t OpeningLineIndex = 736 CurrentLines->empty() 737 ? (UnwrappedLine::kInvalidIndex) 738 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 739 740 // Whitesmiths is weird here. The brace needs to be indented for the namespace 741 // block, but the block itself may not be indented depending on the style 742 // settings. This allows the format to back up one level in those cases. 
743 if (UnindentWhitesmithsBraces) 744 --Line->Level; 745 746 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 747 MustBeDeclaration); 748 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 749 Line->Level += AddLevels; 750 751 IfStmtKind IfKind = IfStmtKind::NotIf; 752 const bool SimpleBlock = parseLevel(/*HasOpeningBrace=*/true, &IfKind); 753 754 if (eof()) 755 return IfKind; 756 757 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 758 : !FormatTok->is(tok::r_brace)) { 759 Line->Level = InitialLevel; 760 FormatTok->setBlockKind(BK_Block); 761 return IfKind; 762 } 763 764 if (SimpleBlock && Tok->is(tok::l_brace)) { 765 assert(FormatTok->is(tok::r_brace)); 766 const FormatToken *Previous = Tokens->getPreviousToken(); 767 assert(Previous); 768 if (Previous->isNot(tok::r_brace) || Previous->Optional) { 769 Tok->MatchingParen = FormatTok; 770 FormatTok->MatchingParen = Tok; 771 } 772 } 773 774 size_t PPEndHash = computePPHash(); 775 776 // Munch the closing brace. 777 nextToken(/*LevelDifference=*/-AddLevels); 778 779 if (MacroBlock && FormatTok->is(tok::l_paren)) 780 parseParens(); 781 782 if (FormatTok->is(tok::arrow)) { 783 // Following the } we can find a trailing return type arrow 784 // as part of an implicit conversion constraint. 785 nextToken(); 786 parseStructuralElement(); 787 } 788 789 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 790 nextToken(); 791 792 Line->Level = InitialLevel; 793 794 if (PPStartHash == PPEndHash) { 795 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 796 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 797 // Update the opening line to add the forward reference as well 798 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 799 CurrentLines->size() - 1; 800 } 801 } 802 803 return IfKind; 804 } 805 806 static bool isGoogScope(const UnwrappedLine &Line) { 807 // FIXME: Closure-library specific stuff should not be hard-coded but be 808 // configurable. 
809 if (Line.Tokens.size() < 4) 810 return false; 811 auto I = Line.Tokens.begin(); 812 if (I->Tok->TokenText != "goog") 813 return false; 814 ++I; 815 if (I->Tok->isNot(tok::period)) 816 return false; 817 ++I; 818 if (I->Tok->TokenText != "scope") 819 return false; 820 ++I; 821 return I->Tok->is(tok::l_paren); 822 } 823 824 static bool isIIFE(const UnwrappedLine &Line, 825 const AdditionalKeywords &Keywords) { 826 // Look for the start of an immediately invoked anonymous function. 827 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 828 // This is commonly done in JavaScript to create a new, anonymous scope. 829 // Example: (function() { ... })() 830 if (Line.Tokens.size() < 3) 831 return false; 832 auto I = Line.Tokens.begin(); 833 if (I->Tok->isNot(tok::l_paren)) 834 return false; 835 ++I; 836 if (I->Tok->isNot(Keywords.kw_function)) 837 return false; 838 ++I; 839 return I->Tok->is(tok::l_paren); 840 } 841 842 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 843 const FormatToken &InitialToken) { 844 if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro)) 845 return Style.BraceWrapping.AfterNamespace; 846 if (InitialToken.is(tok::kw_class)) 847 return Style.BraceWrapping.AfterClass; 848 if (InitialToken.is(tok::kw_union)) 849 return Style.BraceWrapping.AfterUnion; 850 if (InitialToken.is(tok::kw_struct)) 851 return Style.BraceWrapping.AfterStruct; 852 if (InitialToken.is(tok::kw_enum)) 853 return Style.BraceWrapping.AfterEnum; 854 return false; 855 } 856 857 void UnwrappedLineParser::parseChildBlock() { 858 FormatTok->setBlockKind(BK_Block); 859 nextToken(); 860 { 861 bool SkipIndent = (Style.isJavaScript() && 862 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 863 ScopedLineState LineState(*this); 864 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 865 /*MustBeDeclaration=*/false); 866 Line->Level += SkipIndent ? 
0 : 1; 867 parseLevel(/*HasOpeningBrace=*/true); 868 flushComments(isOnNewLine(*FormatTok)); 869 Line->Level -= SkipIndent ? 0 : 1; 870 } 871 nextToken(); 872 } 873 874 void UnwrappedLineParser::parsePPDirective() { 875 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 876 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 877 878 nextToken(); 879 880 if (!FormatTok->Tok.getIdentifierInfo()) { 881 parsePPUnknown(); 882 return; 883 } 884 885 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 886 case tok::pp_define: 887 parsePPDefine(); 888 return; 889 case tok::pp_if: 890 parsePPIf(/*IfDef=*/false); 891 break; 892 case tok::pp_ifdef: 893 case tok::pp_ifndef: 894 parsePPIf(/*IfDef=*/true); 895 break; 896 case tok::pp_else: 897 parsePPElse(); 898 break; 899 case tok::pp_elifdef: 900 case tok::pp_elifndef: 901 case tok::pp_elif: 902 parsePPElIf(); 903 break; 904 case tok::pp_endif: 905 parsePPEndIf(); 906 break; 907 default: 908 parsePPUnknown(); 909 break; 910 } 911 } 912 913 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 914 size_t Line = CurrentLines->size(); 915 if (CurrentLines == &PreprocessorDirectives) 916 Line += Lines.size(); 917 918 if (Unreachable || 919 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 920 PPStack.push_back({PP_Unreachable, Line}); 921 else 922 PPStack.push_back({PP_Conditional, Line}); 923 } 924 925 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 926 ++PPBranchLevel; 927 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 928 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 929 PPLevelBranchIndex.push_back(0); 930 PPLevelBranchCount.push_back(0); 931 } 932 PPChainBranchIndex.push(0); 933 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 934 conditionalCompilationCondition(Unreachable || Skip); 935 } 936 937 void UnwrappedLineParser::conditionalCompilationAlternative() { 938 if (!PPStack.empty()) 939 
PPStack.pop_back(); 940 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 941 if (!PPChainBranchIndex.empty()) 942 ++PPChainBranchIndex.top(); 943 conditionalCompilationCondition( 944 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 945 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 946 } 947 948 void UnwrappedLineParser::conditionalCompilationEnd() { 949 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 950 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 951 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) 952 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 953 } 954 // Guard against #endif's without #if. 955 if (PPBranchLevel > -1) 956 --PPBranchLevel; 957 if (!PPChainBranchIndex.empty()) 958 PPChainBranchIndex.pop(); 959 if (!PPStack.empty()) 960 PPStack.pop_back(); 961 } 962 963 void UnwrappedLineParser::parsePPIf(bool IfDef) { 964 bool IfNDef = FormatTok->is(tok::pp_ifndef); 965 nextToken(); 966 bool Unreachable = false; 967 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 968 Unreachable = true; 969 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 970 Unreachable = true; 971 conditionalCompilationStart(Unreachable); 972 FormatToken *IfCondition = FormatTok; 973 // If there's a #ifndef on the first line, and the only lines before it are 974 // comments, it could be an include guard. 
975 bool MaybeIncludeGuard = IfNDef; 976 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 977 for (auto &Line : Lines) { 978 if (!Line.Tokens.front().Tok->is(tok::comment)) { 979 MaybeIncludeGuard = false; 980 IncludeGuard = IG_Rejected; 981 break; 982 } 983 } 984 --PPBranchLevel; 985 parsePPUnknown(); 986 ++PPBranchLevel; 987 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 988 IncludeGuard = IG_IfNdefed; 989 IncludeGuardToken = IfCondition; 990 } 991 } 992 993 void UnwrappedLineParser::parsePPElse() { 994 // If a potential include guard has an #else, it's not an include guard. 995 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 996 IncludeGuard = IG_Rejected; 997 conditionalCompilationAlternative(); 998 if (PPBranchLevel > -1) 999 --PPBranchLevel; 1000 parsePPUnknown(); 1001 ++PPBranchLevel; 1002 } 1003 1004 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 1005 1006 void UnwrappedLineParser::parsePPEndIf() { 1007 conditionalCompilationEnd(); 1008 parsePPUnknown(); 1009 // If the #endif of a potential include guard is the last thing in the file, 1010 // then we found an include guard. 
1011 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1012 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1013 IncludeGuard = IG_Found; 1014 } 1015 1016 void UnwrappedLineParser::parsePPDefine() { 1017 nextToken(); 1018 1019 if (!FormatTok->Tok.getIdentifierInfo()) { 1020 IncludeGuard = IG_Rejected; 1021 IncludeGuardToken = nullptr; 1022 parsePPUnknown(); 1023 return; 1024 } 1025 1026 if (IncludeGuard == IG_IfNdefed && 1027 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1028 IncludeGuard = IG_Defined; 1029 IncludeGuardToken = nullptr; 1030 for (auto &Line : Lines) { 1031 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1032 IncludeGuard = IG_Rejected; 1033 break; 1034 } 1035 } 1036 } 1037 1038 nextToken(); 1039 if (FormatTok->Tok.getKind() == tok::l_paren && 1040 !FormatTok->hasWhitespaceBefore()) 1041 parseParens(); 1042 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1043 Line->Level += PPBranchLevel + 1; 1044 addUnwrappedLine(); 1045 ++Line->Level; 1046 1047 // Errors during a preprocessor directive can only affect the layout of the 1048 // preprocessor directive, and thus we ignore them. An alternative approach 1049 // would be to use the same approach we use on the file level (no 1050 // re-indentation if there was a structural error) within the macro 1051 // definition. 1052 parseFile(); 1053 } 1054 1055 void UnwrappedLineParser::parsePPUnknown() { 1056 do { 1057 nextToken(); 1058 } while (!eof()); 1059 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1060 Line->Level += PPBranchLevel + 1; 1061 addUnwrappedLine(); 1062 } 1063 1064 // Here we exclude certain tokens that are not usually the first token in an 1065 // unwrapped line. This is used in attempt to distinguish macro calls without 1066 // trailing semicolons from other constructs split to several lines. 
1067 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1068 // Semicolon can be a null-statement, l_square can be a start of a macro or 1069 // a C++11 attribute, but this doesn't seem to be common. 1070 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 1071 Tok.isNot(TT_AttributeSquare) && 1072 // Tokens that can only be used as binary operators and a part of 1073 // overloaded operator names. 1074 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 1075 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 1076 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 1077 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 1078 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 1079 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 1080 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 1081 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 1082 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 1083 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 1084 Tok.isNot(tok::lesslessequal) && 1085 // Colon is used in labels, base class lists, initializer lists, 1086 // range-based for loops, ternary operator, but should never be the 1087 // first token in an unwrapped line. 1088 Tok.isNot(tok::colon) && 1089 // 'noexcept' is a trailing annotation. 1090 Tok.isNot(tok::kw_noexcept); 1091 } 1092 1093 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1094 const FormatToken *FormatTok) { 1095 // FIXME: This returns true for C/C++ keywords like 'struct'. 
1096 return FormatTok->is(tok::identifier) && 1097 (FormatTok->Tok.getIdentifierInfo() == nullptr || 1098 !FormatTok->isOneOf( 1099 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1100 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1101 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1102 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1103 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1104 Keywords.kw_instanceof, Keywords.kw_interface, 1105 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1106 } 1107 1108 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1109 const FormatToken *FormatTok) { 1110 return FormatTok->Tok.isLiteral() || 1111 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1112 mustBeJSIdent(Keywords, FormatTok); 1113 } 1114 1115 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1116 // when encountered after a value (see mustBeJSIdentOrValue). 1117 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1118 const FormatToken *FormatTok) { 1119 return FormatTok->isOneOf( 1120 tok::kw_return, Keywords.kw_yield, 1121 // conditionals 1122 tok::kw_if, tok::kw_else, 1123 // loops 1124 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1125 // switch/case 1126 tok::kw_switch, tok::kw_case, 1127 // exceptions 1128 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1129 // declaration 1130 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1131 Keywords.kw_async, Keywords.kw_function, 1132 // import/export 1133 Keywords.kw_import, tok::kw_export); 1134 } 1135 1136 // Checks whether a token is a type in K&R C (aka C78). 
1137 static bool isC78Type(const FormatToken &Tok) { 1138 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1139 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1140 tok::identifier); 1141 } 1142 1143 // This function checks whether a token starts the first parameter declaration 1144 // in a K&R C (aka C78) function definition, e.g.: 1145 // int f(a, b) 1146 // short a, b; 1147 // { 1148 // return a + b; 1149 // } 1150 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1151 const FormatToken *FuncName) { 1152 assert(Tok); 1153 assert(Next); 1154 assert(FuncName); 1155 1156 if (FuncName->isNot(tok::identifier)) 1157 return false; 1158 1159 const FormatToken *Prev = FuncName->Previous; 1160 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1161 return false; 1162 1163 if (!isC78Type(*Tok) && 1164 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) 1165 return false; 1166 1167 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1168 return false; 1169 1170 Tok = Tok->Previous; 1171 if (!Tok || Tok->isNot(tok::r_paren)) 1172 return false; 1173 1174 Tok = Tok->Previous; 1175 if (!Tok || Tok->isNot(tok::identifier)) 1176 return false; 1177 1178 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1179 } 1180 1181 void UnwrappedLineParser::parseModuleImport() { 1182 nextToken(); 1183 while (!eof()) { 1184 if (FormatTok->is(tok::colon)) { 1185 FormatTok->setType(TT_ModulePartitionColon); 1186 } 1187 // Handle import <foo/bar.h> as we would an include statement. 1188 else if (FormatTok->is(tok::less)) { 1189 nextToken(); 1190 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1191 // Mark tokens up to the trailing line comments as implicit string 1192 // literals. 
1193 if (FormatTok->isNot(tok::comment) && 1194 !FormatTok->TokenText.startswith("//")) 1195 FormatTok->setType(TT_ImplicitStringLiteral); 1196 nextToken(); 1197 } 1198 } 1199 if (FormatTok->is(tok::semi)) { 1200 nextToken(); 1201 break; 1202 } 1203 nextToken(); 1204 } 1205 1206 addUnwrappedLine(); 1207 } 1208 1209 // readTokenWithJavaScriptASI reads the next token and terminates the current 1210 // line if JavaScript Automatic Semicolon Insertion must 1211 // happen between the current token and the next token. 1212 // 1213 // This method is conservative - it cannot cover all edge cases of JavaScript, 1214 // but only aims to correctly handle certain well known cases. It *must not* 1215 // return true in speculative cases. 1216 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1217 FormatToken *Previous = FormatTok; 1218 readToken(); 1219 FormatToken *Next = FormatTok; 1220 1221 bool IsOnSameLine = 1222 CommentsBeforeNextToken.empty() 1223 ? Next->NewlinesBefore == 0 1224 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 1225 if (IsOnSameLine) 1226 return; 1227 1228 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 1229 bool PreviousStartsTemplateExpr = 1230 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 1231 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 1232 // If the line contains an '@' sign, the previous token might be an 1233 // annotation, which can precede another identifier/value. 
1234 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) { 1235 return LineNode.Tok->is(tok::at); 1236 }); 1237 if (HasAt) 1238 return; 1239 } 1240 if (Next->is(tok::exclaim) && PreviousMustBeValue) 1241 return addUnwrappedLine(); 1242 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 1243 bool NextEndsTemplateExpr = 1244 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 1245 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 1246 (PreviousMustBeValue || 1247 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 1248 tok::minusminus))) 1249 return addUnwrappedLine(); 1250 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 1251 isJSDeclOrStmt(Keywords, Next)) 1252 return addUnwrappedLine(); 1253 } 1254 1255 void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind, 1256 bool IsTopLevel) { 1257 if (Style.Language == FormatStyle::LK_TableGen && 1258 FormatTok->is(tok::pp_include)) { 1259 nextToken(); 1260 if (FormatTok->is(tok::string_literal)) 1261 nextToken(); 1262 addUnwrappedLine(); 1263 return; 1264 } 1265 switch (FormatTok->Tok.getKind()) { 1266 case tok::kw_asm: 1267 nextToken(); 1268 if (FormatTok->is(tok::l_brace)) { 1269 FormatTok->setType(TT_InlineASMBrace); 1270 nextToken(); 1271 while (FormatTok && FormatTok->isNot(tok::eof)) { 1272 if (FormatTok->is(tok::r_brace)) { 1273 FormatTok->setType(TT_InlineASMBrace); 1274 nextToken(); 1275 addUnwrappedLine(); 1276 break; 1277 } 1278 FormatTok->Finalized = true; 1279 nextToken(); 1280 } 1281 } 1282 break; 1283 case tok::kw_namespace: 1284 parseNamespace(); 1285 return; 1286 case tok::kw_public: 1287 case tok::kw_protected: 1288 case tok::kw_private: 1289 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 1290 Style.isCSharp()) 1291 nextToken(); 1292 else 1293 parseAccessSpecifier(); 1294 return; 1295 case tok::kw_if: 1296 if (Style.isJavaScript() && Line->MustBeDeclaration) 1297 // field/method 
declaration. 1298 break; 1299 parseIfThenElse(IfKind); 1300 return; 1301 case tok::kw_for: 1302 case tok::kw_while: 1303 if (Style.isJavaScript() && Line->MustBeDeclaration) 1304 // field/method declaration. 1305 break; 1306 parseForOrWhileLoop(); 1307 return; 1308 case tok::kw_do: 1309 if (Style.isJavaScript() && Line->MustBeDeclaration) 1310 // field/method declaration. 1311 break; 1312 parseDoWhile(); 1313 return; 1314 case tok::kw_switch: 1315 if (Style.isJavaScript() && Line->MustBeDeclaration) 1316 // 'switch: string' field declaration. 1317 break; 1318 parseSwitch(); 1319 return; 1320 case tok::kw_default: 1321 if (Style.isJavaScript() && Line->MustBeDeclaration) 1322 // 'default: string' field declaration. 1323 break; 1324 nextToken(); 1325 if (FormatTok->is(tok::colon)) { 1326 parseLabel(); 1327 return; 1328 } 1329 // e.g. "default void f() {}" in a Java interface. 1330 break; 1331 case tok::kw_case: 1332 if (Style.isJavaScript() && Line->MustBeDeclaration) 1333 // 'case: string' field declaration. 1334 break; 1335 parseCaseLabel(); 1336 return; 1337 case tok::kw_try: 1338 case tok::kw___try: 1339 if (Style.isJavaScript() && Line->MustBeDeclaration) 1340 // field/method declaration. 1341 break; 1342 parseTryCatch(); 1343 return; 1344 case tok::kw_extern: 1345 nextToken(); 1346 if (FormatTok->Tok.is(tok::string_literal)) { 1347 nextToken(); 1348 if (FormatTok->Tok.is(tok::l_brace)) { 1349 if (Style.BraceWrapping.AfterExternBlock) 1350 addUnwrappedLine(); 1351 // Either we indent or for backwards compatibility we follow the 1352 // AfterExternBlock style. 1353 unsigned AddLevels = 1354 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) || 1355 (Style.BraceWrapping.AfterExternBlock && 1356 Style.IndentExternBlock == 1357 FormatStyle::IEBS_AfterExternBlock) 1358 ? 
1u 1359 : 0u; 1360 parseBlock(/*MustBeDeclaration=*/true, AddLevels); 1361 addUnwrappedLine(); 1362 return; 1363 } 1364 } 1365 break; 1366 case tok::kw_export: 1367 if (Style.isJavaScript()) { 1368 parseJavaScriptEs6ImportExport(); 1369 return; 1370 } 1371 if (!Style.isCpp()) 1372 break; 1373 // Handle C++ "(inline|export) namespace". 1374 LLVM_FALLTHROUGH; 1375 case tok::kw_inline: 1376 nextToken(); 1377 if (FormatTok->Tok.is(tok::kw_namespace)) { 1378 parseNamespace(); 1379 return; 1380 } 1381 break; 1382 case tok::identifier: 1383 if (FormatTok->is(TT_ForEachMacro)) { 1384 parseForOrWhileLoop(); 1385 return; 1386 } 1387 if (FormatTok->is(TT_MacroBlockBegin)) { 1388 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 1389 /*MunchSemi=*/false); 1390 return; 1391 } 1392 if (FormatTok->is(Keywords.kw_import)) { 1393 if (Style.isJavaScript()) { 1394 parseJavaScriptEs6ImportExport(); 1395 return; 1396 } 1397 if (Style.Language == FormatStyle::LK_Proto) { 1398 nextToken(); 1399 if (FormatTok->is(tok::kw_public)) 1400 nextToken(); 1401 if (!FormatTok->is(tok::string_literal)) 1402 return; 1403 nextToken(); 1404 if (FormatTok->is(tok::semi)) 1405 nextToken(); 1406 addUnwrappedLine(); 1407 return; 1408 } 1409 if (Style.isCpp()) { 1410 parseModuleImport(); 1411 return; 1412 } 1413 } 1414 if (Style.isCpp() && 1415 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1416 Keywords.kw_slots, Keywords.kw_qslots)) { 1417 nextToken(); 1418 if (FormatTok->is(tok::colon)) { 1419 nextToken(); 1420 addUnwrappedLine(); 1421 return; 1422 } 1423 } 1424 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1425 parseStatementMacro(); 1426 return; 1427 } 1428 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) { 1429 parseNamespace(); 1430 return; 1431 } 1432 // In all other cases, parse the declaration. 
1433 break; 1434 default: 1435 break; 1436 } 1437 do { 1438 const FormatToken *Previous = FormatTok->Previous; 1439 switch (FormatTok->Tok.getKind()) { 1440 case tok::at: 1441 nextToken(); 1442 if (FormatTok->Tok.is(tok::l_brace)) { 1443 nextToken(); 1444 parseBracedList(); 1445 break; 1446 } else if (Style.Language == FormatStyle::LK_Java && 1447 FormatTok->is(Keywords.kw_interface)) { 1448 nextToken(); 1449 break; 1450 } 1451 switch (FormatTok->Tok.getObjCKeywordID()) { 1452 case tok::objc_public: 1453 case tok::objc_protected: 1454 case tok::objc_package: 1455 case tok::objc_private: 1456 return parseAccessSpecifier(); 1457 case tok::objc_interface: 1458 case tok::objc_implementation: 1459 return parseObjCInterfaceOrImplementation(); 1460 case tok::objc_protocol: 1461 if (parseObjCProtocol()) 1462 return; 1463 break; 1464 case tok::objc_end: 1465 return; // Handled by the caller. 1466 case tok::objc_optional: 1467 case tok::objc_required: 1468 nextToken(); 1469 addUnwrappedLine(); 1470 return; 1471 case tok::objc_autoreleasepool: 1472 nextToken(); 1473 if (FormatTok->Tok.is(tok::l_brace)) { 1474 if (Style.BraceWrapping.AfterControlStatement == 1475 FormatStyle::BWACS_Always) 1476 addUnwrappedLine(); 1477 parseBlock(); 1478 } 1479 addUnwrappedLine(); 1480 return; 1481 case tok::objc_synchronized: 1482 nextToken(); 1483 if (FormatTok->Tok.is(tok::l_paren)) 1484 // Skip synchronization object 1485 parseParens(); 1486 if (FormatTok->Tok.is(tok::l_brace)) { 1487 if (Style.BraceWrapping.AfterControlStatement == 1488 FormatStyle::BWACS_Always) 1489 addUnwrappedLine(); 1490 parseBlock(); 1491 } 1492 addUnwrappedLine(); 1493 return; 1494 case tok::objc_try: 1495 // This branch isn't strictly necessary (the kw_try case below would 1496 // do this too after the tok::at is parsed above). But be explicit. 
1497 parseTryCatch(); 1498 return; 1499 default: 1500 break; 1501 } 1502 break; 1503 case tok::kw_concept: 1504 parseConcept(); 1505 return; 1506 case tok::kw_requires: 1507 parseRequires(); 1508 return; 1509 case tok::kw_enum: 1510 // Ignore if this is part of "template <enum ...". 1511 if (Previous && Previous->is(tok::less)) { 1512 nextToken(); 1513 break; 1514 } 1515 1516 // parseEnum falls through and does not yet add an unwrapped line as an 1517 // enum definition can start a structural element. 1518 if (!parseEnum()) 1519 break; 1520 // This only applies for C++. 1521 if (!Style.isCpp()) { 1522 addUnwrappedLine(); 1523 return; 1524 } 1525 break; 1526 case tok::kw_typedef: 1527 nextToken(); 1528 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1529 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS, 1530 Keywords.kw_CF_CLOSED_ENUM, 1531 Keywords.kw_NS_CLOSED_ENUM)) 1532 parseEnum(); 1533 break; 1534 case tok::kw_struct: 1535 case tok::kw_union: 1536 case tok::kw_class: 1537 if (parseStructLike()) 1538 return; 1539 break; 1540 case tok::period: 1541 nextToken(); 1542 // In Java, classes have an implicit static member "class". 1543 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1544 FormatTok->is(tok::kw_class)) 1545 nextToken(); 1546 if (Style.isJavaScript() && FormatTok && 1547 FormatTok->Tok.getIdentifierInfo()) 1548 // JavaScript only has pseudo keywords, all keywords are allowed to 1549 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1550 nextToken(); 1551 break; 1552 case tok::semi: 1553 nextToken(); 1554 addUnwrappedLine(); 1555 return; 1556 case tok::r_brace: 1557 addUnwrappedLine(); 1558 return; 1559 case tok::l_paren: { 1560 parseParens(); 1561 // Break the unwrapped line if a K&R C function definition has a parameter 1562 // declaration. 
1563 if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof)) 1564 break; 1565 if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) { 1566 addUnwrappedLine(); 1567 return; 1568 } 1569 break; 1570 } 1571 case tok::kw_operator: 1572 nextToken(); 1573 if (FormatTok->isBinaryOperator()) 1574 nextToken(); 1575 break; 1576 case tok::caret: 1577 nextToken(); 1578 if (FormatTok->Tok.isAnyIdentifier() || 1579 FormatTok->isSimpleTypeSpecifier()) 1580 nextToken(); 1581 if (FormatTok->is(tok::l_paren)) 1582 parseParens(); 1583 if (FormatTok->is(tok::l_brace)) 1584 parseChildBlock(); 1585 break; 1586 case tok::l_brace: 1587 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { 1588 // A block outside of parentheses must be the last part of a 1589 // structural element. 1590 // FIXME: Figure out cases where this is not true, and add projections 1591 // for them (the one we know is missing are lambdas). 1592 if (Style.Language == FormatStyle::LK_Java && 1593 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) { 1594 // If necessary, we could set the type to something different than 1595 // TT_FunctionLBrace. 1596 if (Style.BraceWrapping.AfterControlStatement == 1597 FormatStyle::BWACS_Always) 1598 addUnwrappedLine(); 1599 } else if (Style.BraceWrapping.AfterFunction) { 1600 addUnwrappedLine(); 1601 } 1602 FormatTok->setType(TT_FunctionLBrace); 1603 parseBlock(); 1604 addUnwrappedLine(); 1605 return; 1606 } 1607 // Otherwise this was a braced init list, and the structural 1608 // element continues. 1609 break; 1610 case tok::kw_try: 1611 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1612 // field/method declaration. 1613 nextToken(); 1614 break; 1615 } 1616 // We arrive here when parsing function-try blocks. 
1617 if (Style.BraceWrapping.AfterFunction) 1618 addUnwrappedLine(); 1619 parseTryCatch(); 1620 return; 1621 case tok::identifier: { 1622 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) && 1623 Line->MustBeDeclaration) { 1624 addUnwrappedLine(); 1625 parseCSharpGenericTypeConstraint(); 1626 break; 1627 } 1628 if (FormatTok->is(TT_MacroBlockEnd)) { 1629 addUnwrappedLine(); 1630 return; 1631 } 1632 1633 // Function declarations (as opposed to function expressions) are parsed 1634 // on their own unwrapped line by continuing this loop. Function 1635 // expressions (functions that are not on their own line) must not create 1636 // a new unwrapped line, so they are special cased below. 1637 size_t TokenCount = Line->Tokens.size(); 1638 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) && 1639 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1640 Keywords.kw_async)))) { 1641 tryToParseJSFunction(); 1642 break; 1643 } 1644 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) && 1645 FormatTok->is(Keywords.kw_interface)) { 1646 if (Style.isJavaScript()) { 1647 // In JavaScript/TypeScript, "interface" can be used as a standalone 1648 // identifier, e.g. in `var interface = 1;`. If "interface" is 1649 // followed by another identifier, it is very like to be an actual 1650 // interface declaration. 1651 unsigned StoredPosition = Tokens->getPosition(); 1652 FormatToken *Next = Tokens->getNextToken(); 1653 FormatTok = Tokens->setPosition(StoredPosition); 1654 if (!mustBeJSIdent(Keywords, Next)) { 1655 nextToken(); 1656 break; 1657 } 1658 } 1659 parseRecord(); 1660 addUnwrappedLine(); 1661 return; 1662 } 1663 1664 if (FormatTok->is(Keywords.kw_interface)) { 1665 if (parseStructLike()) 1666 return; 1667 break; 1668 } 1669 1670 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1671 parseStatementMacro(); 1672 return; 1673 } 1674 1675 // See if the following token should start a new unwrapped line. 
1676 StringRef Text = FormatTok->TokenText; 1677 1678 FormatToken *PreviousToken = FormatTok; 1679 nextToken(); 1680 1681 // JS doesn't have macros, and within classes colons indicate fields, not 1682 // labels. 1683 if (Style.isJavaScript()) 1684 break; 1685 1686 TokenCount = Line->Tokens.size(); 1687 if (TokenCount == 1 || 1688 (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) { 1689 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1690 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1691 parseLabel(!Style.IndentGotoLabels); 1692 return; 1693 } 1694 // Recognize function-like macro usages without trailing semicolon as 1695 // well as free-standing macros like Q_OBJECT. 1696 bool FunctionLike = FormatTok->is(tok::l_paren); 1697 if (FunctionLike) 1698 parseParens(); 1699 1700 bool FollowedByNewline = 1701 CommentsBeforeNextToken.empty() 1702 ? FormatTok->NewlinesBefore > 0 1703 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1704 1705 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1706 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { 1707 PreviousToken->setType(TT_FunctionLikeOrFreestandingMacro); 1708 addUnwrappedLine(); 1709 return; 1710 } 1711 } 1712 break; 1713 } 1714 case tok::equal: 1715 if ((Style.isJavaScript() || Style.isCSharp()) && 1716 FormatTok->is(TT_FatArrow)) { 1717 tryToParseChildBlock(); 1718 break; 1719 } 1720 1721 nextToken(); 1722 if (FormatTok->Tok.is(tok::l_brace)) { 1723 // Block kind should probably be set to BK_BracedInit for any language. 1724 // C# needs this change to ensure that array initialisers and object 1725 // initialisers are indented the same way. 
1726 if (Style.isCSharp()) 1727 FormatTok->setBlockKind(BK_BracedInit); 1728 nextToken(); 1729 parseBracedList(); 1730 } else if (Style.Language == FormatStyle::LK_Proto && 1731 FormatTok->Tok.is(tok::less)) { 1732 nextToken(); 1733 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 1734 /*ClosingBraceKind=*/tok::greater); 1735 } 1736 break; 1737 case tok::l_square: 1738 parseSquare(); 1739 break; 1740 case tok::kw_new: 1741 parseNew(); 1742 break; 1743 default: 1744 nextToken(); 1745 break; 1746 } 1747 } while (!eof()); 1748 } 1749 1750 bool UnwrappedLineParser::tryToParsePropertyAccessor() { 1751 assert(FormatTok->is(tok::l_brace)); 1752 if (!Style.isCSharp()) 1753 return false; 1754 // See if it's a property accessor. 1755 if (FormatTok->Previous->isNot(tok::identifier)) 1756 return false; 1757 1758 // See if we are inside a property accessor. 1759 // 1760 // Record the current tokenPosition so that we can advance and 1761 // reset the current token. `Next` is not set yet so we need 1762 // another way to advance along the token stream. 1763 unsigned int StoredPosition = Tokens->getPosition(); 1764 FormatToken *Tok = Tokens->getNextToken(); 1765 1766 // A trivial property accessor is of the form: 1767 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] } 1768 // Track these as they do not require line breaks to be introduced. 
1769 bool HasGetOrSet = false; 1770 bool IsTrivialPropertyAccessor = true; 1771 while (!eof()) { 1772 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private, 1773 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get, 1774 Keywords.kw_set)) { 1775 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set)) 1776 HasGetOrSet = true; 1777 Tok = Tokens->getNextToken(); 1778 continue; 1779 } 1780 if (Tok->isNot(tok::r_brace)) 1781 IsTrivialPropertyAccessor = false; 1782 break; 1783 } 1784 1785 if (!HasGetOrSet) { 1786 Tokens->setPosition(StoredPosition); 1787 return false; 1788 } 1789 1790 // Try to parse the property accessor: 1791 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties 1792 Tokens->setPosition(StoredPosition); 1793 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction) 1794 addUnwrappedLine(); 1795 nextToken(); 1796 do { 1797 switch (FormatTok->Tok.getKind()) { 1798 case tok::r_brace: 1799 nextToken(); 1800 if (FormatTok->is(tok::equal)) { 1801 while (!eof() && FormatTok->isNot(tok::semi)) 1802 nextToken(); 1803 nextToken(); 1804 } 1805 addUnwrappedLine(); 1806 return true; 1807 case tok::l_brace: 1808 ++Line->Level; 1809 parseBlock(/*MustBeDeclaration=*/true); 1810 addUnwrappedLine(); 1811 --Line->Level; 1812 break; 1813 case tok::equal: 1814 if (FormatTok->is(TT_FatArrow)) { 1815 ++Line->Level; 1816 do { 1817 nextToken(); 1818 } while (!eof() && FormatTok->isNot(tok::semi)); 1819 nextToken(); 1820 addUnwrappedLine(); 1821 --Line->Level; 1822 break; 1823 } 1824 nextToken(); 1825 break; 1826 default: 1827 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) && 1828 !IsTrivialPropertyAccessor) { 1829 // Non-trivial get/set needs to be on its own line. 1830 addUnwrappedLine(); 1831 } 1832 nextToken(); 1833 } 1834 } while (!eof()); 1835 1836 // Unreachable for well-formed code (paired '{' and '}'). 
1837 return true; 1838 } 1839 1840 bool UnwrappedLineParser::tryToParseLambda() { 1841 if (!Style.isCpp()) { 1842 nextToken(); 1843 return false; 1844 } 1845 assert(FormatTok->is(tok::l_square)); 1846 FormatToken &LSquare = *FormatTok; 1847 if (!tryToParseLambdaIntroducer()) 1848 return false; 1849 1850 bool SeenArrow = false; 1851 bool InTemplateParameterList = false; 1852 1853 while (FormatTok->isNot(tok::l_brace)) { 1854 if (FormatTok->isSimpleTypeSpecifier()) { 1855 nextToken(); 1856 continue; 1857 } 1858 switch (FormatTok->Tok.getKind()) { 1859 case tok::l_brace: 1860 break; 1861 case tok::l_paren: 1862 parseParens(); 1863 break; 1864 case tok::l_square: 1865 parseSquare(); 1866 break; 1867 case tok::kw_class: 1868 case tok::kw_template: 1869 case tok::kw_typename: 1870 assert(FormatTok->Previous); 1871 if (FormatTok->Previous->is(tok::less)) 1872 InTemplateParameterList = true; 1873 nextToken(); 1874 break; 1875 case tok::amp: 1876 case tok::star: 1877 case tok::kw_const: 1878 case tok::comma: 1879 case tok::less: 1880 case tok::greater: 1881 case tok::identifier: 1882 case tok::numeric_constant: 1883 case tok::coloncolon: 1884 case tok::kw_mutable: 1885 case tok::kw_noexcept: 1886 nextToken(); 1887 break; 1888 // Specialization of a template with an integer parameter can contain 1889 // arithmetic, logical, comparison and ternary operators. 1890 // 1891 // FIXME: This also accepts sequences of operators that are not in the scope 1892 // of a template argument list. 1893 // 1894 // In a C++ lambda a template type can only occur after an arrow. We use 1895 // this as an heuristic to distinguish between Objective-C expressions 1896 // followed by an `a->b` expression, such as: 1897 // ([obj func:arg] + a->b) 1898 // Otherwise the code below would parse as a lambda. 
1899 // 1900 // FIXME: This heuristic is incorrect for C++20 generic lambdas with 1901 // explicit template lists: []<bool b = true && false>(U &&u){} 1902 case tok::plus: 1903 case tok::minus: 1904 case tok::exclaim: 1905 case tok::tilde: 1906 case tok::slash: 1907 case tok::percent: 1908 case tok::lessless: 1909 case tok::pipe: 1910 case tok::pipepipe: 1911 case tok::ampamp: 1912 case tok::caret: 1913 case tok::equalequal: 1914 case tok::exclaimequal: 1915 case tok::greaterequal: 1916 case tok::lessequal: 1917 case tok::question: 1918 case tok::colon: 1919 case tok::ellipsis: 1920 case tok::kw_true: 1921 case tok::kw_false: 1922 if (SeenArrow || InTemplateParameterList) { 1923 nextToken(); 1924 break; 1925 } 1926 return true; 1927 case tok::arrow: 1928 // This might or might not actually be a lambda arrow (this could be an 1929 // ObjC method invocation followed by a dereferencing arrow). We might 1930 // reset this back to TT_Unknown in TokenAnnotator. 1931 FormatTok->setType(TT_LambdaArrow); 1932 SeenArrow = true; 1933 nextToken(); 1934 break; 1935 default: 1936 return true; 1937 } 1938 } 1939 FormatTok->setType(TT_LambdaLBrace); 1940 LSquare.setType(TT_LambdaLSquare); 1941 parseChildBlock(); 1942 return true; 1943 } 1944 1945 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1946 const FormatToken *Previous = FormatTok->Previous; 1947 if (Previous && 1948 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1949 tok::kw_delete, tok::l_square) || 1950 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1951 Previous->isSimpleTypeSpecifier())) { 1952 nextToken(); 1953 return false; 1954 } 1955 nextToken(); 1956 if (FormatTok->is(tok::l_square)) 1957 return false; 1958 parseSquare(/*LambdaIntroducer=*/true); 1959 return true; 1960 } 1961 1962 void UnwrappedLineParser::tryToParseJSFunction() { 1963 assert(FormatTok->is(Keywords.kw_function) || 1964 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1965 if 
(FormatTok->is(Keywords.kw_async)) 1966 nextToken(); 1967 // Consume "function". 1968 nextToken(); 1969 1970 // Consume * (generator function). Treat it like C++'s overloaded operators. 1971 if (FormatTok->is(tok::star)) { 1972 FormatTok->setType(TT_OverloadedOperator); 1973 nextToken(); 1974 } 1975 1976 // Consume function name. 1977 if (FormatTok->is(tok::identifier)) 1978 nextToken(); 1979 1980 if (FormatTok->isNot(tok::l_paren)) 1981 return; 1982 1983 // Parse formal parameter list. 1984 parseParens(); 1985 1986 if (FormatTok->is(tok::colon)) { 1987 // Parse a type definition. 1988 nextToken(); 1989 1990 // Eat the type declaration. For braced inline object types, balance braces, 1991 // otherwise just parse until finding an l_brace for the function body. 1992 if (FormatTok->is(tok::l_brace)) 1993 tryToParseBracedList(); 1994 else 1995 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1996 nextToken(); 1997 } 1998 1999 if (FormatTok->is(tok::semi)) 2000 return; 2001 2002 parseChildBlock(); 2003 } 2004 2005 bool UnwrappedLineParser::tryToParseBracedList() { 2006 if (FormatTok->is(BK_Unknown)) 2007 calculateBraceTypes(); 2008 assert(FormatTok->isNot(BK_Unknown)); 2009 if (FormatTok->is(BK_Block)) 2010 return false; 2011 nextToken(); 2012 parseBracedList(); 2013 return true; 2014 } 2015 2016 bool UnwrappedLineParser::tryToParseChildBlock() { 2017 assert(Style.isJavaScript() || Style.isCSharp()); 2018 assert(FormatTok->is(TT_FatArrow)); 2019 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 2020 // They always start an expression or a child block if followed by a curly 2021 // brace. 
2022 nextToken(); 2023 if (FormatTok->isNot(tok::l_brace)) 2024 return false; 2025 parseChildBlock(); 2026 return true; 2027 } 2028 2029 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 2030 bool IsEnum, 2031 tok::TokenKind ClosingBraceKind) { 2032 bool HasError = false; 2033 2034 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 2035 // replace this by using parseAssignmentExpression() inside. 2036 do { 2037 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2038 tryToParseChildBlock()) 2039 continue; 2040 if (Style.isJavaScript()) { 2041 if (FormatTok->is(Keywords.kw_function) || 2042 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 2043 tryToParseJSFunction(); 2044 continue; 2045 } 2046 if (FormatTok->is(tok::l_brace)) { 2047 // Could be a method inside of a braced list `{a() { return 1; }}`. 2048 if (tryToParseBracedList()) 2049 continue; 2050 parseChildBlock(); 2051 } 2052 } 2053 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 2054 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2055 addUnwrappedLine(); 2056 nextToken(); 2057 return !HasError; 2058 } 2059 switch (FormatTok->Tok.getKind()) { 2060 case tok::l_square: 2061 if (Style.isCSharp()) 2062 parseSquare(); 2063 else 2064 tryToParseLambda(); 2065 break; 2066 case tok::l_paren: 2067 parseParens(); 2068 // JavaScript can just have free standing methods and getters/setters in 2069 // object literals. Detect them by a "{" following ")". 2070 if (Style.isJavaScript()) { 2071 if (FormatTok->is(tok::l_brace)) 2072 parseChildBlock(); 2073 break; 2074 } 2075 break; 2076 case tok::l_brace: 2077 // Assume there are no blocks inside a braced init list apart 2078 // from the ones we explicitly parse out (like lambdas). 
FormatTok->setBlockKind(BK_BracedInit);
      nextToken();
      parseBracedList();
      break;
    case tok::less:
      // Only protos treat "<" ... ">" as a nested list; everywhere else the
      // "<" is skipped like any other token.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      } else {
        nextToken();
      }
      break;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
      if (Style.isJavaScript()) {
        nextToken();
        break;
      }
      // Outside JavaScript a semicolon means this was not a braced list after
      // all. Either bail out right away (leaving the ";" unconsumed) or keep
      // going and report the error once the closing token is found.
      HasError = true;
      if (!ContinueOnSemicolons)
        return !HasError;
      nextToken();
      break;
    case tok::comma:
      nextToken();
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
  // Ran out of tokens before finding the closing token.
  return false;
}

// Parses a parenthesized token sequence, starting at "(" and ending after the
// matching ")". Nested parentheses, lambdas, braced lists and child blocks are
// parsed recursively. Returns early, without consuming it, on an unmatched "}".
void UnwrappedLineParser::parseParens() {
  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      parseParens();
      // In Java, a "{" right after a nested ")" is parsed as a child block.
      if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::r_paren:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
return;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_brace:
      // Either a braced list or, if the brace opens a block, a child block.
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    case tok::at:
      // "@{" is parsed as a braced list.
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::equal:
      // A C# fat arrow shares tok::equal; it may introduce a child block.
      if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
        tryToParseChildBlock();
      else
        nextToken();
      break;
    case tok::kw_class:
      // JavaScript class expressions can appear inside parentheses.
      if (Style.isJavaScript())
        parseRecord(/*ParseAsExpr=*/true);
      else
        nextToken();
      break;
    case tok::identifier:
      if (Style.isJavaScript() &&
          (FormatTok->is(Keywords.kw_function) ||
           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
        tryToParseJSFunction();
      else
        nextToken();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

// Parses a square-bracketed token sequence up to and including the matching
// "]". Unless \p LambdaIntroducer is set, the "[" is first tried as the start
// of a lambda; if that succeeds nothing else is parsed here. Returns early,
// without consuming it, on an unmatched "}".
void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
  if (!LambdaIntroducer) {
    assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
    if (tryToParseLambda())
      return;
  }
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      parseParens();
      break;
    case tok::r_square:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside square brackets is an error if there wasn't a matching
      // "{".
2192 return; 2193 case tok::l_square: 2194 parseSquare(); 2195 break; 2196 case tok::l_brace: { 2197 if (!tryToParseBracedList()) 2198 parseChildBlock(); 2199 break; 2200 } 2201 case tok::at: 2202 nextToken(); 2203 if (FormatTok->Tok.is(tok::l_brace)) { 2204 nextToken(); 2205 parseBracedList(); 2206 } 2207 break; 2208 default: 2209 nextToken(); 2210 break; 2211 } 2212 } while (!eof()); 2213 } 2214 2215 void UnwrappedLineParser::keepAncestorBraces() { 2216 if (!Style.RemoveBracesLLVM) 2217 return; 2218 2219 const int MaxNestingLevels = 2; 2220 const int Size = NestedTooDeep.size(); 2221 if (Size >= MaxNestingLevels) 2222 NestedTooDeep[Size - MaxNestingLevels] = true; 2223 NestedTooDeep.push_back(false); 2224 } 2225 2226 static void markOptionalBraces(FormatToken *LeftBrace) { 2227 if (!LeftBrace) 2228 return; 2229 2230 assert(LeftBrace->is(tok::l_brace)); 2231 2232 FormatToken *RightBrace = LeftBrace->MatchingParen; 2233 if (!RightBrace) { 2234 assert(!LeftBrace->Optional); 2235 return; 2236 } 2237 2238 assert(RightBrace->is(tok::r_brace)); 2239 assert(RightBrace->MatchingParen == LeftBrace); 2240 assert(LeftBrace->Optional == RightBrace->Optional); 2241 2242 LeftBrace->Optional = true; 2243 RightBrace->Optional = true; 2244 } 2245 2246 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, 2247 bool KeepBraces) { 2248 auto HandleAttributes = [this]() { 2249 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2250 if (FormatTok->is(TT_AttributeMacro)) 2251 nextToken(); 2252 // Handle [[likely]] / [[unlikely]] attributes. 
if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
      parseSquare();
  };

  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
  nextToken();
  // Skip "constexpr" or a single identifier between "if" and "(".
  // NOTE(review): the identifier case presumably covers macros/specifiers
  // following "if" -- confirm.
  if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
    nextToken();
  if (FormatTok->Tok.is(tok::l_paren))
    parseParens();
  HandleAttributes();

  bool NeedsUnwrappedLine = false;
  // Pushes this statement's NestedTooDeep entry (RemoveBracesLLVM only); it is
  // popped again before this function returns.
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (FormatTok->Tok.is(tok::l_brace)) {
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    IfBlockKind = parseBlock();
    // Either end the line after "}" right away, or defer the decision until we
    // know whether an "else" follows on the same line.
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else {
    // Unbraced body: parse a single statement one level deeper.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }

  bool KeepIfBraces = false;
  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the if-braces if the "{" has no matching brace recorded, if this
    // statement was flagged as nested too deep, or if parseBlock() reported
    // IfOnly / IfElseIf for the block.
    KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->Tok.is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // Start the else-branch with a clean "too deep" flag.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    HandleAttributes();
    if (FormatTok->Tok.is(tok::l_brace)) {
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      if (parseBlock() == IfStmtKind::IfOnly)
        Kind = IfStmtKind::IfElseIf;
      addUnwrappedLine();
    } else if (FormatTok->Tok.is(tok::kw_if)) {
      // "else if": a comment between "else" and "if" forces the nested "if"
      // onto its own, more deeply indented line.
      FormatToken *Previous = Tokens->getPreviousToken();
      const bool IsPrecededByComment = Previous && Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // The nested parseIfThenElse() pushes and pops its own entry; take
        // ours off the stack for the duration of the call and restore it
        // afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace =
          parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      // Unbraced else-body: parse a single statement one level deeper.
      addUnwrappedLine();
      ++Line->Level;
      parseStructuralElement();
      if (FormatTok->is(tok::eof))
        addUnwrappedLine();
      --Line->Level;
    }
  } else {
    // No else-branch.
    if (Style.RemoveBracesLLVM)
      KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below is brace-removal bookkeeping.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  const bool KeepElseBraces =
      (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back();

  // Pop the entry pushed by keepAncestorBraces() for this statement.
  NestedTooDeep.pop_back();

  if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces are kept: unlink the pair so that a later
    // markOptionalBraces(IfLeftBrace) call becomes a no-op.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}

// Parses a try statement (try / __try) together with its handlers. Also
// covers function-try-blocks with a constructor initializer list and Java's
// try-with-resources.
void UnwrappedLineParser::parseTryCatch() {
  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  nextToken();
  bool NeedsUnwrappedLine = false;
  if (FormatTok->is(tok::colon)) {
    // We are in a function try block, what comes is an initializer list.
2378 nextToken(); 2379 2380 // In case identifiers were removed by clang-tidy, what might follow is 2381 // multiple commas in sequence - before the first identifier. 2382 while (FormatTok->is(tok::comma)) 2383 nextToken(); 2384 2385 while (FormatTok->is(tok::identifier)) { 2386 nextToken(); 2387 if (FormatTok->is(tok::l_paren)) 2388 parseParens(); 2389 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2390 FormatTok->is(tok::l_brace)) { 2391 do { 2392 nextToken(); 2393 } while (!FormatTok->is(tok::r_brace)); 2394 nextToken(); 2395 } 2396 2397 // In case identifiers were removed by clang-tidy, what might follow is 2398 // multiple commas in sequence - after the first identifier. 2399 while (FormatTok->is(tok::comma)) 2400 nextToken(); 2401 } 2402 } 2403 // Parse try with resource. 2404 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) 2405 parseParens(); 2406 2407 keepAncestorBraces(); 2408 2409 if (FormatTok->is(tok::l_brace)) { 2410 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2411 parseBlock(); 2412 if (Style.BraceWrapping.BeforeCatch) 2413 addUnwrappedLine(); 2414 else 2415 NeedsUnwrappedLine = true; 2416 } else if (!FormatTok->is(tok::kw_catch)) { 2417 // The C++ standard requires a compound-statement after a try. 2418 // If there's none, we try to assume there's a structuralElement 2419 // and try to continue. 
addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  // Parse any number of handlers: catch / __except / __finally, Java or
  // JavaScript "finally", and Objective-C @catch / @finally.
  while (true) {
    if (FormatTok->is(tok::at))
      nextToken();
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
                             tok::kw___finally) ||
          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
           FormatTok->is(Keywords.kw_finally)) ||
          (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
           FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
      break;
    nextToken();
    // Skip the handler's declaration up to its "{".
    while (FormatTok->isNot(tok::l_brace)) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        continue;
      }
      // Malformed handler without a body: give up, but still pop the entry
      // pushed by keepAncestorBraces().
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
        if (Style.RemoveBracesLLVM)
          NestedTooDeep.pop_back();
        return;
      }
      nextToken();
    }
    NeedsUnwrappedLine = false;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock();
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  }

  if (Style.RemoveBracesLLVM)
    NestedTooDeep.pop_back();

  if (NeedsUnwrappedLine)
    addUnwrappedLine();
}

// Parses a namespace declaration introduced by "namespace" or by a namespace
// macro, including its body if one follows.
void UnwrappedLineParser::parseNamespace() {
  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
         "'namespace' expected");

  const FormatToken &InitialToken = *FormatTok;
  nextToken();
  if (InitialToken.is(TT_NamespaceMacro)) {
    parseParens();
  } else {
    // Skip the (possibly qualified, possibly attributed) namespace name.
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
                              tok::l_square, tok::period) ||
           (Style.isCSharp() && FormatTok->is(tok::kw_union)))
      if (FormatTok->is(tok::l_square))
        parseSquare();
      else
        nextToken();
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ShouldBreakBeforeBrace(Style, InitialToken))
      addUnwrappedLine();

    // Indent the body for NI_All, or for NI_Inner when this namespace is
    // nested (more than one entry on DeclarationScopeStack).
    unsigned AddLevels =
        Style.NamespaceIndentation == FormatStyle::NI_All ||
(Style.NamespaceIndentation == FormatStyle::NI_Inner &&
                 DeclarationScopeStack.size() > 1)
            ? 1u
            : 0u;
    bool ManageWhitesmithsBraces =
        AddLevels == 0u &&
        Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;

    // If we're in Whitesmiths mode, indent the brace if we're not indenting
    // the whole block.
    if (ManageWhitesmithsBraces)
      ++Line->Level;

    parseBlock(/*MustBeDeclaration=*/true, AddLevels,
               /*MunchSemi=*/true,
               /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);

    // Munch the semicolon after a namespace. This is more common than one would
    // think. Putting the semicolon into its own line is very ugly.
    if (FormatTok->Tok.is(tok::semi))
      nextToken();

    addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);

    // Undo the extra Whitesmiths brace indentation applied above.
    if (ManageWhitesmithsBraces)
      --Line->Level;
  }
  // FIXME: Add error handling.
}

// Parses the tokens following "new".
//  - C#: consumes tokens (parsing braced lists) and returns at the next ";" or
//    ",", which is left unconsumed.
//  - Java: consumes everything up to and including the constructor arguments
//    and an optional anonymous class body.
//  - Other languages: only "new" itself is consumed.
void UnwrappedLineParser::parseNew() {
  assert(FormatTok->is(tok::kw_new) && "'new' expected");
  nextToken();

  if (Style.isCSharp()) {
    do {
      if (FormatTok->is(tok::l_brace))
        parseBracedList();

      if (FormatTok->isOneOf(tok::semi, tok::comma))
        return;

      nextToken();
    } while (!eof());
  }

  if (Style.Language != FormatStyle::LK_Java)
    return;

  // In Java, we can parse everything up to the parens, which aren't optional.
  do {
    // There should not be a ;, { or } before the new's open paren.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
      return;

    // Consume the parens.
    if (FormatTok->is(tok::l_paren)) {
      parseParens();

      // If there is a class body of an anonymous class, consume that as child.
2547 if (FormatTok->is(tok::l_brace)) 2548 parseChildBlock(); 2549 return; 2550 } 2551 nextToken(); 2552 } while (!eof()); 2553 } 2554 2555 void UnwrappedLineParser::parseForOrWhileLoop() { 2556 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 2557 "'for', 'while' or foreach macro expected"); 2558 nextToken(); 2559 // JS' for await ( ... 2560 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 2561 nextToken(); 2562 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 2563 nextToken(); 2564 if (FormatTok->Tok.is(tok::l_paren)) 2565 parseParens(); 2566 2567 keepAncestorBraces(); 2568 2569 if (FormatTok->Tok.is(tok::l_brace)) { 2570 FormatToken *LeftBrace = FormatTok; 2571 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2572 parseBlock(); 2573 if (Style.RemoveBracesLLVM) { 2574 assert(!NestedTooDeep.empty()); 2575 if (!NestedTooDeep.back()) 2576 markOptionalBraces(LeftBrace); 2577 } 2578 addUnwrappedLine(); 2579 } else { 2580 addUnwrappedLine(); 2581 ++Line->Level; 2582 parseStructuralElement(); 2583 --Line->Level; 2584 } 2585 2586 if (Style.RemoveBracesLLVM) 2587 NestedTooDeep.pop_back(); 2588 } 2589 2590 void UnwrappedLineParser::parseDoWhile() { 2591 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 2592 nextToken(); 2593 2594 keepAncestorBraces(); 2595 2596 if (FormatTok->Tok.is(tok::l_brace)) { 2597 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2598 parseBlock(); 2599 if (Style.BraceWrapping.BeforeWhile) 2600 addUnwrappedLine(); 2601 } else { 2602 addUnwrappedLine(); 2603 ++Line->Level; 2604 parseStructuralElement(); 2605 --Line->Level; 2606 } 2607 2608 if (Style.RemoveBracesLLVM) 2609 NestedTooDeep.pop_back(); 2610 2611 // FIXME: Add error handling. 2612 if (!FormatTok->Tok.is(tok::kw_while)) { 2613 addUnwrappedLine(); 2614 return; 2615 } 2616 2617 // If in Whitesmiths mode, the line with the while() needs to be indented 2618 // to the same level as the block. 
2619 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 2620 ++Line->Level; 2621 2622 nextToken(); 2623 parseStructuralElement(); 2624 } 2625 2626 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 2627 nextToken(); 2628 unsigned OldLineLevel = Line->Level; 2629 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 2630 --Line->Level; 2631 if (LeftAlignLabel) 2632 Line->Level = 0; 2633 2634 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 2635 FormatTok->Tok.is(tok::l_brace)) { 2636 2637 CompoundStatementIndenter Indenter(this, Line->Level, 2638 Style.BraceWrapping.AfterCaseLabel, 2639 Style.BraceWrapping.IndentBraces); 2640 parseBlock(); 2641 if (FormatTok->Tok.is(tok::kw_break)) { 2642 if (Style.BraceWrapping.AfterControlStatement == 2643 FormatStyle::BWACS_Always) { 2644 addUnwrappedLine(); 2645 if (!Style.IndentCaseBlocks && 2646 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 2647 ++Line->Level; 2648 } 2649 parseStructuralElement(); 2650 } 2651 addUnwrappedLine(); 2652 } else { 2653 if (FormatTok->is(tok::semi)) 2654 nextToken(); 2655 addUnwrappedLine(); 2656 } 2657 Line->Level = OldLineLevel; 2658 if (FormatTok->isNot(tok::l_brace)) { 2659 parseStructuralElement(); 2660 addUnwrappedLine(); 2661 } 2662 } 2663 2664 void UnwrappedLineParser::parseCaseLabel() { 2665 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 2666 2667 // FIXME: fix handling of complex expressions here. 
2668 do { 2669 nextToken(); 2670 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 2671 parseLabel(); 2672 } 2673 2674 void UnwrappedLineParser::parseSwitch() { 2675 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 2676 nextToken(); 2677 if (FormatTok->Tok.is(tok::l_paren)) 2678 parseParens(); 2679 2680 keepAncestorBraces(); 2681 2682 if (FormatTok->Tok.is(tok::l_brace)) { 2683 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2684 parseBlock(); 2685 addUnwrappedLine(); 2686 } else { 2687 addUnwrappedLine(); 2688 ++Line->Level; 2689 parseStructuralElement(); 2690 --Line->Level; 2691 } 2692 2693 if (Style.RemoveBracesLLVM) 2694 NestedTooDeep.pop_back(); 2695 } 2696 2697 void UnwrappedLineParser::parseAccessSpecifier() { 2698 FormatToken *AccessSpecifierCandidate = FormatTok; 2699 nextToken(); 2700 // Understand Qt's slots. 2701 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 2702 nextToken(); 2703 // Otherwise, we don't know what it is, and we'd better keep the next token. 2704 if (FormatTok->Tok.is(tok::colon)) { 2705 nextToken(); 2706 addUnwrappedLine(); 2707 } else if (!FormatTok->Tok.is(tok::coloncolon) && 2708 !std::binary_search(COperatorsFollowingVar.begin(), 2709 COperatorsFollowingVar.end(), 2710 FormatTok->Tok.getKind())) { 2711 // Not a variable name nor namespace name. 2712 addUnwrappedLine(); 2713 } else if (AccessSpecifierCandidate) { 2714 // Consider the access specifier to be a C identifier. 
2715 AccessSpecifierCandidate->Tok.setKind(tok::identifier); 2716 } 2717 } 2718 2719 void UnwrappedLineParser::parseConcept() { 2720 assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected"); 2721 nextToken(); 2722 if (!FormatTok->Tok.is(tok::identifier)) 2723 return; 2724 nextToken(); 2725 if (!FormatTok->Tok.is(tok::equal)) 2726 return; 2727 nextToken(); 2728 if (FormatTok->Tok.is(tok::kw_requires)) { 2729 nextToken(); 2730 parseRequiresExpression(Line->Level); 2731 } else { 2732 parseConstraintExpression(Line->Level); 2733 } 2734 } 2735 2736 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) { 2737 // requires (R range) 2738 if (FormatTok->Tok.is(tok::l_paren)) { 2739 parseParens(); 2740 if (Style.IndentRequires && OriginalLevel != Line->Level) { 2741 addUnwrappedLine(); 2742 --Line->Level; 2743 } 2744 } 2745 2746 if (FormatTok->Tok.is(tok::l_brace)) { 2747 if (Style.BraceWrapping.AfterFunction) 2748 addUnwrappedLine(); 2749 FormatTok->setType(TT_FunctionLBrace); 2750 parseBlock(); 2751 addUnwrappedLine(); 2752 } else { 2753 parseConstraintExpression(OriginalLevel); 2754 } 2755 } 2756 2757 void UnwrappedLineParser::parseConstraintExpression( 2758 unsigned int OriginalLevel) { 2759 // requires Id<T> && Id<T> || Id<T> 2760 while ( 2761 FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) { 2762 nextToken(); 2763 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less, 2764 tok::greater, tok::comma, tok::ellipsis)) { 2765 if (FormatTok->Tok.is(tok::less)) { 2766 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2767 /*ClosingBraceKind=*/tok::greater); 2768 continue; 2769 } 2770 nextToken(); 2771 } 2772 if (FormatTok->Tok.is(tok::kw_requires)) 2773 parseRequiresExpression(OriginalLevel); 2774 if (FormatTok->Tok.is(tok::less)) { 2775 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2776 /*ClosingBraceKind=*/tok::greater); 2777 } 2778 2779 if 
(FormatTok->Tok.is(tok::l_paren)) 2780 parseParens(); 2781 if (FormatTok->Tok.is(tok::l_brace)) { 2782 if (Style.BraceWrapping.AfterFunction) 2783 addUnwrappedLine(); 2784 FormatTok->setType(TT_FunctionLBrace); 2785 parseBlock(); 2786 } 2787 if (FormatTok->Tok.is(tok::semi)) { 2788 // Eat any trailing semi. 2789 nextToken(); 2790 addUnwrappedLine(); 2791 } 2792 if (FormatTok->Tok.is(tok::colon)) 2793 return; 2794 if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) { 2795 if (FormatTok->Previous && 2796 !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires, 2797 tok::coloncolon)) 2798 addUnwrappedLine(); 2799 if (Style.IndentRequires && OriginalLevel != Line->Level) 2800 --Line->Level; 2801 break; 2802 } else { 2803 FormatTok->setType(TT_ConstraintJunctions); 2804 } 2805 2806 nextToken(); 2807 } 2808 } 2809 2810 void UnwrappedLineParser::parseRequires() { 2811 assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected"); 2812 2813 unsigned OriginalLevel = Line->Level; 2814 if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) { 2815 addUnwrappedLine(); 2816 if (Style.IndentRequires) 2817 ++Line->Level; 2818 } 2819 nextToken(); 2820 2821 parseRequiresExpression(OriginalLevel); 2822 } 2823 2824 bool UnwrappedLineParser::parseEnum() { 2825 const FormatToken &InitialToken = *FormatTok; 2826 2827 // Won't be 'enum' for NS_ENUMs. 2828 if (FormatTok->Tok.is(tok::kw_enum)) 2829 nextToken(); 2830 2831 // In TypeScript, "enum" can also be used as property name, e.g. in interface 2832 // declarations. An "enum" keyword followed by a colon would be a syntax 2833 // error and thus assume it is just an identifier. 2834 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 2835 return false; 2836 2837 // In protobuf, "enum" can be used as a field name. 2838 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 2839 return false; 2840 2841 // Eat up enum class ... 
if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
    nextToken();

  // Skip the enum name, attributes/macros and an optional underlying type.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question)) {
    nextToken();
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setType(TT_RecordLBrace);
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  if (!Style.AllowShortEnumsOnASingleLine &&
      ShouldBreakBeforeBrace(Style, InitialToken))
    addUnwrappedLine();
  // Parse enum body.
  nextToken();
  if (!Style.AllowShortEnumsOnASingleLine) {
    // Each enumerator goes on its own line, one level deeper.
    addUnwrappedLine();
    Line->Level += 1;
  }
  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
                                   /*IsEnum=*/true);
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  if (HasError) {
    // The body was malformed: end the line here instead of letting the
    // caller continue it.
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  return true;

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
2900 } 2901 2902 bool UnwrappedLineParser::parseStructLike() { 2903 // parseRecord falls through and does not yet add an unwrapped line as a 2904 // record declaration or definition can start a structural element. 2905 parseRecord(); 2906 // This does not apply to Java, JavaScript and C#. 2907 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 2908 Style.isCSharp()) { 2909 if (FormatTok->is(tok::semi)) 2910 nextToken(); 2911 addUnwrappedLine(); 2912 return true; 2913 } 2914 return false; 2915 } 2916 2917 namespace { 2918 // A class used to set and restore the Token position when peeking 2919 // ahead in the token source. 2920 class ScopedTokenPosition { 2921 unsigned StoredPosition; 2922 FormatTokenSource *Tokens; 2923 2924 public: 2925 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 2926 assert(Tokens && "Tokens expected to not be null"); 2927 StoredPosition = Tokens->getPosition(); 2928 } 2929 2930 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 2931 }; 2932 } // namespace 2933 2934 // Look to see if we have [[ by looking ahead, if 2935 // its not then rewind to the original position. 2936 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 2937 ScopedTokenPosition AutoPosition(Tokens); 2938 FormatToken *Tok = Tokens->getNextToken(); 2939 // We already read the first [ check for the second. 2940 if (!Tok->is(tok::l_square)) 2941 return false; 2942 // Double check that the attribute is just something 2943 // fairly simple. 2944 while (Tok->isNot(tok::eof)) { 2945 if (Tok->is(tok::r_square)) 2946 break; 2947 Tok = Tokens->getNextToken(); 2948 } 2949 if (Tok->is(tok::eof)) 2950 return false; 2951 Tok = Tokens->getNextToken(); 2952 if (!Tok->is(tok::r_square)) 2953 return false; 2954 Tok = Tokens->getNextToken(); 2955 if (Tok->is(tok::semi)) 2956 return false; 2957 return true; 2958 } 2959 2960 void UnwrappedLineParser::parseJavaEnumBody() { 2961 // Determine whether the enum is simple, i.e. 
does not have a semicolon or 2962 // constants with class bodies. Simple enums can be formatted like braced 2963 // lists, contracted to a single line, etc. 2964 unsigned StoredPosition = Tokens->getPosition(); 2965 bool IsSimple = true; 2966 FormatToken *Tok = Tokens->getNextToken(); 2967 while (!Tok->is(tok::eof)) { 2968 if (Tok->is(tok::r_brace)) 2969 break; 2970 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2971 IsSimple = false; 2972 break; 2973 } 2974 // FIXME: This will also mark enums with braces in the arguments to enum 2975 // constants as "not simple". This is probably fine in practice, though. 2976 Tok = Tokens->getNextToken(); 2977 } 2978 FormatTok = Tokens->setPosition(StoredPosition); 2979 2980 if (IsSimple) { 2981 nextToken(); 2982 parseBracedList(); 2983 addUnwrappedLine(); 2984 return; 2985 } 2986 2987 // Parse the body of a more complex enum. 2988 // First add a line for everything up to the "{". 2989 nextToken(); 2990 addUnwrappedLine(); 2991 ++Line->Level; 2992 2993 // Parse the enum constants. 2994 while (FormatTok) { 2995 if (FormatTok->is(tok::l_brace)) { 2996 // Parse the constant's class body. 2997 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 2998 /*MunchSemi=*/false); 2999 } else if (FormatTok->is(tok::l_paren)) { 3000 parseParens(); 3001 } else if (FormatTok->is(tok::comma)) { 3002 nextToken(); 3003 addUnwrappedLine(); 3004 } else if (FormatTok->is(tok::semi)) { 3005 nextToken(); 3006 addUnwrappedLine(); 3007 break; 3008 } else if (FormatTok->is(tok::r_brace)) { 3009 addUnwrappedLine(); 3010 break; 3011 } else { 3012 nextToken(); 3013 } 3014 } 3015 3016 // Parse the class body after the enum's ";" if any. 
parseLevel(/*HasOpeningBrace=*/true);
  nextToken();
  --Line->Level;
  addUnwrappedLine();
}

// Parses a record (class / struct / union-like) declaration or definition,
// starting at the introducing keyword. With \p ParseAsExpr the body is parsed
// as a child block; otherwise it is parsed as a declaration block. No
// unwrapped line is added after the body, so declarators following "}" stay
// on the same line.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square, tok::r_square) ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // Heuristic: an identifier spelled entirely in upper case is treated as a
    // macro, which may be followed by an argument list or attribute.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier) {
      if (FormatTok->Tok.is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(TT_AttributeSquare)) {
        parseSquare();
        // Consume the closing TT_AttributeSquare.
        if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
          nextToken();
      }
    }
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  //   class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro.
// If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip base clauses / template arguments until the record body, a ";" or
    // the end of input.
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A "{" that is not a braced list is the record body; stop here.
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous || Previous->isNot(tok::r_paren)) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            break;
        }
      }
      // A ";" ends a forward declaration; it is left for the caller.
      if (FormatTok->Tok.is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    FormatTok->setType(TT_RecordLBrace);
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // Members are indented one extra level when access modifiers get their
      // own indentation level.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
3115 } 3116 3117 void UnwrappedLineParser::parseObjCMethod() { 3118 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && 3119 "'(' or identifier expected."); 3120 do { 3121 if (FormatTok->Tok.is(tok::semi)) { 3122 nextToken(); 3123 addUnwrappedLine(); 3124 return; 3125 } else if (FormatTok->Tok.is(tok::l_brace)) { 3126 if (Style.BraceWrapping.AfterFunction) 3127 addUnwrappedLine(); 3128 parseBlock(); 3129 addUnwrappedLine(); 3130 return; 3131 } else { 3132 nextToken(); 3133 } 3134 } while (!eof()); 3135 } 3136 3137 void UnwrappedLineParser::parseObjCProtocolList() { 3138 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 3139 do { 3140 nextToken(); 3141 // Early exit in case someone forgot a close angle. 3142 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3143 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 3144 return; 3145 } while (!eof() && FormatTok->Tok.isNot(tok::greater)); 3146 nextToken(); // Skip '>'. 3147 } 3148 3149 void UnwrappedLineParser::parseObjCUntilAtEnd() { 3150 do { 3151 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 3152 nextToken(); 3153 addUnwrappedLine(); 3154 break; 3155 } 3156 if (FormatTok->is(tok::l_brace)) { 3157 parseBlock(); 3158 // In ObjC interfaces, nothing should be following the "}". 3159 addUnwrappedLine(); 3160 } else if (FormatTok->is(tok::r_brace)) { 3161 // Ignore stray "}". parseStructuralElement doesn't consume them. 
3162 nextToken(); 3163 addUnwrappedLine(); 3164 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 3165 nextToken(); 3166 parseObjCMethod(); 3167 } else { 3168 parseStructuralElement(); 3169 } 3170 } while (!eof()); 3171 } 3172 3173 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 3174 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 3175 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 3176 nextToken(); 3177 nextToken(); // interface name 3178 3179 // @interface can be followed by a lightweight generic 3180 // specialization list, then either a base class or a category. 3181 if (FormatTok->Tok.is(tok::less)) 3182 parseObjCLightweightGenerics(); 3183 if (FormatTok->Tok.is(tok::colon)) { 3184 nextToken(); 3185 nextToken(); // base class name 3186 // The base class can also have lightweight generics applied to it. 3187 if (FormatTok->Tok.is(tok::less)) 3188 parseObjCLightweightGenerics(); 3189 } else if (FormatTok->Tok.is(tok::l_paren)) 3190 // Skip category, if present. 3191 parseParens(); 3192 3193 if (FormatTok->Tok.is(tok::less)) 3194 parseObjCProtocolList(); 3195 3196 if (FormatTok->Tok.is(tok::l_brace)) { 3197 if (Style.BraceWrapping.AfterObjCDeclaration) 3198 addUnwrappedLine(); 3199 parseBlock(/*MustBeDeclaration=*/true); 3200 } 3201 3202 // With instance variables, this puts '}' on its own line. Without instance 3203 // variables, this ends the @interface line. 3204 addUnwrappedLine(); 3205 3206 parseObjCUntilAtEnd(); 3207 } 3208 3209 void UnwrappedLineParser::parseObjCLightweightGenerics() { 3210 assert(FormatTok->Tok.is(tok::less)); 3211 // Unlike protocol lists, generic parameterizations support 3212 // nested angles: 3213 // 3214 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 3215 // NSObject <NSCopying, NSSecureCoding> 3216 // 3217 // so we need to count how many open angles we have left. 
3218 unsigned NumOpenAngles = 1; 3219 do { 3220 nextToken(); 3221 // Early exit in case someone forgot a close angle. 3222 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3223 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 3224 break; 3225 if (FormatTok->Tok.is(tok::less)) 3226 ++NumOpenAngles; 3227 else if (FormatTok->Tok.is(tok::greater)) { 3228 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 3229 --NumOpenAngles; 3230 } 3231 } while (!eof() && NumOpenAngles != 0); 3232 nextToken(); // Skip '>'. 3233 } 3234 3235 // Returns true for the declaration/definition form of @protocol, 3236 // false for the expression form. 3237 bool UnwrappedLineParser::parseObjCProtocol() { 3238 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 3239 nextToken(); 3240 3241 if (FormatTok->is(tok::l_paren)) 3242 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 3243 return false; 3244 3245 // The definition/declaration form, 3246 // @protocol Foo 3247 // - (int)someMethod; 3248 // @end 3249 3250 nextToken(); // protocol name 3251 3252 if (FormatTok->Tok.is(tok::less)) 3253 parseObjCProtocolList(); 3254 3255 // Check for protocol declaration. 3256 if (FormatTok->Tok.is(tok::semi)) { 3257 nextToken(); 3258 addUnwrappedLine(); 3259 return true; 3260 } 3261 3262 addUnwrappedLine(); 3263 parseObjCUntilAtEnd(); 3264 return true; 3265 } 3266 3267 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 3268 bool IsImport = FormatTok->is(Keywords.kw_import); 3269 assert(IsImport || FormatTok->is(tok::kw_export)); 3270 nextToken(); 3271 3272 // Consume the "default" in "export default class/function". 3273 if (FormatTok->is(tok::kw_default)) 3274 nextToken(); 3275 3276 // Consume "async function", "function" and "default function", so that these 3277 // get parsed as free-standing JS functions, i.e. do not require a trailing 3278 // semicolon. 
3279 if (FormatTok->is(Keywords.kw_async)) 3280 nextToken(); 3281 if (FormatTok->is(Keywords.kw_function)) { 3282 nextToken(); 3283 return; 3284 } 3285 3286 // For imports, `export *`, `export {...}`, consume the rest of the line up 3287 // to the terminating `;`. For everything else, just return and continue 3288 // parsing the structural element, i.e. the declaration or expression for 3289 // `export default`. 3290 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 3291 !FormatTok->isStringLiteral()) 3292 return; 3293 3294 while (!eof()) { 3295 if (FormatTok->is(tok::semi)) 3296 return; 3297 if (Line->Tokens.empty()) { 3298 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 3299 // import statement should terminate. 3300 return; 3301 } 3302 if (FormatTok->is(tok::l_brace)) { 3303 FormatTok->setBlockKind(BK_Block); 3304 nextToken(); 3305 parseBracedList(); 3306 } else { 3307 nextToken(); 3308 } 3309 } 3310 } 3311 3312 void UnwrappedLineParser::parseStatementMacro() { 3313 nextToken(); 3314 if (FormatTok->is(tok::l_paren)) 3315 parseParens(); 3316 if (FormatTok->is(tok::semi)) 3317 nextToken(); 3318 addUnwrappedLine(); 3319 } 3320 3321 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 3322 StringRef Prefix = "") { 3323 llvm::dbgs() << Prefix << "Line(" << Line.Level 3324 << ", FSC=" << Line.FirstStartColumn << ")" 3325 << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 3326 for (const auto &Node : Line.Tokens) { 3327 llvm::dbgs() << Node.Tok->Tok.getName() << "[" 3328 << "T=" << static_cast<unsigned>(Node.Tok->getType()) 3329 << ", OC=" << Node.Tok->OriginalColumn << "] "; 3330 } 3331 for (const auto &Node : Line.Tokens) 3332 for (const auto &ChildNode : Node.Children) 3333 printDebugInfo(ChildNode, "\nChild: "); 3334 3335 llvm::dbgs() << "\n"; 3336 } 3337 3338 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 3339 if (Line->Tokens.empty()) 3340 return; 3341 LLVM_DEBUG({ 3342 if (CurrentLines == &Lines) 3343 printDebugInfo(*Line); 3344 }); 3345 3346 // If this line closes a block when in Whitesmiths mode, remember that 3347 // information so that the level can be decreased after the line is added. 3348 // This has to happen after the addition of the line since the line itself 3349 // needs to be indented. 3350 bool ClosesWhitesmithsBlock = 3351 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 3352 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 3353 3354 CurrentLines->push_back(std::move(*Line)); 3355 Line->Tokens.clear(); 3356 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 3357 Line->FirstStartColumn = 0; 3358 3359 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 3360 --Line->Level; 3361 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 3362 CurrentLines->append( 3363 std::make_move_iterator(PreprocessorDirectives.begin()), 3364 std::make_move_iterator(PreprocessorDirectives.end())); 3365 PreprocessorDirectives.clear(); 3366 } 3367 // Disconnect the current token from the last token on the previous line. 
3368 FormatTok->Previous = nullptr; 3369 } 3370 3371 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 3372 3373 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 3374 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 3375 FormatTok.NewlinesBefore > 0; 3376 } 3377 3378 // Checks if \p FormatTok is a line comment that continues the line comment 3379 // section on \p Line. 3380 static bool 3381 continuesLineCommentSection(const FormatToken &FormatTok, 3382 const UnwrappedLine &Line, 3383 const llvm::Regex &CommentPragmasRegex) { 3384 if (Line.Tokens.empty()) 3385 return false; 3386 3387 StringRef IndentContent = FormatTok.TokenText; 3388 if (FormatTok.TokenText.startswith("//") || 3389 FormatTok.TokenText.startswith("/*")) 3390 IndentContent = FormatTok.TokenText.substr(2); 3391 if (CommentPragmasRegex.match(IndentContent)) 3392 return false; 3393 3394 // If Line starts with a line comment, then FormatTok continues the comment 3395 // section if its original column is greater or equal to the original start 3396 // column of the line. 3397 // 3398 // Define the min column token of a line as follows: if a line ends in '{' or 3399 // contains a '{' followed by a line comment, then the min column token is 3400 // that '{'. Otherwise, the min column token of the line is the first token of 3401 // the line. 3402 // 3403 // If Line starts with a token other than a line comment, then FormatTok 3404 // continues the comment section if its original column is greater than the 3405 // original start column of the min column token of the line. 
3406 // 3407 // For example, the second line comment continues the first in these cases: 3408 // 3409 // // first line 3410 // // second line 3411 // 3412 // and: 3413 // 3414 // // first line 3415 // // second line 3416 // 3417 // and: 3418 // 3419 // int i; // first line 3420 // // second line 3421 // 3422 // and: 3423 // 3424 // do { // first line 3425 // // second line 3426 // int i; 3427 // } while (true); 3428 // 3429 // and: 3430 // 3431 // enum { 3432 // a, // first line 3433 // // second line 3434 // b 3435 // }; 3436 // 3437 // The second line comment doesn't continue the first in these cases: 3438 // 3439 // // first line 3440 // // second line 3441 // 3442 // and: 3443 // 3444 // int i; // first line 3445 // // second line 3446 // 3447 // and: 3448 // 3449 // do { // first line 3450 // // second line 3451 // int i; 3452 // } while (true); 3453 // 3454 // and: 3455 // 3456 // enum { 3457 // a, // first line 3458 // // second line 3459 // }; 3460 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 3461 3462 // Scan for '{//'. If found, use the column of '{' as a min column for line 3463 // comment section continuation. 3464 const FormatToken *PreviousToken = nullptr; 3465 for (const UnwrappedLineNode &Node : Line.Tokens) { 3466 if (PreviousToken && PreviousToken->is(tok::l_brace) && 3467 isLineComment(*Node.Tok)) { 3468 MinColumnToken = PreviousToken; 3469 break; 3470 } 3471 PreviousToken = Node.Tok; 3472 3473 // Grab the last newline preceding a token in this unwrapped line. 
3474 if (Node.Tok->NewlinesBefore > 0) 3475 MinColumnToken = Node.Tok; 3476 } 3477 if (PreviousToken && PreviousToken->is(tok::l_brace)) 3478 MinColumnToken = PreviousToken; 3479 3480 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 3481 MinColumnToken); 3482 } 3483 3484 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 3485 bool JustComments = Line->Tokens.empty(); 3486 for (FormatToken *Tok : CommentsBeforeNextToken) { 3487 // Line comments that belong to the same line comment section are put on the 3488 // same line since later we might want to reflow content between them. 3489 // Additional fine-grained breaking of line comment sections is controlled 3490 // by the class BreakableLineCommentSection in case it is desirable to keep 3491 // several line comment sections in the same unwrapped line. 3492 // 3493 // FIXME: Consider putting separate line comment sections as children to the 3494 // unwrapped line instead. 3495 Tok->ContinuesLineCommentSection = 3496 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 3497 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 3498 addUnwrappedLine(); 3499 pushToken(Tok); 3500 } 3501 if (NewlineBeforeNext && JustComments) 3502 addUnwrappedLine(); 3503 CommentsBeforeNextToken.clear(); 3504 } 3505 3506 void UnwrappedLineParser::nextToken(int LevelDifference) { 3507 if (eof()) 3508 return; 3509 flushComments(isOnNewLine(*FormatTok)); 3510 pushToken(FormatTok); 3511 FormatToken *Previous = FormatTok; 3512 if (!Style.isJavaScript()) 3513 readToken(LevelDifference); 3514 else 3515 readTokenWithJavaScriptASI(); 3516 FormatTok->Previous = Previous; 3517 } 3518 3519 void UnwrappedLineParser::distributeComments( 3520 const SmallVectorImpl<FormatToken *> &Comments, 3521 const FormatToken *NextTok) { 3522 // Whether or not a line comment token continues a line is controlled by 3523 // the method continuesLineCommentSection, with the following caveat: 3524 // 
3525 // Define a trail of Comments to be a nonempty proper postfix of Comments such 3526 // that each comment line from the trail is aligned with the next token, if 3527 // the next token exists. If a trail exists, the beginning of the maximal 3528 // trail is marked as a start of a new comment section. 3529 // 3530 // For example in this code: 3531 // 3532 // int a; // line about a 3533 // // line 1 about b 3534 // // line 2 about b 3535 // int b; 3536 // 3537 // the two lines about b form a maximal trail, so there are two sections, the 3538 // first one consisting of the single comment "// line about a" and the 3539 // second one consisting of the next two comments. 3540 if (Comments.empty()) 3541 return; 3542 bool ShouldPushCommentsInCurrentLine = true; 3543 bool HasTrailAlignedWithNextToken = false; 3544 unsigned StartOfTrailAlignedWithNextToken = 0; 3545 if (NextTok) { 3546 // We are skipping the first element intentionally. 3547 for (unsigned i = Comments.size() - 1; i > 0; --i) { 3548 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 3549 HasTrailAlignedWithNextToken = true; 3550 StartOfTrailAlignedWithNextToken = i; 3551 } 3552 } 3553 } 3554 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 3555 FormatToken *FormatTok = Comments[i]; 3556 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 3557 FormatTok->ContinuesLineCommentSection = false; 3558 } else { 3559 FormatTok->ContinuesLineCommentSection = 3560 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 3561 } 3562 if (!FormatTok->ContinuesLineCommentSection && 3563 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) 3564 ShouldPushCommentsInCurrentLine = false; 3565 if (ShouldPushCommentsInCurrentLine) 3566 pushToken(FormatTok); 3567 else 3568 CommentsBeforeNextToken.push_back(FormatTok); 3569 } 3570 } 3571 3572 void UnwrappedLineParser::readToken(int LevelDifference) { 3573 SmallVector<FormatToken *, 1> Comments; 3574 do { 3575 FormatTok = 
Tokens->getNextToken(); 3576 assert(FormatTok); 3577 while (FormatTok->getType() == TT_ConflictStart || 3578 FormatTok->getType() == TT_ConflictEnd || 3579 FormatTok->getType() == TT_ConflictAlternative) { 3580 if (FormatTok->getType() == TT_ConflictStart) 3581 conditionalCompilationStart(/*Unreachable=*/false); 3582 else if (FormatTok->getType() == TT_ConflictAlternative) 3583 conditionalCompilationAlternative(); 3584 else if (FormatTok->getType() == TT_ConflictEnd) 3585 conditionalCompilationEnd(); 3586 FormatTok = Tokens->getNextToken(); 3587 FormatTok->MustBreakBefore = true; 3588 } 3589 3590 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 3591 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 3592 distributeComments(Comments, FormatTok); 3593 Comments.clear(); 3594 // If there is an unfinished unwrapped line, we flush the preprocessor 3595 // directives only after that unwrapped line was finished later. 3596 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 3597 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 3598 assert((LevelDifference >= 0 || 3599 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 3600 "LevelDifference makes Line->Level negative"); 3601 Line->Level += LevelDifference; 3602 // Comments stored before the preprocessor directive need to be output 3603 // before the preprocessor directive, at the same level as the 3604 // preprocessor directive, as we consider them to apply to the directive. 
3605 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 3606 PPBranchLevel > 0) 3607 Line->Level += PPBranchLevel; 3608 flushComments(isOnNewLine(*FormatTok)); 3609 parsePPDirective(); 3610 } 3611 3612 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 3613 !Line->InPPDirective) 3614 continue; 3615 3616 if (!FormatTok->Tok.is(tok::comment)) { 3617 distributeComments(Comments, FormatTok); 3618 Comments.clear(); 3619 return; 3620 } 3621 3622 Comments.push_back(FormatTok); 3623 } while (!eof()); 3624 3625 distributeComments(Comments, nullptr); 3626 Comments.clear(); 3627 } 3628 3629 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 3630 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 3631 if (MustBreakBeforeNextToken) { 3632 Line->Tokens.back().Tok->MustBreakBefore = true; 3633 MustBreakBeforeNextToken = false; 3634 } 3635 } 3636 3637 } // end namespace format 3638 } // end namespace clang 3639