1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "TokenAnnotator.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/Support/Debug.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 #include <algorithm> 23 24 #define DEBUG_TYPE "format-parser" 25 26 namespace clang { 27 namespace format { 28 29 class FormatTokenSource { 30 public: 31 virtual ~FormatTokenSource() {} 32 33 // Returns the next token in the token stream. 34 virtual FormatToken *getNextToken() = 0; 35 36 // Returns the token preceding the token returned by the last call to 37 // getNextToken() in the token stream, or nullptr if no such token exists. 38 virtual FormatToken *getPreviousToken() = 0; 39 40 // Returns the token that would be returned by the next call to 41 // getNextToken(). 42 virtual FormatToken *peekNextToken() = 0; 43 44 // Returns whether we are at the end of the file. 45 // This can be different from whether getNextToken() returned an eof token 46 // when the FormatTokenSource is a view on a part of the token stream. 47 virtual bool isEOF() = 0; 48 49 // Gets the current position in the token stream, to be used by setPosition(). 50 virtual unsigned getPosition() = 0; 51 52 // Resets the token stream to the state it was in when getPosition() returned 53 // Position, and return the token at that position in the stream. 54 virtual FormatToken *setPosition(unsigned Position) = 0; 55 }; 56 57 namespace { 58 59 class ScopedDeclarationState { 60 public: 61 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 62 bool MustBeDeclaration) 63 : Line(Line), Stack(Stack) { 64 Line.MustBeDeclaration = MustBeDeclaration; 65 Stack.push_back(MustBeDeclaration); 66 } 67 ~ScopedDeclarationState() { 68 Stack.pop_back(); 69 if (!Stack.empty()) 70 Line.MustBeDeclaration = Stack.back(); 71 else 72 Line.MustBeDeclaration = true; 73 } 74 75 private: 76 UnwrappedLine &Line; 77 std::vector<bool> &Stack; 78 }; 79 80 static bool isLineComment(const FormatToken &FormatTok) { 81 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 82 } 83 84 // Checks if \p FormatTok is a line comment that continues the line comment 85 // \p Previous. The original column of \p MinColumnToken is used to determine 86 // whether \p FormatTok is indented enough to the right to continue \p Previous. 87 static bool continuesLineComment(const FormatToken &FormatTok, 88 const FormatToken *Previous, 89 const FormatToken *MinColumnToken) { 90 if (!Previous || !MinColumnToken) 91 return false; 92 unsigned MinContinueColumn = 93 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 94 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 95 isLineComment(*Previous) && 96 FormatTok.OriginalColumn >= MinContinueColumn; 97 } 98 99 class ScopedMacroState : public FormatTokenSource { 100 public: 101 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 102 FormatToken *&ResetToken) 103 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 104 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 105 Token(nullptr), PreviousToken(nullptr) { 106 FakeEOF.Tok.startToken(); 107 FakeEOF.Tok.setKind(tok::eof); 108 TokenSource = this; 109 Line.Level = 0; 110 Line.InPPDirective = true; 111 } 112 113 ~ScopedMacroState() override { 114 TokenSource = PreviousTokenSource; 115 ResetToken = Token; 116 Line.InPPDirective = false; 117 Line.Level = PreviousLineLevel; 118 } 119 120 FormatToken *getNextToken() override { 121 // The \c UnwrappedLineParser guards against this by never calling 122 // \c getNextToken() after it has encountered the first eof token. 123 assert(!eof()); 124 PreviousToken = Token; 125 Token = PreviousTokenSource->getNextToken(); 126 if (eof()) 127 return &FakeEOF; 128 return Token; 129 } 130 131 FormatToken *getPreviousToken() override { 132 return PreviousTokenSource->getPreviousToken(); 133 } 134 135 FormatToken *peekNextToken() override { 136 if (eof()) 137 return &FakeEOF; 138 return PreviousTokenSource->peekNextToken(); 139 } 140 141 bool isEOF() override { return PreviousTokenSource->isEOF(); } 142 143 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 144 145 FormatToken *setPosition(unsigned Position) override { 146 PreviousToken = nullptr; 147 Token = PreviousTokenSource->setPosition(Position); 148 return Token; 149 } 150 151 private: 152 bool eof() { 153 return Token && Token->HasUnescapedNewline && 154 !continuesLineComment(*Token, PreviousToken, 155 /*MinColumnToken=*/PreviousToken); 156 } 157 158 FormatToken FakeEOF; 159 UnwrappedLine &Line; 160 FormatTokenSource *&TokenSource; 161 FormatToken *&ResetToken; 162 unsigned PreviousLineLevel; 163 FormatTokenSource *PreviousTokenSource; 164 165 FormatToken *Token; 166 FormatToken *PreviousToken; 167 }; 168 169 } // end anonymous namespace 170 171 class ScopedLineState { 172 public: 173 ScopedLineState(UnwrappedLineParser &Parser, 174 bool SwitchToPreprocessorLines = false) 175 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 176 if (SwitchToPreprocessorLines) 177 Parser.CurrentLines = &Parser.PreprocessorDirectives; 178 else if (!Parser.Line->Tokens.empty()) 179 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 180 PreBlockLine = std::move(Parser.Line); 181 Parser.Line = std::make_unique<UnwrappedLine>(); 182 Parser.Line->Level = PreBlockLine->Level; 183 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 184 } 185 186 ~ScopedLineState() { 187 if (!Parser.Line->Tokens.empty()) { 188 Parser.addUnwrappedLine(); 189 } 190 assert(Parser.Line->Tokens.empty()); 191 Parser.Line = std::move(PreBlockLine); 192 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 193 Parser.MustBreakBeforeNextToken = true; 194 Parser.CurrentLines = OriginalLines; 195 } 196 197 private: 198 UnwrappedLineParser &Parser; 199 200 std::unique_ptr<UnwrappedLine> PreBlockLine; 201 SmallVectorImpl<UnwrappedLine> *OriginalLines; 202 }; 203 204 class CompoundStatementIndenter { 205 public: 206 CompoundStatementIndenter(UnwrappedLineParser *Parser, 207 const FormatStyle &Style, unsigned &LineLevel) 208 : CompoundStatementIndenter(Parser, LineLevel, 209 Style.BraceWrapping.AfterControlStatement, 210 Style.BraceWrapping.IndentBraces) {} 211 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 212 bool WrapBrace, bool IndentBrace) 213 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 214 if (WrapBrace) 215 Parser->addUnwrappedLine(); 216 if (IndentBrace) 217 ++LineLevel; 218 } 219 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 220 221 private: 222 unsigned &LineLevel; 223 unsigned OldLineLevel; 224 }; 225 226 namespace { 227 228 class IndexedTokenSource : public FormatTokenSource { 229 public: 230 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 231 : Tokens(Tokens), Position(-1) {} 232 233 FormatToken *getNextToken() override { 234 if (Position >= 0 && Tokens[Position]->is(tok::eof)) { 235 LLVM_DEBUG({ 236 llvm::dbgs() << "Next "; 237 dbgToken(Position); 238 }); 239 return Tokens[Position]; 240 } 241 ++Position; 242 LLVM_DEBUG({ 243 llvm::dbgs() << "Next "; 244 dbgToken(Position); 245 }); 246 return Tokens[Position]; 247 } 248 249 FormatToken *getPreviousToken() override { 250 return Position > 0 ? Tokens[Position - 1] : nullptr; 251 } 252 253 FormatToken *peekNextToken() override { 254 int Next = Position + 1; 255 LLVM_DEBUG({ 256 llvm::dbgs() << "Peeking "; 257 dbgToken(Next); 258 }); 259 return Tokens[Next]; 260 } 261 262 bool isEOF() override { return Tokens[Position]->is(tok::eof); } 263 264 unsigned getPosition() override { 265 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 266 assert(Position >= 0); 267 return Position; 268 } 269 270 FormatToken *setPosition(unsigned P) override { 271 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 272 Position = P; 273 return Tokens[Position]; 274 } 275 276 void reset() { Position = -1; } 277 278 private: 279 void dbgToken(int Position, llvm::StringRef Indent = "") { 280 FormatToken *Tok = Tokens[Position]; 281 llvm::dbgs() << Indent << "[" << Position 282 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 283 << ", Macro: " << !!Tok->MacroCtx << "\n"; 284 } 285 286 ArrayRef<FormatToken *> Tokens; 287 int Position; 288 }; 289 290 } // end anonymous namespace 291 292 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 293 const AdditionalKeywords &Keywords, 294 unsigned FirstStartColumn, 295 ArrayRef<FormatToken *> Tokens, 296 UnwrappedLineConsumer &Callback) 297 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 298 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 299 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 300 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 301 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 302 ? IG_Rejected 303 : IG_Inited), 304 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 305 306 void UnwrappedLineParser::reset() { 307 PPBranchLevel = -1; 308 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 309 ? IG_Rejected 310 : IG_Inited; 311 IncludeGuardToken = nullptr; 312 Line.reset(new UnwrappedLine); 313 CommentsBeforeNextToken.clear(); 314 FormatTok = nullptr; 315 MustBreakBeforeNextToken = false; 316 PreprocessorDirectives.clear(); 317 CurrentLines = &Lines; 318 DeclarationScopeStack.clear(); 319 NestedTooDeep.clear(); 320 PPStack.clear(); 321 Line->FirstStartColumn = FirstStartColumn; 322 } 323 324 void UnwrappedLineParser::parse() { 325 IndexedTokenSource TokenSource(AllTokens); 326 Line->FirstStartColumn = FirstStartColumn; 327 do { 328 LLVM_DEBUG(llvm::dbgs() << "----\n"); 329 reset(); 330 Tokens = &TokenSource; 331 TokenSource.reset(); 332 333 readToken(); 334 parseFile(); 335 336 // If we found an include guard then all preprocessor directives (other than 337 // the guard) are over-indented by one. 338 if (IncludeGuard == IG_Found) 339 for (auto &Line : Lines) 340 if (Line.InPPDirective && Line.Level > 0) 341 --Line.Level; 342 343 // Create line with eof token. 344 pushToken(FormatTok); 345 addUnwrappedLine(); 346 347 for (const UnwrappedLine &Line : Lines) 348 Callback.consumeUnwrappedLine(Line); 349 350 Callback.finishRun(); 351 Lines.clear(); 352 while (!PPLevelBranchIndex.empty() && 353 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 354 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 355 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 356 } 357 if (!PPLevelBranchIndex.empty()) { 358 ++PPLevelBranchIndex.back(); 359 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 360 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 361 } 362 } while (!PPLevelBranchIndex.empty()); 363 } 364 365 void UnwrappedLineParser::parseFile() { 366 // The top-level context in a file always has declarations, except for pre- 367 // processor directives and JavaScript files. 368 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 369 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 370 MustBeDeclaration); 371 if (Style.Language == FormatStyle::LK_TextProto) 372 parseBracedList(); 373 else 374 parseLevel(/*HasOpeningBrace=*/false); 375 // Make sure to format the remaining tokens. 376 // 377 // LK_TextProto is special since its top-level is parsed as the body of a 378 // braced list, which does not necessarily have natural line separators such 379 // as a semicolon. Comments after the last entry that have been determined to 380 // not belong to that line, as in: 381 // key: value 382 // // endfile comment 383 // do not have a chance to be put on a line of their own until this point. 384 // Here we add this newline before end-of-file comments. 385 if (Style.Language == FormatStyle::LK_TextProto && 386 !CommentsBeforeNextToken.empty()) 387 addUnwrappedLine(); 388 flushComments(true); 389 addUnwrappedLine(); 390 } 391 392 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 393 do { 394 switch (FormatTok->Tok.getKind()) { 395 case tok::l_brace: 396 return; 397 default: 398 if (FormatTok->is(Keywords.kw_where)) { 399 addUnwrappedLine(); 400 nextToken(); 401 parseCSharpGenericTypeConstraint(); 402 break; 403 } 404 nextToken(); 405 break; 406 } 407 } while (!eof()); 408 } 409 410 void UnwrappedLineParser::parseCSharpAttribute() { 411 int UnpairedSquareBrackets = 1; 412 do { 413 switch (FormatTok->Tok.getKind()) { 414 case tok::r_square: 415 nextToken(); 416 --UnpairedSquareBrackets; 417 if (UnpairedSquareBrackets == 0) { 418 addUnwrappedLine(); 419 return; 420 } 421 break; 422 case tok::l_square: 423 ++UnpairedSquareBrackets; 424 nextToken(); 425 break; 426 default: 427 nextToken(); 428 break; 429 } 430 } while (!eof()); 431 } 432 433 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 434 if (!Lines.empty() && Lines.back().InPPDirective) 435 return true; 436 437 const FormatToken *Previous = Tokens->getPreviousToken(); 438 return Previous && Previous->is(tok::comment) && 439 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 440 } 441 442 bool UnwrappedLineParser::mightFitOnOneLine() const { 443 const auto ColumnLimit = Style.ColumnLimit; 444 if (ColumnLimit == 0) 445 return true; 446 447 if (Lines.empty()) 448 return true; 449 450 const auto &PreviousLine = Lines.back(); 451 const auto &Tokens = PreviousLine.Tokens; 452 assert(!Tokens.empty()); 453 const auto *LastToken = Tokens.back().Tok; 454 assert(LastToken); 455 if (!LastToken->isOneOf(tok::semi, tok::comment)) 456 return true; 457 458 AnnotatedLine Line(PreviousLine); 459 assert(Line.Last == LastToken); 460 461 TokenAnnotator Annotator(Style, Keywords); 462 Annotator.annotate(Line); 463 Annotator.calculateFormattingInformation(Line); 464 465 return Line.Level * Style.IndentWidth + LastToken->TotalLength <= ColumnLimit; 466 } 467 468 // Returns true if a simple block, or false otherwise. (A simple block has a 469 // single statement that fits on a single line.) 470 bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace, IfStmtKind *IfKind) { 471 const bool IsPrecededByCommentOrPPDirective = 472 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective(); 473 unsigned StatementCount = 0; 474 bool SwitchLabelEncountered = false; 475 do { 476 tok::TokenKind kind = FormatTok->Tok.getKind(); 477 if (FormatTok->getType() == TT_MacroBlockBegin) { 478 kind = tok::l_brace; 479 } else if (FormatTok->getType() == TT_MacroBlockEnd) { 480 kind = tok::r_brace; 481 } 482 483 switch (kind) { 484 case tok::comment: 485 nextToken(); 486 addUnwrappedLine(); 487 break; 488 case tok::l_brace: 489 // FIXME: Add parameter whether this can happen - if this happens, we must 490 // be in a non-declaration context. 491 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 492 continue; 493 parseBlock(); 494 ++StatementCount; 495 assert(StatementCount > 0 && "StatementCount overflow!"); 496 addUnwrappedLine(); 497 break; 498 case tok::r_brace: 499 if (HasOpeningBrace) { 500 if (!Style.RemoveBracesLLVM) 501 return false; 502 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || 503 IsPrecededByCommentOrPPDirective || 504 precededByCommentOrPPDirective()) { 505 return false; 506 } 507 const FormatToken *Next = Tokens->peekNextToken(); 508 if (Next->is(tok::comment) && Next->NewlinesBefore == 0) 509 return false; 510 return mightFitOnOneLine(); 511 } 512 nextToken(); 513 addUnwrappedLine(); 514 break; 515 case tok::kw_default: { 516 unsigned StoredPosition = Tokens->getPosition(); 517 FormatToken *Next; 518 do { 519 Next = Tokens->getNextToken(); 520 } while (Next->is(tok::comment)); 521 FormatTok = Tokens->setPosition(StoredPosition); 522 if (Next && Next->isNot(tok::colon)) { 523 // default not followed by ':' is not a case label; treat it like 524 // an identifier. 525 parseStructuralElement(); 526 break; 527 } 528 // Else, if it is 'default:', fall through to the case handling. 529 LLVM_FALLTHROUGH; 530 } 531 case tok::kw_case: 532 if (Style.isJavaScript() && Line->MustBeDeclaration) { 533 // A 'case: string' style field declaration. 534 parseStructuralElement(); 535 break; 536 } 537 if (!SwitchLabelEncountered && 538 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 539 ++Line->Level; 540 SwitchLabelEncountered = true; 541 parseStructuralElement(); 542 break; 543 case tok::l_square: 544 if (Style.isCSharp()) { 545 nextToken(); 546 parseCSharpAttribute(); 547 break; 548 } 549 LLVM_FALLTHROUGH; 550 default: 551 parseStructuralElement(IfKind, !HasOpeningBrace); 552 ++StatementCount; 553 assert(StatementCount > 0 && "StatementCount overflow!"); 554 break; 555 } 556 } while (!eof()); 557 return false; 558 } 559 560 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 561 // We'll parse forward through the tokens until we hit 562 // a closing brace or eof - note that getNextToken() will 563 // parse macros, so this will magically work inside macro 564 // definitions, too. 565 unsigned StoredPosition = Tokens->getPosition(); 566 FormatToken *Tok = FormatTok; 567 const FormatToken *PrevTok = Tok->Previous; 568 // Keep a stack of positions of lbrace tokens. We will 569 // update information about whether an lbrace starts a 570 // braced init list or a different block during the loop. 571 SmallVector<FormatToken *, 8> LBraceStack; 572 assert(Tok->Tok.is(tok::l_brace)); 573 do { 574 // Get next non-comment token. 575 FormatToken *NextTok; 576 unsigned ReadTokens = 0; 577 do { 578 NextTok = Tokens->getNextToken(); 579 ++ReadTokens; 580 } while (NextTok->is(tok::comment)); 581 582 switch (Tok->Tok.getKind()) { 583 case tok::l_brace: 584 if (Style.isJavaScript() && PrevTok) { 585 if (PrevTok->isOneOf(tok::colon, tok::less)) 586 // A ':' indicates this code is in a type, or a braced list 587 // following a label in an object literal ({a: {b: 1}}). 588 // A '<' could be an object used in a comparison, but that is nonsense 589 // code (can never return true), so more likely it is a generic type 590 // argument (`X<{a: string; b: number}>`). 591 // The code below could be confused by semicolons between the 592 // individual members in a type member list, which would normally 593 // trigger BK_Block. In both cases, this must be parsed as an inline 594 // braced init. 595 Tok->setBlockKind(BK_BracedInit); 596 else if (PrevTok->is(tok::r_paren)) 597 // `) { }` can only occur in function or method declarations in JS. 598 Tok->setBlockKind(BK_Block); 599 } else { 600 Tok->setBlockKind(BK_Unknown); 601 } 602 LBraceStack.push_back(Tok); 603 break; 604 case tok::r_brace: 605 if (LBraceStack.empty()) 606 break; 607 if (LBraceStack.back()->is(BK_Unknown)) { 608 bool ProbablyBracedList = false; 609 if (Style.Language == FormatStyle::LK_Proto) { 610 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 611 } else { 612 // Skip NextTok over preprocessor lines, otherwise we may not 613 // properly diagnose the block as a braced intializer 614 // if the comma separator appears after the pp directive. 615 while (NextTok->is(tok::hash)) { 616 ScopedMacroState MacroState(*Line, Tokens, NextTok); 617 do { 618 NextTok = Tokens->getNextToken(); 619 ++ReadTokens; 620 } while (NextTok->isNot(tok::eof)); 621 } 622 623 // Using OriginalColumn to distinguish between ObjC methods and 624 // binary operators is a bit hacky. 625 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 626 NextTok->OriginalColumn == 0; 627 628 // If there is a comma, semicolon or right paren after the closing 629 // brace, we assume this is a braced initializer list. Note that 630 // regardless how we mark inner braces here, we will overwrite the 631 // BlockKind later if we parse a braced list (where all blocks 632 // inside are by default braced lists), or when we explicitly detect 633 // blocks (for example while parsing lambdas). 634 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 635 // braced list in JS. 636 ProbablyBracedList = 637 (Style.isJavaScript() && 638 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 639 Keywords.kw_as)) || 640 (Style.isCpp() && NextTok->is(tok::l_paren)) || 641 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 642 tok::r_paren, tok::r_square, tok::l_brace, 643 tok::ellipsis) || 644 (NextTok->is(tok::identifier) && 645 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 646 (NextTok->is(tok::semi) && 647 (!ExpectClassBody || LBraceStack.size() != 1)) || 648 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 649 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 650 // We can have an array subscript after a braced init 651 // list, but C++11 attributes are expected after blocks. 652 NextTok = Tokens->getNextToken(); 653 ++ReadTokens; 654 ProbablyBracedList = NextTok->isNot(tok::l_square); 655 } 656 } 657 if (ProbablyBracedList) { 658 Tok->setBlockKind(BK_BracedInit); 659 LBraceStack.back()->setBlockKind(BK_BracedInit); 660 } else { 661 Tok->setBlockKind(BK_Block); 662 LBraceStack.back()->setBlockKind(BK_Block); 663 } 664 } 665 LBraceStack.pop_back(); 666 break; 667 case tok::identifier: 668 if (!Tok->is(TT_StatementMacro)) 669 break; 670 LLVM_FALLTHROUGH; 671 case tok::at: 672 case tok::semi: 673 case tok::kw_if: 674 case tok::kw_while: 675 case tok::kw_for: 676 case tok::kw_switch: 677 case tok::kw_try: 678 case tok::kw___try: 679 if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown)) 680 LBraceStack.back()->setBlockKind(BK_Block); 681 break; 682 default: 683 break; 684 } 685 PrevTok = Tok; 686 Tok = NextTok; 687 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 688 689 // Assume other blocks for all unclosed opening braces. 690 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 691 if (LBraceStack[i]->is(BK_Unknown)) 692 LBraceStack[i]->setBlockKind(BK_Block); 693 } 694 695 FormatTok = Tokens->setPosition(StoredPosition); 696 } 697 698 template <class T> 699 static inline void hash_combine(std::size_t &seed, const T &v) { 700 std::hash<T> hasher; 701 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 702 } 703 704 size_t UnwrappedLineParser::computePPHash() const { 705 size_t h = 0; 706 for (const auto &i : PPStack) { 707 hash_combine(h, size_t(i.Kind)); 708 hash_combine(h, i.Line); 709 } 710 return h; 711 } 712 713 UnwrappedLineParser::IfStmtKind 714 UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels, 715 bool MunchSemi, 716 bool UnindentWhitesmithsBraces) { 717 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 718 "'{' or macro block token expected"); 719 FormatToken *Tok = FormatTok; 720 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 721 FormatTok->setBlockKind(BK_Block); 722 723 // For Whitesmiths mode, jump to the next level prior to skipping over the 724 // braces. 725 if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 726 ++Line->Level; 727 728 size_t PPStartHash = computePPHash(); 729 730 unsigned InitialLevel = Line->Level; 731 nextToken(/*LevelDifference=*/AddLevels); 732 733 if (MacroBlock && FormatTok->is(tok::l_paren)) 734 parseParens(); 735 736 size_t NbPreprocessorDirectives = 737 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 738 addUnwrappedLine(); 739 size_t OpeningLineIndex = 740 CurrentLines->empty() 741 ? (UnwrappedLine::kInvalidIndex) 742 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 743 744 // Whitesmiths is weird here. The brace needs to be indented for the namespace 745 // block, but the block itself may not be indented depending on the style 746 // settings. This allows the format to back up one level in those cases. 747 if (UnindentWhitesmithsBraces) 748 --Line->Level; 749 750 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 751 MustBeDeclaration); 752 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 753 Line->Level += AddLevels; 754 755 IfStmtKind IfKind = IfStmtKind::NotIf; 756 const bool SimpleBlock = parseLevel(/*HasOpeningBrace=*/true, &IfKind); 757 758 if (eof()) 759 return IfKind; 760 761 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 762 : !FormatTok->is(tok::r_brace)) { 763 Line->Level = InitialLevel; 764 FormatTok->setBlockKind(BK_Block); 765 return IfKind; 766 } 767 768 if (SimpleBlock && Tok->is(tok::l_brace)) { 769 assert(FormatTok->is(tok::r_brace)); 770 const FormatToken *Previous = Tokens->getPreviousToken(); 771 assert(Previous); 772 if (Previous->isNot(tok::r_brace) || Previous->Optional) { 773 Tok->MatchingParen = FormatTok; 774 FormatTok->MatchingParen = Tok; 775 } 776 } 777 778 size_t PPEndHash = computePPHash(); 779 780 // Munch the closing brace. 781 nextToken(/*LevelDifference=*/-AddLevels); 782 783 if (MacroBlock && FormatTok->is(tok::l_paren)) 784 parseParens(); 785 786 if (FormatTok->is(tok::arrow)) { 787 // Following the } we can find a trailing return type arrow 788 // as part of an implicit conversion constraint. 789 nextToken(); 790 parseStructuralElement(); 791 } 792 793 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 794 nextToken(); 795 796 Line->Level = InitialLevel; 797 798 if (PPStartHash == PPEndHash) { 799 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 800 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 801 // Update the opening line to add the forward reference as well 802 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 803 CurrentLines->size() - 1; 804 } 805 } 806 807 return IfKind; 808 } 809 810 static bool isGoogScope(const UnwrappedLine &Line) { 811 // FIXME: Closure-library specific stuff should not be hard-coded but be 812 // configurable. 813 if (Line.Tokens.size() < 4) 814 return false; 815 auto I = Line.Tokens.begin(); 816 if (I->Tok->TokenText != "goog") 817 return false; 818 ++I; 819 if (I->Tok->isNot(tok::period)) 820 return false; 821 ++I; 822 if (I->Tok->TokenText != "scope") 823 return false; 824 ++I; 825 return I->Tok->is(tok::l_paren); 826 } 827 828 static bool isIIFE(const UnwrappedLine &Line, 829 const AdditionalKeywords &Keywords) { 830 // Look for the start of an immediately invoked anonymous function. 831 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 832 // This is commonly done in JavaScript to create a new, anonymous scope. 833 // Example: (function() { ... })() 834 if (Line.Tokens.size() < 3) 835 return false; 836 auto I = Line.Tokens.begin(); 837 if (I->Tok->isNot(tok::l_paren)) 838 return false; 839 ++I; 840 if (I->Tok->isNot(Keywords.kw_function)) 841 return false; 842 ++I; 843 return I->Tok->is(tok::l_paren); 844 } 845 846 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 847 const FormatToken &InitialToken) { 848 if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro)) 849 return Style.BraceWrapping.AfterNamespace; 850 if (InitialToken.is(tok::kw_class)) 851 return Style.BraceWrapping.AfterClass; 852 if (InitialToken.is(tok::kw_union)) 853 return Style.BraceWrapping.AfterUnion; 854 if (InitialToken.is(tok::kw_struct)) 855 return Style.BraceWrapping.AfterStruct; 856 if (InitialToken.is(tok::kw_enum)) 857 return Style.BraceWrapping.AfterEnum; 858 return false; 859 } 860 861 void UnwrappedLineParser::parseChildBlock() { 862 FormatTok->setBlockKind(BK_Block); 863 nextToken(); 864 { 865 bool SkipIndent = (Style.isJavaScript() && 866 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 867 ScopedLineState LineState(*this); 868 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 869 /*MustBeDeclaration=*/false); 870 Line->Level += SkipIndent ? 0 : 1; 871 parseLevel(/*HasOpeningBrace=*/true); 872 flushComments(isOnNewLine(*FormatTok)); 873 Line->Level -= SkipIndent ? 0 : 1; 874 } 875 nextToken(); 876 } 877 878 void UnwrappedLineParser::parsePPDirective() { 879 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 880 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 881 882 nextToken(); 883 884 if (!FormatTok->Tok.getIdentifierInfo()) { 885 parsePPUnknown(); 886 return; 887 } 888 889 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 890 case tok::pp_define: 891 parsePPDefine(); 892 return; 893 case tok::pp_if: 894 parsePPIf(/*IfDef=*/false); 895 break; 896 case tok::pp_ifdef: 897 case tok::pp_ifndef: 898 parsePPIf(/*IfDef=*/true); 899 break; 900 case tok::pp_else: 901 parsePPElse(); 902 break; 903 case tok::pp_elifdef: 904 case tok::pp_elifndef: 905 case tok::pp_elif: 906 parsePPElIf(); 907 break; 908 case tok::pp_endif: 909 parsePPEndIf(); 910 break; 911 default: 912 parsePPUnknown(); 913 break; 914 } 915 } 916 917 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 918 size_t Line = CurrentLines->size(); 919 if (CurrentLines == &PreprocessorDirectives) 920 Line += Lines.size(); 921 922 if (Unreachable || 923 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 924 PPStack.push_back({PP_Unreachable, Line}); 925 else 926 PPStack.push_back({PP_Conditional, Line}); 927 } 928 929 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 930 ++PPBranchLevel; 931 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 932 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 933 PPLevelBranchIndex.push_back(0); 934 PPLevelBranchCount.push_back(0); 935 } 936 PPChainBranchIndex.push(0); 937 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 938 conditionalCompilationCondition(Unreachable || Skip); 939 } 940 941 void UnwrappedLineParser::conditionalCompilationAlternative() { 942 if (!PPStack.empty()) 943 PPStack.pop_back(); 944 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 945 if (!PPChainBranchIndex.empty()) 946 ++PPChainBranchIndex.top(); 947 conditionalCompilationCondition( 948 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 949 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 950 } 951 952 void UnwrappedLineParser::conditionalCompilationEnd() { 953 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 954 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 955 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 956 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 957 } 958 } 959 // Guard against #endif's without #if. 960 if (PPBranchLevel > -1) 961 --PPBranchLevel; 962 if (!PPChainBranchIndex.empty()) 963 PPChainBranchIndex.pop(); 964 if (!PPStack.empty()) 965 PPStack.pop_back(); 966 } 967 968 void UnwrappedLineParser::parsePPIf(bool IfDef) { 969 bool IfNDef = FormatTok->is(tok::pp_ifndef); 970 nextToken(); 971 bool Unreachable = false; 972 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 973 Unreachable = true; 974 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 975 Unreachable = true; 976 conditionalCompilationStart(Unreachable); 977 FormatToken *IfCondition = FormatTok; 978 // If there's a #ifndef on the first line, and the only lines before it are 979 // comments, it could be an include guard. 980 bool MaybeIncludeGuard = IfNDef; 981 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 982 for (auto &Line : Lines) { 983 if (!Line.Tokens.front().Tok->is(tok::comment)) { 984 MaybeIncludeGuard = false; 985 IncludeGuard = IG_Rejected; 986 break; 987 } 988 } 989 --PPBranchLevel; 990 parsePPUnknown(); 991 ++PPBranchLevel; 992 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 993 IncludeGuard = IG_IfNdefed; 994 IncludeGuardToken = IfCondition; 995 } 996 } 997 998 void UnwrappedLineParser::parsePPElse() { 999 // If a potential include guard has an #else, it's not an include guard. 1000 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1001 IncludeGuard = IG_Rejected; 1002 conditionalCompilationAlternative(); 1003 if (PPBranchLevel > -1) 1004 --PPBranchLevel; 1005 parsePPUnknown(); 1006 ++PPBranchLevel; 1007 } 1008 1009 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 1010 1011 void UnwrappedLineParser::parsePPEndIf() { 1012 conditionalCompilationEnd(); 1013 parsePPUnknown(); 1014 // If the #endif of a potential include guard is the last thing in the file, 1015 // then we found an include guard. 1016 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1017 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1018 IncludeGuard = IG_Found; 1019 } 1020 1021 void UnwrappedLineParser::parsePPDefine() { 1022 nextToken(); 1023 1024 if (!FormatTok->Tok.getIdentifierInfo()) { 1025 IncludeGuard = IG_Rejected; 1026 IncludeGuardToken = nullptr; 1027 parsePPUnknown(); 1028 return; 1029 } 1030 1031 if (IncludeGuard == IG_IfNdefed && 1032 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1033 IncludeGuard = IG_Defined; 1034 IncludeGuardToken = nullptr; 1035 for (auto &Line : Lines) { 1036 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1037 IncludeGuard = IG_Rejected; 1038 break; 1039 } 1040 } 1041 } 1042 1043 nextToken(); 1044 if (FormatTok->Tok.getKind() == tok::l_paren && 1045 FormatTok->WhitespaceRange.getBegin() == 1046 FormatTok->WhitespaceRange.getEnd()) { 1047 parseParens(); 1048 } 1049 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1050 Line->Level += PPBranchLevel + 1; 1051 addUnwrappedLine(); 1052 ++Line->Level; 1053 1054 // Errors during a preprocessor directive can only affect the layout of the 1055 // preprocessor directive, and thus we ignore them. An alternative approach 1056 // would be to use the same approach we use on the file level (no 1057 // re-indentation if there was a structural error) within the macro 1058 // definition. 1059 parseFile(); 1060 } 1061 1062 void UnwrappedLineParser::parsePPUnknown() { 1063 do { 1064 nextToken(); 1065 } while (!eof()); 1066 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1067 Line->Level += PPBranchLevel + 1; 1068 addUnwrappedLine(); 1069 } 1070 1071 // Here we exclude certain tokens that are not usually the first token in an 1072 // unwrapped line. This is used in attempt to distinguish macro calls without 1073 // trailing semicolons from other constructs split to several lines. 1074 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1075 // Semicolon can be a null-statement, l_square can be a start of a macro or 1076 // a C++11 attribute, but this doesn't seem to be common. 1077 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 1078 Tok.isNot(TT_AttributeSquare) && 1079 // Tokens that can only be used as binary operators and a part of 1080 // overloaded operator names. 1081 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 1082 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 1083 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 1084 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 1085 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 1086 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 1087 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 1088 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 1089 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 1090 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 1091 Tok.isNot(tok::lesslessequal) && 1092 // Colon is used in labels, base class lists, initializer lists, 1093 // range-based for loops, ternary operator, but should never be the 1094 // first token in an unwrapped line. 1095 Tok.isNot(tok::colon) && 1096 // 'noexcept' is a trailing annotation. 1097 Tok.isNot(tok::kw_noexcept); 1098 } 1099 1100 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1101 const FormatToken *FormatTok) { 1102 // FIXME: This returns true for C/C++ keywords like 'struct'. 1103 return FormatTok->is(tok::identifier) && 1104 (FormatTok->Tok.getIdentifierInfo() == nullptr || 1105 !FormatTok->isOneOf( 1106 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1107 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1108 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1109 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1110 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1111 Keywords.kw_instanceof, Keywords.kw_interface, 1112 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1113 } 1114 1115 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1116 const FormatToken *FormatTok) { 1117 return FormatTok->Tok.isLiteral() || 1118 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1119 mustBeJSIdent(Keywords, FormatTok); 1120 } 1121 1122 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1123 // when encountered after a value (see mustBeJSIdentOrValue). 1124 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1125 const FormatToken *FormatTok) { 1126 return FormatTok->isOneOf( 1127 tok::kw_return, Keywords.kw_yield, 1128 // conditionals 1129 tok::kw_if, tok::kw_else, 1130 // loops 1131 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1132 // switch/case 1133 tok::kw_switch, tok::kw_case, 1134 // exceptions 1135 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1136 // declaration 1137 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1138 Keywords.kw_async, Keywords.kw_function, 1139 // import/export 1140 Keywords.kw_import, tok::kw_export); 1141 } 1142 1143 // Checks whether a token is a type in K&R C (aka C78). 1144 static bool isC78Type(const FormatToken &Tok) { 1145 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1146 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1147 tok::identifier); 1148 } 1149 1150 // This function checks whether a token starts the first parameter declaration 1151 // in a K&R C (aka C78) function definition, e.g.: 1152 // int f(a, b) 1153 // short a, b; 1154 // { 1155 // return a + b; 1156 // } 1157 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1158 const FormatToken *FuncName) { 1159 assert(Tok); 1160 assert(Next); 1161 assert(FuncName); 1162 1163 if (FuncName->isNot(tok::identifier)) 1164 return false; 1165 1166 const FormatToken *Prev = FuncName->Previous; 1167 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1168 return false; 1169 1170 if (!isC78Type(*Tok) && 1171 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) 1172 return false; 1173 1174 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1175 return false; 1176 1177 Tok = Tok->Previous; 1178 if (!Tok || Tok->isNot(tok::r_paren)) 1179 return false; 1180 1181 Tok = Tok->Previous; 1182 if (!Tok || Tok->isNot(tok::identifier)) 1183 return false; 1184 1185 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1186 } 1187 1188 void UnwrappedLineParser::parseModuleImport() { 1189 nextToken(); 1190 while (!eof()) { 1191 if (FormatTok->is(tok::colon)) { 1192 FormatTok->setType(TT_ModulePartitionColon); 1193 } 1194 // Handle import <foo/bar.h> as we would an include statement. 1195 else if (FormatTok->is(tok::less)) { 1196 nextToken(); 1197 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1198 // Mark tokens up to the trailing line comments as implicit string 1199 // literals. 1200 if (FormatTok->isNot(tok::comment) && 1201 !FormatTok->TokenText.startswith("//")) 1202 FormatTok->setType(TT_ImplicitStringLiteral); 1203 nextToken(); 1204 } 1205 } 1206 if (FormatTok->is(tok::semi)) { 1207 nextToken(); 1208 break; 1209 } 1210 nextToken(); 1211 } 1212 1213 addUnwrappedLine(); 1214 } 1215 1216 // readTokenWithJavaScriptASI reads the next token and terminates the current 1217 // line if JavaScript Automatic Semicolon Insertion must 1218 // happen between the current token and the next token. 1219 // 1220 // This method is conservative - it cannot cover all edge cases of JavaScript, 1221 // but only aims to correctly handle certain well known cases. It *must not* 1222 // return true in speculative cases. 1223 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1224 FormatToken *Previous = FormatTok; 1225 readToken(); 1226 FormatToken *Next = FormatTok; 1227 1228 bool IsOnSameLine = 1229 CommentsBeforeNextToken.empty() 1230 ? Next->NewlinesBefore == 0 1231 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 1232 if (IsOnSameLine) 1233 return; 1234 1235 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 1236 bool PreviousStartsTemplateExpr = 1237 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 1238 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 1239 // If the line contains an '@' sign, the previous token might be an 1240 // annotation, which can precede another identifier/value. 1241 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) { 1242 return LineNode.Tok->is(tok::at); 1243 }); 1244 if (HasAt) 1245 return; 1246 } 1247 if (Next->is(tok::exclaim) && PreviousMustBeValue) 1248 return addUnwrappedLine(); 1249 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 1250 bool NextEndsTemplateExpr = 1251 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 1252 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 1253 (PreviousMustBeValue || 1254 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 1255 tok::minusminus))) 1256 return addUnwrappedLine(); 1257 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 1258 isJSDeclOrStmt(Keywords, Next)) 1259 return addUnwrappedLine(); 1260 } 1261 1262 void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind, 1263 bool IsTopLevel) { 1264 if (Style.Language == FormatStyle::LK_TableGen && 1265 FormatTok->is(tok::pp_include)) { 1266 nextToken(); 1267 if (FormatTok->is(tok::string_literal)) 1268 nextToken(); 1269 addUnwrappedLine(); 1270 return; 1271 } 1272 switch (FormatTok->Tok.getKind()) { 1273 case tok::kw_asm: 1274 nextToken(); 1275 if (FormatTok->is(tok::l_brace)) { 1276 FormatTok->setType(TT_InlineASMBrace); 1277 nextToken(); 1278 while (FormatTok && FormatTok->isNot(tok::eof)) { 1279 if (FormatTok->is(tok::r_brace)) { 1280 FormatTok->setType(TT_InlineASMBrace); 1281 nextToken(); 1282 addUnwrappedLine(); 1283 break; 1284 } 1285 FormatTok->Finalized = true; 1286 nextToken(); 1287 } 1288 } 1289 break; 1290 case tok::kw_namespace: 1291 parseNamespace(); 1292 return; 1293 case tok::kw_public: 1294 case tok::kw_protected: 1295 case tok::kw_private: 1296 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 1297 Style.isCSharp()) 1298 nextToken(); 1299 else 1300 parseAccessSpecifier(); 1301 return; 1302 case tok::kw_if: 1303 if (Style.isJavaScript() && Line->MustBeDeclaration) 1304 // field/method declaration. 1305 break; 1306 parseIfThenElse(IfKind); 1307 return; 1308 case tok::kw_for: 1309 case tok::kw_while: 1310 if (Style.isJavaScript() && Line->MustBeDeclaration) 1311 // field/method declaration. 1312 break; 1313 parseForOrWhileLoop(); 1314 return; 1315 case tok::kw_do: 1316 if (Style.isJavaScript() && Line->MustBeDeclaration) 1317 // field/method declaration. 1318 break; 1319 parseDoWhile(); 1320 return; 1321 case tok::kw_switch: 1322 if (Style.isJavaScript() && Line->MustBeDeclaration) 1323 // 'switch: string' field declaration. 1324 break; 1325 parseSwitch(); 1326 return; 1327 case tok::kw_default: 1328 if (Style.isJavaScript() && Line->MustBeDeclaration) 1329 // 'default: string' field declaration. 1330 break; 1331 nextToken(); 1332 if (FormatTok->is(tok::colon)) { 1333 parseLabel(); 1334 return; 1335 } 1336 // e.g. "default void f() {}" in a Java interface. 1337 break; 1338 case tok::kw_case: 1339 if (Style.isJavaScript() && Line->MustBeDeclaration) 1340 // 'case: string' field declaration. 1341 break; 1342 parseCaseLabel(); 1343 return; 1344 case tok::kw_try: 1345 case tok::kw___try: 1346 if (Style.isJavaScript() && Line->MustBeDeclaration) 1347 // field/method declaration. 1348 break; 1349 parseTryCatch(); 1350 return; 1351 case tok::kw_extern: 1352 nextToken(); 1353 if (FormatTok->Tok.is(tok::string_literal)) { 1354 nextToken(); 1355 if (FormatTok->Tok.is(tok::l_brace)) { 1356 if (Style.BraceWrapping.AfterExternBlock) 1357 addUnwrappedLine(); 1358 // Either we indent or for backwards compatibility we follow the 1359 // AfterExternBlock style. 1360 unsigned AddLevels = 1361 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) || 1362 (Style.BraceWrapping.AfterExternBlock && 1363 Style.IndentExternBlock == 1364 FormatStyle::IEBS_AfterExternBlock) 1365 ? 1u 1366 : 0u; 1367 parseBlock(/*MustBeDeclaration=*/true, AddLevels); 1368 addUnwrappedLine(); 1369 return; 1370 } 1371 } 1372 break; 1373 case tok::kw_export: 1374 if (Style.isJavaScript()) { 1375 parseJavaScriptEs6ImportExport(); 1376 return; 1377 } 1378 if (!Style.isCpp()) 1379 break; 1380 // Handle C++ "(inline|export) namespace". 1381 LLVM_FALLTHROUGH; 1382 case tok::kw_inline: 1383 nextToken(); 1384 if (FormatTok->Tok.is(tok::kw_namespace)) { 1385 parseNamespace(); 1386 return; 1387 } 1388 break; 1389 case tok::identifier: 1390 if (FormatTok->is(TT_ForEachMacro)) { 1391 parseForOrWhileLoop(); 1392 return; 1393 } 1394 if (FormatTok->is(TT_MacroBlockBegin)) { 1395 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 1396 /*MunchSemi=*/false); 1397 return; 1398 } 1399 if (FormatTok->is(Keywords.kw_import)) { 1400 if (Style.isJavaScript()) { 1401 parseJavaScriptEs6ImportExport(); 1402 return; 1403 } 1404 if (Style.Language == FormatStyle::LK_Proto) { 1405 nextToken(); 1406 if (FormatTok->is(tok::kw_public)) 1407 nextToken(); 1408 if (!FormatTok->is(tok::string_literal)) 1409 return; 1410 nextToken(); 1411 if (FormatTok->is(tok::semi)) 1412 nextToken(); 1413 addUnwrappedLine(); 1414 return; 1415 } 1416 if (Style.isCpp()) { 1417 parseModuleImport(); 1418 return; 1419 } 1420 } 1421 if (Style.isCpp() && 1422 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1423 Keywords.kw_slots, Keywords.kw_qslots)) { 1424 nextToken(); 1425 if (FormatTok->is(tok::colon)) { 1426 nextToken(); 1427 addUnwrappedLine(); 1428 return; 1429 } 1430 } 1431 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1432 parseStatementMacro(); 1433 return; 1434 } 1435 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) { 1436 parseNamespace(); 1437 return; 1438 } 1439 // In all other cases, parse the declaration. 1440 break; 1441 default: 1442 break; 1443 } 1444 do { 1445 const FormatToken *Previous = FormatTok->Previous; 1446 switch (FormatTok->Tok.getKind()) { 1447 case tok::at: 1448 nextToken(); 1449 if (FormatTok->Tok.is(tok::l_brace)) { 1450 nextToken(); 1451 parseBracedList(); 1452 break; 1453 } else if (Style.Language == FormatStyle::LK_Java && 1454 FormatTok->is(Keywords.kw_interface)) { 1455 nextToken(); 1456 break; 1457 } 1458 switch (FormatTok->Tok.getObjCKeywordID()) { 1459 case tok::objc_public: 1460 case tok::objc_protected: 1461 case tok::objc_package: 1462 case tok::objc_private: 1463 return parseAccessSpecifier(); 1464 case tok::objc_interface: 1465 case tok::objc_implementation: 1466 return parseObjCInterfaceOrImplementation(); 1467 case tok::objc_protocol: 1468 if (parseObjCProtocol()) 1469 return; 1470 break; 1471 case tok::objc_end: 1472 return; // Handled by the caller. 1473 case tok::objc_optional: 1474 case tok::objc_required: 1475 nextToken(); 1476 addUnwrappedLine(); 1477 return; 1478 case tok::objc_autoreleasepool: 1479 nextToken(); 1480 if (FormatTok->Tok.is(tok::l_brace)) { 1481 if (Style.BraceWrapping.AfterControlStatement == 1482 FormatStyle::BWACS_Always) 1483 addUnwrappedLine(); 1484 parseBlock(); 1485 } 1486 addUnwrappedLine(); 1487 return; 1488 case tok::objc_synchronized: 1489 nextToken(); 1490 if (FormatTok->Tok.is(tok::l_paren)) 1491 // Skip synchronization object 1492 parseParens(); 1493 if (FormatTok->Tok.is(tok::l_brace)) { 1494 if (Style.BraceWrapping.AfterControlStatement == 1495 FormatStyle::BWACS_Always) 1496 addUnwrappedLine(); 1497 parseBlock(); 1498 } 1499 addUnwrappedLine(); 1500 return; 1501 case tok::objc_try: 1502 // This branch isn't strictly necessary (the kw_try case below would 1503 // do this too after the tok::at is parsed above). But be explicit. 1504 parseTryCatch(); 1505 return; 1506 default: 1507 break; 1508 } 1509 break; 1510 case tok::kw_concept: 1511 parseConcept(); 1512 return; 1513 case tok::kw_requires: 1514 parseRequires(); 1515 return; 1516 case tok::kw_enum: 1517 // Ignore if this is part of "template <enum ...". 1518 if (Previous && Previous->is(tok::less)) { 1519 nextToken(); 1520 break; 1521 } 1522 1523 // parseEnum falls through and does not yet add an unwrapped line as an 1524 // enum definition can start a structural element. 1525 if (!parseEnum()) 1526 break; 1527 // This only applies for C++. 1528 if (!Style.isCpp()) { 1529 addUnwrappedLine(); 1530 return; 1531 } 1532 break; 1533 case tok::kw_typedef: 1534 nextToken(); 1535 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1536 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS, 1537 Keywords.kw_CF_CLOSED_ENUM, 1538 Keywords.kw_NS_CLOSED_ENUM)) 1539 parseEnum(); 1540 break; 1541 case tok::kw_struct: 1542 case tok::kw_union: 1543 case tok::kw_class: 1544 if (parseStructLike()) { 1545 return; 1546 } 1547 break; 1548 case tok::period: 1549 nextToken(); 1550 // In Java, classes have an implicit static member "class". 1551 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1552 FormatTok->is(tok::kw_class)) 1553 nextToken(); 1554 if (Style.isJavaScript() && FormatTok && 1555 FormatTok->Tok.getIdentifierInfo()) 1556 // JavaScript only has pseudo keywords, all keywords are allowed to 1557 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1558 nextToken(); 1559 break; 1560 case tok::semi: 1561 nextToken(); 1562 addUnwrappedLine(); 1563 return; 1564 case tok::r_brace: 1565 addUnwrappedLine(); 1566 return; 1567 case tok::l_paren: { 1568 parseParens(); 1569 // Break the unwrapped line if a K&R C function definition has a parameter 1570 // declaration. 1571 if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof)) 1572 break; 1573 if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) { 1574 addUnwrappedLine(); 1575 return; 1576 } 1577 break; 1578 } 1579 case tok::kw_operator: 1580 nextToken(); 1581 if (FormatTok->isBinaryOperator()) 1582 nextToken(); 1583 break; 1584 case tok::caret: 1585 nextToken(); 1586 if (FormatTok->Tok.isAnyIdentifier() || 1587 FormatTok->isSimpleTypeSpecifier()) 1588 nextToken(); 1589 if (FormatTok->is(tok::l_paren)) 1590 parseParens(); 1591 if (FormatTok->is(tok::l_brace)) 1592 parseChildBlock(); 1593 break; 1594 case tok::l_brace: 1595 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { 1596 // A block outside of parentheses must be the last part of a 1597 // structural element. 1598 // FIXME: Figure out cases where this is not true, and add projections 1599 // for them (the one we know is missing are lambdas). 1600 if (Style.Language == FormatStyle::LK_Java && 1601 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) { 1602 // If necessary, we could set the type to something different than 1603 // TT_FunctionLBrace. 1604 if (Style.BraceWrapping.AfterControlStatement == 1605 FormatStyle::BWACS_Always) 1606 addUnwrappedLine(); 1607 } else if (Style.BraceWrapping.AfterFunction) { 1608 addUnwrappedLine(); 1609 } 1610 FormatTok->setType(TT_FunctionLBrace); 1611 parseBlock(); 1612 addUnwrappedLine(); 1613 return; 1614 } 1615 // Otherwise this was a braced init list, and the structural 1616 // element continues. 1617 break; 1618 case tok::kw_try: 1619 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1620 // field/method declaration. 1621 nextToken(); 1622 break; 1623 } 1624 // We arrive here when parsing function-try blocks. 1625 if (Style.BraceWrapping.AfterFunction) 1626 addUnwrappedLine(); 1627 parseTryCatch(); 1628 return; 1629 case tok::identifier: { 1630 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) && 1631 Line->MustBeDeclaration) { 1632 addUnwrappedLine(); 1633 parseCSharpGenericTypeConstraint(); 1634 break; 1635 } 1636 if (FormatTok->is(TT_MacroBlockEnd)) { 1637 addUnwrappedLine(); 1638 return; 1639 } 1640 1641 // Function declarations (as opposed to function expressions) are parsed 1642 // on their own unwrapped line by continuing this loop. Function 1643 // expressions (functions that are not on their own line) must not create 1644 // a new unwrapped line, so they are special cased below. 1645 size_t TokenCount = Line->Tokens.size(); 1646 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) && 1647 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1648 Keywords.kw_async)))) { 1649 tryToParseJSFunction(); 1650 break; 1651 } 1652 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) && 1653 FormatTok->is(Keywords.kw_interface)) { 1654 if (Style.isJavaScript()) { 1655 // In JavaScript/TypeScript, "interface" can be used as a standalone 1656 // identifier, e.g. in `var interface = 1;`. If "interface" is 1657 // followed by another identifier, it is very like to be an actual 1658 // interface declaration. 1659 unsigned StoredPosition = Tokens->getPosition(); 1660 FormatToken *Next = Tokens->getNextToken(); 1661 FormatTok = Tokens->setPosition(StoredPosition); 1662 if (!mustBeJSIdent(Keywords, Next)) { 1663 nextToken(); 1664 break; 1665 } 1666 } 1667 parseRecord(); 1668 addUnwrappedLine(); 1669 return; 1670 } 1671 1672 if (FormatTok->is(Keywords.kw_interface)) { 1673 if (parseStructLike()) { 1674 return; 1675 } 1676 break; 1677 } 1678 1679 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1680 parseStatementMacro(); 1681 return; 1682 } 1683 1684 // See if the following token should start a new unwrapped line. 1685 StringRef Text = FormatTok->TokenText; 1686 nextToken(); 1687 1688 // JS doesn't have macros, and within classes colons indicate fields, not 1689 // labels. 1690 if (Style.isJavaScript()) 1691 break; 1692 1693 TokenCount = Line->Tokens.size(); 1694 if (TokenCount == 1 || 1695 (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) { 1696 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1697 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1698 parseLabel(!Style.IndentGotoLabels); 1699 return; 1700 } 1701 // Recognize function-like macro usages without trailing semicolon as 1702 // well as free-standing macros like Q_OBJECT. 1703 bool FunctionLike = FormatTok->is(tok::l_paren); 1704 if (FunctionLike) 1705 parseParens(); 1706 1707 bool FollowedByNewline = 1708 CommentsBeforeNextToken.empty() 1709 ? FormatTok->NewlinesBefore > 0 1710 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1711 1712 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1713 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { 1714 addUnwrappedLine(); 1715 return; 1716 } 1717 } 1718 break; 1719 } 1720 case tok::equal: 1721 if ((Style.isJavaScript() || Style.isCSharp()) && 1722 FormatTok->is(TT_FatArrow)) { 1723 tryToParseChildBlock(); 1724 break; 1725 } 1726 1727 nextToken(); 1728 if (FormatTok->Tok.is(tok::l_brace)) { 1729 // Block kind should probably be set to BK_BracedInit for any language. 1730 // C# needs this change to ensure that array initialisers and object 1731 // initialisers are indented the same way. 1732 if (Style.isCSharp()) 1733 FormatTok->setBlockKind(BK_BracedInit); 1734 nextToken(); 1735 parseBracedList(); 1736 } else if (Style.Language == FormatStyle::LK_Proto && 1737 FormatTok->Tok.is(tok::less)) { 1738 nextToken(); 1739 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 1740 /*ClosingBraceKind=*/tok::greater); 1741 } 1742 break; 1743 case tok::l_square: 1744 parseSquare(); 1745 break; 1746 case tok::kw_new: 1747 parseNew(); 1748 break; 1749 default: 1750 nextToken(); 1751 break; 1752 } 1753 } while (!eof()); 1754 } 1755 1756 bool UnwrappedLineParser::tryToParsePropertyAccessor() { 1757 assert(FormatTok->is(tok::l_brace)); 1758 if (!Style.isCSharp()) 1759 return false; 1760 // See if it's a property accessor. 1761 if (FormatTok->Previous->isNot(tok::identifier)) 1762 return false; 1763 1764 // See if we are inside a property accessor. 1765 // 1766 // Record the current tokenPosition so that we can advance and 1767 // reset the current token. `Next` is not set yet so we need 1768 // another way to advance along the token stream. 1769 unsigned int StoredPosition = Tokens->getPosition(); 1770 FormatToken *Tok = Tokens->getNextToken(); 1771 1772 // A trivial property accessor is of the form: 1773 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] } 1774 // Track these as they do not require line breaks to be introduced. 1775 bool HasGetOrSet = false; 1776 bool IsTrivialPropertyAccessor = true; 1777 while (!eof()) { 1778 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private, 1779 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get, 1780 Keywords.kw_set)) { 1781 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set)) 1782 HasGetOrSet = true; 1783 Tok = Tokens->getNextToken(); 1784 continue; 1785 } 1786 if (Tok->isNot(tok::r_brace)) 1787 IsTrivialPropertyAccessor = false; 1788 break; 1789 } 1790 1791 if (!HasGetOrSet) { 1792 Tokens->setPosition(StoredPosition); 1793 return false; 1794 } 1795 1796 // Try to parse the property accessor: 1797 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties 1798 Tokens->setPosition(StoredPosition); 1799 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction) 1800 addUnwrappedLine(); 1801 nextToken(); 1802 do { 1803 switch (FormatTok->Tok.getKind()) { 1804 case tok::r_brace: 1805 nextToken(); 1806 if (FormatTok->is(tok::equal)) { 1807 while (!eof() && FormatTok->isNot(tok::semi)) 1808 nextToken(); 1809 nextToken(); 1810 } 1811 addUnwrappedLine(); 1812 return true; 1813 case tok::l_brace: 1814 ++Line->Level; 1815 parseBlock(/*MustBeDeclaration=*/true); 1816 addUnwrappedLine(); 1817 --Line->Level; 1818 break; 1819 case tok::equal: 1820 if (FormatTok->is(TT_FatArrow)) { 1821 ++Line->Level; 1822 do { 1823 nextToken(); 1824 } while (!eof() && FormatTok->isNot(tok::semi)); 1825 nextToken(); 1826 addUnwrappedLine(); 1827 --Line->Level; 1828 break; 1829 } 1830 nextToken(); 1831 break; 1832 default: 1833 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) && 1834 !IsTrivialPropertyAccessor) { 1835 // Non-trivial get/set needs to be on its own line. 1836 addUnwrappedLine(); 1837 } 1838 nextToken(); 1839 } 1840 } while (!eof()); 1841 1842 // Unreachable for well-formed code (paired '{' and '}'). 1843 return true; 1844 } 1845 1846 bool UnwrappedLineParser::tryToParseLambda() { 1847 if (!Style.isCpp()) { 1848 nextToken(); 1849 return false; 1850 } 1851 assert(FormatTok->is(tok::l_square)); 1852 FormatToken &LSquare = *FormatTok; 1853 if (!tryToParseLambdaIntroducer()) 1854 return false; 1855 1856 bool SeenArrow = false; 1857 1858 while (FormatTok->isNot(tok::l_brace)) { 1859 if (FormatTok->isSimpleTypeSpecifier()) { 1860 nextToken(); 1861 continue; 1862 } 1863 switch (FormatTok->Tok.getKind()) { 1864 case tok::l_brace: 1865 break; 1866 case tok::l_paren: 1867 parseParens(); 1868 break; 1869 case tok::l_square: 1870 parseSquare(); 1871 break; 1872 case tok::amp: 1873 case tok::star: 1874 case tok::kw_const: 1875 case tok::comma: 1876 case tok::less: 1877 case tok::greater: 1878 case tok::identifier: 1879 case tok::numeric_constant: 1880 case tok::coloncolon: 1881 case tok::kw_class: 1882 case tok::kw_mutable: 1883 case tok::kw_noexcept: 1884 case tok::kw_template: 1885 case tok::kw_typename: 1886 nextToken(); 1887 break; 1888 // Specialization of a template with an integer parameter can contain 1889 // arithmetic, logical, comparison and ternary operators. 1890 // 1891 // FIXME: This also accepts sequences of operators that are not in the scope 1892 // of a template argument list. 1893 // 1894 // In a C++ lambda a template type can only occur after an arrow. We use 1895 // this as an heuristic to distinguish between Objective-C expressions 1896 // followed by an `a->b` expression, such as: 1897 // ([obj func:arg] + a->b) 1898 // Otherwise the code below would parse as a lambda. 1899 // 1900 // FIXME: This heuristic is incorrect for C++20 generic lambdas with 1901 // explicit template lists: []<bool b = true && false>(U &&u){} 1902 case tok::plus: 1903 case tok::minus: 1904 case tok::exclaim: 1905 case tok::tilde: 1906 case tok::slash: 1907 case tok::percent: 1908 case tok::lessless: 1909 case tok::pipe: 1910 case tok::pipepipe: 1911 case tok::ampamp: 1912 case tok::caret: 1913 case tok::equalequal: 1914 case tok::exclaimequal: 1915 case tok::greaterequal: 1916 case tok::lessequal: 1917 case tok::question: 1918 case tok::colon: 1919 case tok::ellipsis: 1920 case tok::kw_true: 1921 case tok::kw_false: 1922 if (SeenArrow) { 1923 nextToken(); 1924 break; 1925 } 1926 return true; 1927 case tok::arrow: 1928 // This might or might not actually be a lambda arrow (this could be an 1929 // ObjC method invocation followed by a dereferencing arrow). We might 1930 // reset this back to TT_Unknown in TokenAnnotator. 1931 FormatTok->setType(TT_LambdaArrow); 1932 SeenArrow = true; 1933 nextToken(); 1934 break; 1935 default: 1936 return true; 1937 } 1938 } 1939 FormatTok->setType(TT_LambdaLBrace); 1940 LSquare.setType(TT_LambdaLSquare); 1941 parseChildBlock(); 1942 return true; 1943 } 1944 1945 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1946 const FormatToken *Previous = FormatTok->Previous; 1947 if (Previous && 1948 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1949 tok::kw_delete, tok::l_square) || 1950 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1951 Previous->isSimpleTypeSpecifier())) { 1952 nextToken(); 1953 return false; 1954 } 1955 nextToken(); 1956 if (FormatTok->is(tok::l_square)) { 1957 return false; 1958 } 1959 parseSquare(/*LambdaIntroducer=*/true); 1960 return true; 1961 } 1962 1963 void UnwrappedLineParser::tryToParseJSFunction() { 1964 assert(FormatTok->is(Keywords.kw_function) || 1965 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1966 if (FormatTok->is(Keywords.kw_async)) 1967 nextToken(); 1968 // Consume "function". 1969 nextToken(); 1970 1971 // Consume * (generator function). Treat it like C++'s overloaded operators. 1972 if (FormatTok->is(tok::star)) { 1973 FormatTok->setType(TT_OverloadedOperator); 1974 nextToken(); 1975 } 1976 1977 // Consume function name. 1978 if (FormatTok->is(tok::identifier)) 1979 nextToken(); 1980 1981 if (FormatTok->isNot(tok::l_paren)) 1982 return; 1983 1984 // Parse formal parameter list. 1985 parseParens(); 1986 1987 if (FormatTok->is(tok::colon)) { 1988 // Parse a type definition. 1989 nextToken(); 1990 1991 // Eat the type declaration. For braced inline object types, balance braces, 1992 // otherwise just parse until finding an l_brace for the function body. 1993 if (FormatTok->is(tok::l_brace)) 1994 tryToParseBracedList(); 1995 else 1996 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1997 nextToken(); 1998 } 1999 2000 if (FormatTok->is(tok::semi)) 2001 return; 2002 2003 parseChildBlock(); 2004 } 2005 2006 bool UnwrappedLineParser::tryToParseBracedList() { 2007 if (FormatTok->is(BK_Unknown)) 2008 calculateBraceTypes(); 2009 assert(FormatTok->isNot(BK_Unknown)); 2010 if (FormatTok->is(BK_Block)) 2011 return false; 2012 nextToken(); 2013 parseBracedList(); 2014 return true; 2015 } 2016 2017 bool UnwrappedLineParser::tryToParseChildBlock() { 2018 assert(Style.isJavaScript() || Style.isCSharp()); 2019 assert(FormatTok->is(TT_FatArrow)); 2020 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 2021 // They always start an expression or a child block if followed by a curly 2022 // brace. 2023 nextToken(); 2024 if (FormatTok->isNot(tok::l_brace)) 2025 return false; 2026 parseChildBlock(); 2027 return true; 2028 } 2029 2030 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 2031 bool IsEnum, 2032 tok::TokenKind ClosingBraceKind) { 2033 bool HasError = false; 2034 2035 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 2036 // replace this by using parseAssignmentExpression() inside. 2037 do { 2038 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2039 tryToParseChildBlock()) 2040 continue; 2041 if (Style.isJavaScript()) { 2042 if (FormatTok->is(Keywords.kw_function) || 2043 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 2044 tryToParseJSFunction(); 2045 continue; 2046 } 2047 if (FormatTok->is(tok::l_brace)) { 2048 // Could be a method inside of a braced list `{a() { return 1; }}`. 2049 if (tryToParseBracedList()) 2050 continue; 2051 parseChildBlock(); 2052 } 2053 } 2054 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 2055 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2056 addUnwrappedLine(); 2057 nextToken(); 2058 return !HasError; 2059 } 2060 switch (FormatTok->Tok.getKind()) { 2061 case tok::l_square: 2062 if (Style.isCSharp()) 2063 parseSquare(); 2064 else 2065 tryToParseLambda(); 2066 break; 2067 case tok::l_paren: 2068 parseParens(); 2069 // JavaScript can just have free standing methods and getters/setters in 2070 // object literals. Detect them by a "{" following ")". 2071 if (Style.isJavaScript()) { 2072 if (FormatTok->is(tok::l_brace)) 2073 parseChildBlock(); 2074 break; 2075 } 2076 break; 2077 case tok::l_brace: 2078 // Assume there are no blocks inside a braced init list apart 2079 // from the ones we explicitly parse out (like lambdas). 2080 FormatTok->setBlockKind(BK_BracedInit); 2081 nextToken(); 2082 parseBracedList(); 2083 break; 2084 case tok::less: 2085 if (Style.Language == FormatStyle::LK_Proto) { 2086 nextToken(); 2087 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2088 /*ClosingBraceKind=*/tok::greater); 2089 } else { 2090 nextToken(); 2091 } 2092 break; 2093 case tok::semi: 2094 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2095 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 2096 // used for error recovery if we have otherwise determined that this is 2097 // a braced list. 2098 if (Style.isJavaScript()) { 2099 nextToken(); 2100 break; 2101 } 2102 HasError = true; 2103 if (!ContinueOnSemicolons) 2104 return !HasError; 2105 nextToken(); 2106 break; 2107 case tok::comma: 2108 nextToken(); 2109 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2110 addUnwrappedLine(); 2111 break; 2112 default: 2113 nextToken(); 2114 break; 2115 } 2116 } while (!eof()); 2117 return false; 2118 } 2119 2120 void UnwrappedLineParser::parseParens() { 2121 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 2122 nextToken(); 2123 do { 2124 switch (FormatTok->Tok.getKind()) { 2125 case tok::l_paren: 2126 parseParens(); 2127 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2128 parseChildBlock(); 2129 break; 2130 case tok::r_paren: 2131 nextToken(); 2132 return; 2133 case tok::r_brace: 2134 // A "}" inside parenthesis is an error if there wasn't a matching "{". 2135 return; 2136 case tok::l_square: 2137 tryToParseLambda(); 2138 break; 2139 case tok::l_brace: 2140 if (!tryToParseBracedList()) 2141 parseChildBlock(); 2142 break; 2143 case tok::at: 2144 nextToken(); 2145 if (FormatTok->Tok.is(tok::l_brace)) { 2146 nextToken(); 2147 parseBracedList(); 2148 } 2149 break; 2150 case tok::equal: 2151 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2152 tryToParseChildBlock(); 2153 else 2154 nextToken(); 2155 break; 2156 case tok::kw_class: 2157 if (Style.isJavaScript()) 2158 parseRecord(/*ParseAsExpr=*/true); 2159 else 2160 nextToken(); 2161 break; 2162 case tok::identifier: 2163 if (Style.isJavaScript() && 2164 (FormatTok->is(Keywords.kw_function) || 2165 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 2166 tryToParseJSFunction(); 2167 else 2168 nextToken(); 2169 break; 2170 default: 2171 nextToken(); 2172 break; 2173 } 2174 } while (!eof()); 2175 } 2176 2177 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2178 if (!LambdaIntroducer) { 2179 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 2180 if (tryToParseLambda()) 2181 return; 2182 } 2183 do { 2184 switch (FormatTok->Tok.getKind()) { 2185 case tok::l_paren: 2186 parseParens(); 2187 break; 2188 case tok::r_square: 2189 nextToken(); 2190 return; 2191 case tok::r_brace: 2192 // A "}" inside parenthesis is an error if there wasn't a matching "{". 2193 return; 2194 case tok::l_square: 2195 parseSquare(); 2196 break; 2197 case tok::l_brace: { 2198 if (!tryToParseBracedList()) 2199 parseChildBlock(); 2200 break; 2201 } 2202 case tok::at: 2203 nextToken(); 2204 if (FormatTok->Tok.is(tok::l_brace)) { 2205 nextToken(); 2206 parseBracedList(); 2207 } 2208 break; 2209 default: 2210 nextToken(); 2211 break; 2212 } 2213 } while (!eof()); 2214 } 2215 2216 void UnwrappedLineParser::keepAncestorBraces() { 2217 if (!Style.RemoveBracesLLVM) 2218 return; 2219 2220 const int MaxNestingLevels = 2; 2221 const int Size = NestedTooDeep.size(); 2222 if (Size >= MaxNestingLevels) 2223 NestedTooDeep[Size - MaxNestingLevels] = true; 2224 NestedTooDeep.push_back(false); 2225 } 2226 2227 static void markOptionalBraces(FormatToken *LeftBrace) { 2228 if (!LeftBrace) 2229 return; 2230 2231 assert(LeftBrace->is(tok::l_brace)); 2232 2233 FormatToken *RightBrace = LeftBrace->MatchingParen; 2234 if (!RightBrace) { 2235 assert(!LeftBrace->Optional); 2236 return; 2237 } 2238 2239 assert(RightBrace->is(tok::r_brace)); 2240 assert(RightBrace->MatchingParen == LeftBrace); 2241 assert(LeftBrace->Optional == RightBrace->Optional); 2242 2243 LeftBrace->Optional = true; 2244 RightBrace->Optional = true; 2245 } 2246 2247 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, 2248 bool KeepBraces) { 2249 auto HandleAttributes = [this]() { 2250 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2251 if (FormatTok->is(TT_AttributeMacro)) 2252 nextToken(); 2253 // Handle [[likely]] / [[unlikely]] attributes. 2254 if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) 2255 parseSquare(); 2256 }; 2257 2258 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 2259 nextToken(); 2260 if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier)) 2261 nextToken(); 2262 if (FormatTok->Tok.is(tok::l_paren)) 2263 parseParens(); 2264 HandleAttributes(); 2265 2266 bool NeedsUnwrappedLine = false; 2267 keepAncestorBraces(); 2268 2269 FormatToken *IfLeftBrace = nullptr; 2270 IfStmtKind IfBlockKind = IfStmtKind::NotIf; 2271 2272 if (FormatTok->Tok.is(tok::l_brace)) { 2273 IfLeftBrace = FormatTok; 2274 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2275 IfBlockKind = parseBlock(); 2276 if (Style.BraceWrapping.BeforeElse) 2277 addUnwrappedLine(); 2278 else 2279 NeedsUnwrappedLine = true; 2280 } else { 2281 addUnwrappedLine(); 2282 ++Line->Level; 2283 parseStructuralElement(); 2284 --Line->Level; 2285 } 2286 2287 bool KeepIfBraces = false; 2288 if (Style.RemoveBracesLLVM) { 2289 assert(!NestedTooDeep.empty()); 2290 KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) || 2291 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || 2292 IfBlockKind == IfStmtKind::IfElseIf; 2293 } 2294 2295 FormatToken *ElseLeftBrace = nullptr; 2296 IfStmtKind Kind = IfStmtKind::IfOnly; 2297 2298 if (FormatTok->Tok.is(tok::kw_else)) { 2299 if (Style.RemoveBracesLLVM) { 2300 NestedTooDeep.back() = false; 2301 Kind = IfStmtKind::IfElse; 2302 } 2303 nextToken(); 2304 HandleAttributes(); 2305 if (FormatTok->Tok.is(tok::l_brace)) { 2306 ElseLeftBrace = FormatTok; 2307 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2308 if (parseBlock() == IfStmtKind::IfOnly) 2309 Kind = IfStmtKind::IfElseIf; 2310 addUnwrappedLine(); 2311 } else if (FormatTok->Tok.is(tok::kw_if)) { 2312 FormatToken *Previous = Tokens->getPreviousToken(); 2313 const bool IsPrecededByComment = Previous && Previous->is(tok::comment); 2314 if (IsPrecededByComment) { 2315 addUnwrappedLine(); 2316 ++Line->Level; 2317 } 2318 bool TooDeep = true; 2319 if (Style.RemoveBracesLLVM) { 2320 Kind = IfStmtKind::IfElseIf; 2321 TooDeep = NestedTooDeep.pop_back_val(); 2322 } 2323 ElseLeftBrace = 2324 parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces); 2325 if (Style.RemoveBracesLLVM) 2326 NestedTooDeep.push_back(TooDeep); 2327 if (IsPrecededByComment) 2328 --Line->Level; 2329 } else { 2330 addUnwrappedLine(); 2331 ++Line->Level; 2332 parseStructuralElement(); 2333 if (FormatTok->is(tok::eof)) 2334 addUnwrappedLine(); 2335 --Line->Level; 2336 } 2337 } else { 2338 if (Style.RemoveBracesLLVM) 2339 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; 2340 if (NeedsUnwrappedLine) 2341 addUnwrappedLine(); 2342 } 2343 2344 if (!Style.RemoveBracesLLVM) 2345 return nullptr; 2346 2347 assert(!NestedTooDeep.empty()); 2348 const bool KeepElseBraces = 2349 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back(); 2350 2351 NestedTooDeep.pop_back(); 2352 2353 if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) { 2354 markOptionalBraces(IfLeftBrace); 2355 markOptionalBraces(ElseLeftBrace); 2356 } else if (IfLeftBrace) { 2357 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; 2358 if (IfRightBrace) { 2359 assert(IfRightBrace->MatchingParen == IfLeftBrace); 2360 assert(!IfLeftBrace->Optional); 2361 assert(!IfRightBrace->Optional); 2362 IfLeftBrace->MatchingParen = nullptr; 2363 IfRightBrace->MatchingParen = nullptr; 2364 } 2365 } 2366 2367 if (IfKind) 2368 *IfKind = Kind; 2369 2370 return IfLeftBrace; 2371 } 2372 2373 void UnwrappedLineParser::parseTryCatch() { 2374 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2375 nextToken(); 2376 bool NeedsUnwrappedLine = false; 2377 if (FormatTok->is(tok::colon)) { 2378 // We are in a function try block, what comes is an initializer list. 2379 nextToken(); 2380 2381 // In case identifiers were removed by clang-tidy, what might follow is 2382 // multiple commas in sequence - before the first identifier. 2383 while (FormatTok->is(tok::comma)) 2384 nextToken(); 2385 2386 while (FormatTok->is(tok::identifier)) { 2387 nextToken(); 2388 if (FormatTok->is(tok::l_paren)) 2389 parseParens(); 2390 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2391 FormatTok->is(tok::l_brace)) { 2392 do { 2393 nextToken(); 2394 } while (!FormatTok->is(tok::r_brace)); 2395 nextToken(); 2396 } 2397 2398 // In case identifiers were removed by clang-tidy, what might follow is 2399 // multiple commas in sequence - after the first identifier. 2400 while (FormatTok->is(tok::comma)) 2401 nextToken(); 2402 } 2403 } 2404 // Parse try with resource. 2405 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 2406 parseParens(); 2407 } 2408 2409 keepAncestorBraces(); 2410 2411 if (FormatTok->is(tok::l_brace)) { 2412 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2413 parseBlock(); 2414 if (Style.BraceWrapping.BeforeCatch) { 2415 addUnwrappedLine(); 2416 } else { 2417 NeedsUnwrappedLine = true; 2418 } 2419 } else if (!FormatTok->is(tok::kw_catch)) { 2420 // The C++ standard requires a compound-statement after a try. 2421 // If there's none, we try to assume there's a structuralElement 2422 // and try to continue. 2423 addUnwrappedLine(); 2424 ++Line->Level; 2425 parseStructuralElement(); 2426 --Line->Level; 2427 } 2428 while (true) { 2429 if (FormatTok->is(tok::at)) 2430 nextToken(); 2431 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 2432 tok::kw___finally) || 2433 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 2434 FormatTok->is(Keywords.kw_finally)) || 2435 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 2436 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 2437 break; 2438 nextToken(); 2439 while (FormatTok->isNot(tok::l_brace)) { 2440 if (FormatTok->is(tok::l_paren)) { 2441 parseParens(); 2442 continue; 2443 } 2444 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) { 2445 if (Style.RemoveBracesLLVM) 2446 NestedTooDeep.pop_back(); 2447 return; 2448 } 2449 nextToken(); 2450 } 2451 NeedsUnwrappedLine = false; 2452 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2453 parseBlock(); 2454 if (Style.BraceWrapping.BeforeCatch) 2455 addUnwrappedLine(); 2456 else 2457 NeedsUnwrappedLine = true; 2458 } 2459 2460 if (Style.RemoveBracesLLVM) 2461 NestedTooDeep.pop_back(); 2462 2463 if (NeedsUnwrappedLine) 2464 addUnwrappedLine(); 2465 } 2466 2467 void UnwrappedLineParser::parseNamespace() { 2468 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 2469 "'namespace' expected"); 2470 2471 const FormatToken &InitialToken = *FormatTok; 2472 nextToken(); 2473 if (InitialToken.is(TT_NamespaceMacro)) { 2474 parseParens(); 2475 } else { 2476 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, 2477 tok::l_square, tok::period)) { 2478 if (FormatTok->is(tok::l_square)) 2479 parseSquare(); 2480 else 2481 nextToken(); 2482 } 2483 } 2484 if (FormatTok->Tok.is(tok::l_brace)) { 2485 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2486 addUnwrappedLine(); 2487 2488 unsigned AddLevels = 2489 Style.NamespaceIndentation == FormatStyle::NI_All || 2490 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 2491 DeclarationScopeStack.size() > 1) 2492 ? 1u 2493 : 0u; 2494 bool ManageWhitesmithsBraces = 2495 AddLevels == 0u && 2496 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 2497 2498 // If we're in Whitesmiths mode, indent the brace if we're not indenting 2499 // the whole block. 2500 if (ManageWhitesmithsBraces) 2501 ++Line->Level; 2502 2503 parseBlock(/*MustBeDeclaration=*/true, AddLevels, 2504 /*MunchSemi=*/true, 2505 /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces); 2506 2507 // Munch the semicolon after a namespace. This is more common than one would 2508 // think. Putting the semicolon into its own line is very ugly. 2509 if (FormatTok->Tok.is(tok::semi)) 2510 nextToken(); 2511 2512 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); 2513 2514 if (ManageWhitesmithsBraces) 2515 --Line->Level; 2516 } 2517 // FIXME: Add error handling. 2518 } 2519 2520 void UnwrappedLineParser::parseNew() { 2521 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 2522 nextToken(); 2523 2524 if (Style.isCSharp()) { 2525 do { 2526 if (FormatTok->is(tok::l_brace)) 2527 parseBracedList(); 2528 2529 if (FormatTok->isOneOf(tok::semi, tok::comma)) 2530 return; 2531 2532 nextToken(); 2533 } while (!eof()); 2534 } 2535 2536 if (Style.Language != FormatStyle::LK_Java) 2537 return; 2538 2539 // In Java, we can parse everything up to the parens, which aren't optional. 2540 do { 2541 // There should not be a ;, { or } before the new's open paren. 2542 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 2543 return; 2544 2545 // Consume the parens. 2546 if (FormatTok->is(tok::l_paren)) { 2547 parseParens(); 2548 2549 // If there is a class body of an anonymous class, consume that as child. 2550 if (FormatTok->is(tok::l_brace)) 2551 parseChildBlock(); 2552 return; 2553 } 2554 nextToken(); 2555 } while (!eof()); 2556 } 2557 2558 void UnwrappedLineParser::parseForOrWhileLoop() { 2559 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 2560 "'for', 'while' or foreach macro expected"); 2561 nextToken(); 2562 // JS' for await ( ... 2563 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 2564 nextToken(); 2565 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 2566 nextToken(); 2567 if (FormatTok->Tok.is(tok::l_paren)) 2568 parseParens(); 2569 2570 keepAncestorBraces(); 2571 2572 if (FormatTok->Tok.is(tok::l_brace)) { 2573 FormatToken *LeftBrace = FormatTok; 2574 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2575 parseBlock(); 2576 if (Style.RemoveBracesLLVM) { 2577 assert(!NestedTooDeep.empty()); 2578 if (!NestedTooDeep.back()) 2579 markOptionalBraces(LeftBrace); 2580 } 2581 addUnwrappedLine(); 2582 } else { 2583 addUnwrappedLine(); 2584 ++Line->Level; 2585 parseStructuralElement(); 2586 --Line->Level; 2587 } 2588 2589 if (Style.RemoveBracesLLVM) 2590 NestedTooDeep.pop_back(); 2591 } 2592 2593 void UnwrappedLineParser::parseDoWhile() { 2594 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 2595 nextToken(); 2596 2597 keepAncestorBraces(); 2598 2599 if (FormatTok->Tok.is(tok::l_brace)) { 2600 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2601 parseBlock(); 2602 if (Style.BraceWrapping.BeforeWhile) 2603 addUnwrappedLine(); 2604 } else { 2605 addUnwrappedLine(); 2606 ++Line->Level; 2607 parseStructuralElement(); 2608 --Line->Level; 2609 } 2610 2611 if (Style.RemoveBracesLLVM) 2612 NestedTooDeep.pop_back(); 2613 2614 // FIXME: Add error handling. 2615 if (!FormatTok->Tok.is(tok::kw_while)) { 2616 addUnwrappedLine(); 2617 return; 2618 } 2619 2620 // If in Whitesmiths mode, the line with the while() needs to be indented 2621 // to the same level as the block. 2622 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 2623 ++Line->Level; 2624 2625 nextToken(); 2626 parseStructuralElement(); 2627 } 2628 2629 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 2630 nextToken(); 2631 unsigned OldLineLevel = Line->Level; 2632 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 2633 --Line->Level; 2634 if (LeftAlignLabel) 2635 Line->Level = 0; 2636 2637 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 2638 FormatTok->Tok.is(tok::l_brace)) { 2639 2640 CompoundStatementIndenter Indenter(this, Line->Level, 2641 Style.BraceWrapping.AfterCaseLabel, 2642 Style.BraceWrapping.IndentBraces); 2643 parseBlock(); 2644 if (FormatTok->Tok.is(tok::kw_break)) { 2645 if (Style.BraceWrapping.AfterControlStatement == 2646 FormatStyle::BWACS_Always) { 2647 addUnwrappedLine(); 2648 if (!Style.IndentCaseBlocks && 2649 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 2650 ++Line->Level; 2651 } 2652 } 2653 parseStructuralElement(); 2654 } 2655 addUnwrappedLine(); 2656 } else { 2657 if (FormatTok->is(tok::semi)) 2658 nextToken(); 2659 addUnwrappedLine(); 2660 } 2661 Line->Level = OldLineLevel; 2662 if (FormatTok->isNot(tok::l_brace)) { 2663 parseStructuralElement(); 2664 addUnwrappedLine(); 2665 } 2666 } 2667 2668 void UnwrappedLineParser::parseCaseLabel() { 2669 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 2670 2671 // FIXME: fix handling of complex expressions here. 2672 do { 2673 nextToken(); 2674 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 2675 parseLabel(); 2676 } 2677 2678 void UnwrappedLineParser::parseSwitch() { 2679 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 2680 nextToken(); 2681 if (FormatTok->Tok.is(tok::l_paren)) 2682 parseParens(); 2683 2684 keepAncestorBraces(); 2685 2686 if (FormatTok->Tok.is(tok::l_brace)) { 2687 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2688 parseBlock(); 2689 addUnwrappedLine(); 2690 } else { 2691 addUnwrappedLine(); 2692 ++Line->Level; 2693 parseStructuralElement(); 2694 --Line->Level; 2695 } 2696 2697 if (Style.RemoveBracesLLVM) 2698 NestedTooDeep.pop_back(); 2699 } 2700 2701 void UnwrappedLineParser::parseAccessSpecifier() { 2702 nextToken(); 2703 // Understand Qt's slots. 2704 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 2705 nextToken(); 2706 // Otherwise, we don't know what it is, and we'd better keep the next token. 2707 if (FormatTok->Tok.is(tok::colon)) 2708 nextToken(); 2709 addUnwrappedLine(); 2710 } 2711 2712 void UnwrappedLineParser::parseConcept() { 2713 assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected"); 2714 nextToken(); 2715 if (!FormatTok->Tok.is(tok::identifier)) 2716 return; 2717 nextToken(); 2718 if (!FormatTok->Tok.is(tok::equal)) 2719 return; 2720 nextToken(); 2721 if (FormatTok->Tok.is(tok::kw_requires)) { 2722 nextToken(); 2723 parseRequiresExpression(Line->Level); 2724 } else { 2725 parseConstraintExpression(Line->Level); 2726 } 2727 } 2728 2729 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) { 2730 // requires (R range) 2731 if (FormatTok->Tok.is(tok::l_paren)) { 2732 parseParens(); 2733 if (Style.IndentRequires && OriginalLevel != Line->Level) { 2734 addUnwrappedLine(); 2735 --Line->Level; 2736 } 2737 } 2738 2739 if (FormatTok->Tok.is(tok::l_brace)) { 2740 if (Style.BraceWrapping.AfterFunction) 2741 addUnwrappedLine(); 2742 FormatTok->setType(TT_FunctionLBrace); 2743 parseBlock(); 2744 addUnwrappedLine(); 2745 } else { 2746 parseConstraintExpression(OriginalLevel); 2747 } 2748 } 2749 2750 void UnwrappedLineParser::parseConstraintExpression( 2751 unsigned int OriginalLevel) { 2752 // requires Id<T> && Id<T> || Id<T> 2753 while ( 2754 FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) { 2755 nextToken(); 2756 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less, 2757 tok::greater, tok::comma, tok::ellipsis)) { 2758 if (FormatTok->Tok.is(tok::less)) { 2759 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2760 /*ClosingBraceKind=*/tok::greater); 2761 continue; 2762 } 2763 nextToken(); 2764 } 2765 if (FormatTok->Tok.is(tok::kw_requires)) { 2766 parseRequiresExpression(OriginalLevel); 2767 } 2768 if (FormatTok->Tok.is(tok::less)) { 2769 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2770 /*ClosingBraceKind=*/tok::greater); 2771 } 2772 2773 if (FormatTok->Tok.is(tok::l_paren)) { 2774 parseParens(); 2775 } 2776 if (FormatTok->Tok.is(tok::l_brace)) { 2777 if (Style.BraceWrapping.AfterFunction) 2778 addUnwrappedLine(); 2779 FormatTok->setType(TT_FunctionLBrace); 2780 parseBlock(); 2781 } 2782 if (FormatTok->Tok.is(tok::semi)) { 2783 // Eat any trailing semi. 2784 nextToken(); 2785 addUnwrappedLine(); 2786 } 2787 if (FormatTok->Tok.is(tok::colon)) { 2788 return; 2789 } 2790 if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) { 2791 if (FormatTok->Previous && 2792 !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires, 2793 tok::coloncolon)) { 2794 addUnwrappedLine(); 2795 } 2796 if (Style.IndentRequires && OriginalLevel != Line->Level) { 2797 --Line->Level; 2798 } 2799 break; 2800 } else { 2801 FormatTok->setType(TT_ConstraintJunctions); 2802 } 2803 2804 nextToken(); 2805 } 2806 } 2807 2808 void UnwrappedLineParser::parseRequires() { 2809 assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected"); 2810 2811 unsigned OriginalLevel = Line->Level; 2812 if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) { 2813 addUnwrappedLine(); 2814 if (Style.IndentRequires) { 2815 ++Line->Level; 2816 } 2817 } 2818 nextToken(); 2819 2820 parseRequiresExpression(OriginalLevel); 2821 } 2822 2823 bool UnwrappedLineParser::parseEnum() { 2824 const FormatToken &InitialToken = *FormatTok; 2825 2826 // Won't be 'enum' for NS_ENUMs. 2827 if (FormatTok->Tok.is(tok::kw_enum)) 2828 nextToken(); 2829 2830 // In TypeScript, "enum" can also be used as property name, e.g. in interface 2831 // declarations. An "enum" keyword followed by a colon would be a syntax 2832 // error and thus assume it is just an identifier. 2833 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 2834 return false; 2835 2836 // In protobuf, "enum" can be used as a field name. 2837 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 2838 return false; 2839 2840 // Eat up enum class ... 2841 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 2842 nextToken(); 2843 2844 while (FormatTok->Tok.getIdentifierInfo() || 2845 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 2846 tok::greater, tok::comma, tok::question)) { 2847 nextToken(); 2848 // We can have macros or attributes in between 'enum' and the enum name. 2849 if (FormatTok->is(tok::l_paren)) 2850 parseParens(); 2851 if (FormatTok->is(tok::identifier)) { 2852 nextToken(); 2853 // If there are two identifiers in a row, this is likely an elaborate 2854 // return type. In Java, this can be "implements", etc. 2855 if (Style.isCpp() && FormatTok->is(tok::identifier)) 2856 return false; 2857 } 2858 } 2859 2860 // Just a declaration or something is wrong. 2861 if (FormatTok->isNot(tok::l_brace)) 2862 return true; 2863 FormatTok->setBlockKind(BK_Block); 2864 2865 if (Style.Language == FormatStyle::LK_Java) { 2866 // Java enums are different. 2867 parseJavaEnumBody(); 2868 return true; 2869 } 2870 if (Style.Language == FormatStyle::LK_Proto) { 2871 parseBlock(/*MustBeDeclaration=*/true); 2872 return true; 2873 } 2874 2875 if (!Style.AllowShortEnumsOnASingleLine && 2876 ShouldBreakBeforeBrace(Style, InitialToken)) 2877 addUnwrappedLine(); 2878 // Parse enum body. 2879 nextToken(); 2880 if (!Style.AllowShortEnumsOnASingleLine) { 2881 addUnwrappedLine(); 2882 Line->Level += 1; 2883 } 2884 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true, 2885 /*IsEnum=*/true); 2886 if (!Style.AllowShortEnumsOnASingleLine) 2887 Line->Level -= 1; 2888 if (HasError) { 2889 if (FormatTok->is(tok::semi)) 2890 nextToken(); 2891 addUnwrappedLine(); 2892 } 2893 return true; 2894 2895 // There is no addUnwrappedLine() here so that we fall through to parsing a 2896 // structural element afterwards. Thus, in "enum A {} n, m;", 2897 // "} n, m;" will end up in one unwrapped line. 2898 } 2899 2900 bool UnwrappedLineParser::parseStructLike() { 2901 // parseRecord falls through and does not yet add an unwrapped line as a 2902 // record declaration or definition can start a structural element. 2903 parseRecord(); 2904 // This does not apply to Java, JavaScript and C#. 2905 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 2906 Style.isCSharp()) { 2907 if (FormatTok->is(tok::semi)) 2908 nextToken(); 2909 addUnwrappedLine(); 2910 return true; 2911 } 2912 return false; 2913 } 2914 2915 namespace { 2916 // A class used to set and restore the Token position when peeking 2917 // ahead in the token source. 2918 class ScopedTokenPosition { 2919 unsigned StoredPosition; 2920 FormatTokenSource *Tokens; 2921 2922 public: 2923 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 2924 assert(Tokens && "Tokens expected to not be null"); 2925 StoredPosition = Tokens->getPosition(); 2926 } 2927 2928 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 2929 }; 2930 } // namespace 2931 2932 // Look to see if we have [[ by looking ahead, if 2933 // its not then rewind to the original position. 2934 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 2935 ScopedTokenPosition AutoPosition(Tokens); 2936 FormatToken *Tok = Tokens->getNextToken(); 2937 // We already read the first [ check for the second. 2938 if (!Tok->is(tok::l_square)) { 2939 return false; 2940 } 2941 // Double check that the attribute is just something 2942 // fairly simple. 2943 while (Tok->isNot(tok::eof)) { 2944 if (Tok->is(tok::r_square)) { 2945 break; 2946 } 2947 Tok = Tokens->getNextToken(); 2948 } 2949 if (Tok->is(tok::eof)) 2950 return false; 2951 Tok = Tokens->getNextToken(); 2952 if (!Tok->is(tok::r_square)) { 2953 return false; 2954 } 2955 Tok = Tokens->getNextToken(); 2956 if (Tok->is(tok::semi)) { 2957 return false; 2958 } 2959 return true; 2960 } 2961 2962 void UnwrappedLineParser::parseJavaEnumBody() { 2963 // Determine whether the enum is simple, i.e. does not have a semicolon or 2964 // constants with class bodies. Simple enums can be formatted like braced 2965 // lists, contracted to a single line, etc. 2966 unsigned StoredPosition = Tokens->getPosition(); 2967 bool IsSimple = true; 2968 FormatToken *Tok = Tokens->getNextToken(); 2969 while (!Tok->is(tok::eof)) { 2970 if (Tok->is(tok::r_brace)) 2971 break; 2972 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2973 IsSimple = false; 2974 break; 2975 } 2976 // FIXME: This will also mark enums with braces in the arguments to enum 2977 // constants as "not simple". This is probably fine in practice, though. 2978 Tok = Tokens->getNextToken(); 2979 } 2980 FormatTok = Tokens->setPosition(StoredPosition); 2981 2982 if (IsSimple) { 2983 nextToken(); 2984 parseBracedList(); 2985 addUnwrappedLine(); 2986 return; 2987 } 2988 2989 // Parse the body of a more complex enum. 2990 // First add a line for everything up to the "{". 2991 nextToken(); 2992 addUnwrappedLine(); 2993 ++Line->Level; 2994 2995 // Parse the enum constants. 2996 while (FormatTok) { 2997 if (FormatTok->is(tok::l_brace)) { 2998 // Parse the constant's class body. 2999 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 3000 /*MunchSemi=*/false); 3001 } else if (FormatTok->is(tok::l_paren)) { 3002 parseParens(); 3003 } else if (FormatTok->is(tok::comma)) { 3004 nextToken(); 3005 addUnwrappedLine(); 3006 } else if (FormatTok->is(tok::semi)) { 3007 nextToken(); 3008 addUnwrappedLine(); 3009 break; 3010 } else if (FormatTok->is(tok::r_brace)) { 3011 addUnwrappedLine(); 3012 break; 3013 } else { 3014 nextToken(); 3015 } 3016 } 3017 3018 // Parse the class body after the enum's ";" if any. 3019 parseLevel(/*HasOpeningBrace=*/true); 3020 nextToken(); 3021 --Line->Level; 3022 addUnwrappedLine(); 3023 } 3024 3025 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3026 const FormatToken &InitialToken = *FormatTok; 3027 nextToken(); 3028 3029 // The actual identifier can be a nested name specifier, and in macros 3030 // it is often token-pasted. 3031 // An [[attribute]] can be before the identifier. 3032 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 3033 tok::kw___attribute, tok::kw___declspec, 3034 tok::kw_alignas, tok::l_square, tok::r_square) || 3035 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 3036 FormatTok->isOneOf(tok::period, tok::comma))) { 3037 if (Style.isJavaScript() && 3038 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 3039 // JavaScript/TypeScript supports inline object types in 3040 // extends/implements positions: 3041 // class Foo implements {bar: number} { } 3042 nextToken(); 3043 if (FormatTok->is(tok::l_brace)) { 3044 tryToParseBracedList(); 3045 continue; 3046 } 3047 } 3048 bool IsNonMacroIdentifier = 3049 FormatTok->is(tok::identifier) && 3050 FormatTok->TokenText != FormatTok->TokenText.upper(); 3051 nextToken(); 3052 // We can have macros or attributes in between 'class' and the class name. 3053 if (!IsNonMacroIdentifier) { 3054 if (FormatTok->Tok.is(tok::l_paren)) { 3055 parseParens(); 3056 } else if (FormatTok->is(TT_AttributeSquare)) { 3057 parseSquare(); 3058 // Consume the closing TT_AttributeSquare. 3059 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare)) 3060 nextToken(); 3061 } 3062 } 3063 } 3064 3065 // Note that parsing away template declarations here leads to incorrectly 3066 // accepting function declarations as record declarations. 3067 // In general, we cannot solve this problem. Consider: 3068 // class A<int> B() {} 3069 // which can be a function definition or a class definition when B() is a 3070 // macro. If we find enough real-world cases where this is a problem, we 3071 // can parse for the 'template' keyword in the beginning of the statement, 3072 // and thus rule out the record production in case there is no template 3073 // (this would still leave us with an ambiguity between template function 3074 // and class declarations). 3075 if (FormatTok->isOneOf(tok::colon, tok::less)) { 3076 while (!eof()) { 3077 if (FormatTok->is(tok::l_brace)) { 3078 calculateBraceTypes(/*ExpectClassBody=*/true); 3079 if (!tryToParseBracedList()) 3080 break; 3081 } 3082 if (FormatTok->is(tok::l_square) && !tryToParseLambda()) 3083 break; 3084 if (FormatTok->Tok.is(tok::semi)) 3085 return; 3086 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 3087 addUnwrappedLine(); 3088 nextToken(); 3089 parseCSharpGenericTypeConstraint(); 3090 break; 3091 } 3092 nextToken(); 3093 } 3094 } 3095 if (FormatTok->Tok.is(tok::l_brace)) { 3096 if (ParseAsExpr) { 3097 parseChildBlock(); 3098 } else { 3099 if (ShouldBreakBeforeBrace(Style, InitialToken)) 3100 addUnwrappedLine(); 3101 3102 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 3103 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 3104 } 3105 } 3106 // There is no addUnwrappedLine() here so that we fall through to parsing a 3107 // structural element afterwards. Thus, in "class A {} n, m;", 3108 // "} n, m;" will end up in one unwrapped line. 3109 } 3110 3111 void UnwrappedLineParser::parseObjCMethod() { 3112 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && 3113 "'(' or identifier expected."); 3114 do { 3115 if (FormatTok->Tok.is(tok::semi)) { 3116 nextToken(); 3117 addUnwrappedLine(); 3118 return; 3119 } else if (FormatTok->Tok.is(tok::l_brace)) { 3120 if (Style.BraceWrapping.AfterFunction) 3121 addUnwrappedLine(); 3122 parseBlock(); 3123 addUnwrappedLine(); 3124 return; 3125 } else { 3126 nextToken(); 3127 } 3128 } while (!eof()); 3129 } 3130 3131 void UnwrappedLineParser::parseObjCProtocolList() { 3132 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 3133 do { 3134 nextToken(); 3135 // Early exit in case someone forgot a close angle. 3136 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3137 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 3138 return; 3139 } while (!eof() && FormatTok->Tok.isNot(tok::greater)); 3140 nextToken(); // Skip '>'. 3141 } 3142 3143 void UnwrappedLineParser::parseObjCUntilAtEnd() { 3144 do { 3145 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 3146 nextToken(); 3147 addUnwrappedLine(); 3148 break; 3149 } 3150 if (FormatTok->is(tok::l_brace)) { 3151 parseBlock(); 3152 // In ObjC interfaces, nothing should be following the "}". 3153 addUnwrappedLine(); 3154 } else if (FormatTok->is(tok::r_brace)) { 3155 // Ignore stray "}". parseStructuralElement doesn't consume them. 3156 nextToken(); 3157 addUnwrappedLine(); 3158 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 3159 nextToken(); 3160 parseObjCMethod(); 3161 } else { 3162 parseStructuralElement(); 3163 } 3164 } while (!eof()); 3165 } 3166 3167 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 3168 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 3169 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 3170 nextToken(); 3171 nextToken(); // interface name 3172 3173 // @interface can be followed by a lightweight generic 3174 // specialization list, then either a base class or a category. 3175 if (FormatTok->Tok.is(tok::less)) { 3176 parseObjCLightweightGenerics(); 3177 } 3178 if (FormatTok->Tok.is(tok::colon)) { 3179 nextToken(); 3180 nextToken(); // base class name 3181 // The base class can also have lightweight generics applied to it. 3182 if (FormatTok->Tok.is(tok::less)) { 3183 parseObjCLightweightGenerics(); 3184 } 3185 } else if (FormatTok->Tok.is(tok::l_paren)) 3186 // Skip category, if present. 3187 parseParens(); 3188 3189 if (FormatTok->Tok.is(tok::less)) 3190 parseObjCProtocolList(); 3191 3192 if (FormatTok->Tok.is(tok::l_brace)) { 3193 if (Style.BraceWrapping.AfterObjCDeclaration) 3194 addUnwrappedLine(); 3195 parseBlock(/*MustBeDeclaration=*/true); 3196 } 3197 3198 // With instance variables, this puts '}' on its own line. Without instance 3199 // variables, this ends the @interface line. 3200 addUnwrappedLine(); 3201 3202 parseObjCUntilAtEnd(); 3203 } 3204 3205 void UnwrappedLineParser::parseObjCLightweightGenerics() { 3206 assert(FormatTok->Tok.is(tok::less)); 3207 // Unlike protocol lists, generic parameterizations support 3208 // nested angles: 3209 // 3210 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 3211 // NSObject <NSCopying, NSSecureCoding> 3212 // 3213 // so we need to count how many open angles we have left. 3214 unsigned NumOpenAngles = 1; 3215 do { 3216 nextToken(); 3217 // Early exit in case someone forgot a close angle. 3218 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3219 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 3220 break; 3221 if (FormatTok->Tok.is(tok::less)) 3222 ++NumOpenAngles; 3223 else if (FormatTok->Tok.is(tok::greater)) { 3224 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 3225 --NumOpenAngles; 3226 } 3227 } while (!eof() && NumOpenAngles != 0); 3228 nextToken(); // Skip '>'. 3229 } 3230 3231 // Returns true for the declaration/definition form of @protocol, 3232 // false for the expression form. 3233 bool UnwrappedLineParser::parseObjCProtocol() { 3234 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 3235 nextToken(); 3236 3237 if (FormatTok->is(tok::l_paren)) 3238 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 3239 return false; 3240 3241 // The definition/declaration form, 3242 // @protocol Foo 3243 // - (int)someMethod; 3244 // @end 3245 3246 nextToken(); // protocol name 3247 3248 if (FormatTok->Tok.is(tok::less)) 3249 parseObjCProtocolList(); 3250 3251 // Check for protocol declaration. 3252 if (FormatTok->Tok.is(tok::semi)) { 3253 nextToken(); 3254 addUnwrappedLine(); 3255 return true; 3256 } 3257 3258 addUnwrappedLine(); 3259 parseObjCUntilAtEnd(); 3260 return true; 3261 } 3262 3263 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 3264 bool IsImport = FormatTok->is(Keywords.kw_import); 3265 assert(IsImport || FormatTok->is(tok::kw_export)); 3266 nextToken(); 3267 3268 // Consume the "default" in "export default class/function". 3269 if (FormatTok->is(tok::kw_default)) 3270 nextToken(); 3271 3272 // Consume "async function", "function" and "default function", so that these 3273 // get parsed as free-standing JS functions, i.e. do not require a trailing 3274 // semicolon. 3275 if (FormatTok->is(Keywords.kw_async)) 3276 nextToken(); 3277 if (FormatTok->is(Keywords.kw_function)) { 3278 nextToken(); 3279 return; 3280 } 3281 3282 // For imports, `export *`, `export {...}`, consume the rest of the line up 3283 // to the terminating `;`. For everything else, just return and continue 3284 // parsing the structural element, i.e. the declaration or expression for 3285 // `export default`. 3286 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 3287 !FormatTok->isStringLiteral()) 3288 return; 3289 3290 while (!eof()) { 3291 if (FormatTok->is(tok::semi)) 3292 return; 3293 if (Line->Tokens.empty()) { 3294 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 3295 // import statement should terminate. 3296 return; 3297 } 3298 if (FormatTok->is(tok::l_brace)) { 3299 FormatTok->setBlockKind(BK_Block); 3300 nextToken(); 3301 parseBracedList(); 3302 } else { 3303 nextToken(); 3304 } 3305 } 3306 } 3307 3308 void UnwrappedLineParser::parseStatementMacro() { 3309 nextToken(); 3310 if (FormatTok->is(tok::l_paren)) 3311 parseParens(); 3312 if (FormatTok->is(tok::semi)) 3313 nextToken(); 3314 addUnwrappedLine(); 3315 } 3316 3317 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 3318 StringRef Prefix = "") { 3319 llvm::dbgs() << Prefix << "Line(" << Line.Level 3320 << ", FSC=" << Line.FirstStartColumn << ")" 3321 << (Line.InPPDirective ? " MACRO" : "") << ": "; 3322 for (const auto &Node : Line.Tokens) { 3323 llvm::dbgs() << Node.Tok->Tok.getName() << "[" 3324 << "T=" << static_cast<unsigned>(Node.Tok->getType()) 3325 << ", OC=" << Node.Tok->OriginalColumn << "] "; 3326 } 3327 for (const auto &Node : Line.Tokens) 3328 for (const auto &ChildNode : Node.Children) 3329 printDebugInfo(ChildNode, "\nChild: "); 3330 3331 llvm::dbgs() << "\n"; 3332 } 3333 3334 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 3335 if (Line->Tokens.empty()) 3336 return; 3337 LLVM_DEBUG({ 3338 if (CurrentLines == &Lines) 3339 printDebugInfo(*Line); 3340 }); 3341 3342 // If this line closes a block when in Whitesmiths mode, remember that 3343 // information so that the level can be decreased after the line is added. 3344 // This has to happen after the addition of the line since the line itself 3345 // needs to be indented. 3346 bool ClosesWhitesmithsBlock = 3347 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 3348 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 3349 3350 CurrentLines->push_back(std::move(*Line)); 3351 Line->Tokens.clear(); 3352 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 3353 Line->FirstStartColumn = 0; 3354 3355 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 3356 --Line->Level; 3357 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 3358 CurrentLines->append( 3359 std::make_move_iterator(PreprocessorDirectives.begin()), 3360 std::make_move_iterator(PreprocessorDirectives.end())); 3361 PreprocessorDirectives.clear(); 3362 } 3363 // Disconnect the current token from the last token on the previous line. 3364 FormatTok->Previous = nullptr; 3365 } 3366 3367 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 3368 3369 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 3370 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 3371 FormatTok.NewlinesBefore > 0; 3372 } 3373 3374 // Checks if \p FormatTok is a line comment that continues the line comment 3375 // section on \p Line. 3376 static bool 3377 continuesLineCommentSection(const FormatToken &FormatTok, 3378 const UnwrappedLine &Line, 3379 const llvm::Regex &CommentPragmasRegex) { 3380 if (Line.Tokens.empty()) 3381 return false; 3382 3383 StringRef IndentContent = FormatTok.TokenText; 3384 if (FormatTok.TokenText.startswith("//") || 3385 FormatTok.TokenText.startswith("/*")) 3386 IndentContent = FormatTok.TokenText.substr(2); 3387 if (CommentPragmasRegex.match(IndentContent)) 3388 return false; 3389 3390 // If Line starts with a line comment, then FormatTok continues the comment 3391 // section if its original column is greater or equal to the original start 3392 // column of the line. 3393 // 3394 // Define the min column token of a line as follows: if a line ends in '{' or 3395 // contains a '{' followed by a line comment, then the min column token is 3396 // that '{'. Otherwise, the min column token of the line is the first token of 3397 // the line. 3398 // 3399 // If Line starts with a token other than a line comment, then FormatTok 3400 // continues the comment section if its original column is greater than the 3401 // original start column of the min column token of the line. 3402 // 3403 // For example, the second line comment continues the first in these cases: 3404 // 3405 // // first line 3406 // // second line 3407 // 3408 // and: 3409 // 3410 // // first line 3411 // // second line 3412 // 3413 // and: 3414 // 3415 // int i; // first line 3416 // // second line 3417 // 3418 // and: 3419 // 3420 // do { // first line 3421 // // second line 3422 // int i; 3423 // } while (true); 3424 // 3425 // and: 3426 // 3427 // enum { 3428 // a, // first line 3429 // // second line 3430 // b 3431 // }; 3432 // 3433 // The second line comment doesn't continue the first in these cases: 3434 // 3435 // // first line 3436 // // second line 3437 // 3438 // and: 3439 // 3440 // int i; // first line 3441 // // second line 3442 // 3443 // and: 3444 // 3445 // do { // first line 3446 // // second line 3447 // int i; 3448 // } while (true); 3449 // 3450 // and: 3451 // 3452 // enum { 3453 // a, // first line 3454 // // second line 3455 // }; 3456 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 3457 3458 // Scan for '{//'. If found, use the column of '{' as a min column for line 3459 // comment section continuation. 3460 const FormatToken *PreviousToken = nullptr; 3461 for (const UnwrappedLineNode &Node : Line.Tokens) { 3462 if (PreviousToken && PreviousToken->is(tok::l_brace) && 3463 isLineComment(*Node.Tok)) { 3464 MinColumnToken = PreviousToken; 3465 break; 3466 } 3467 PreviousToken = Node.Tok; 3468 3469 // Grab the last newline preceding a token in this unwrapped line. 3470 if (Node.Tok->NewlinesBefore > 0) { 3471 MinColumnToken = Node.Tok; 3472 } 3473 } 3474 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 3475 MinColumnToken = PreviousToken; 3476 } 3477 3478 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 3479 MinColumnToken); 3480 } 3481 3482 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 3483 bool JustComments = Line->Tokens.empty(); 3484 for (FormatToken *Tok : CommentsBeforeNextToken) { 3485 // Line comments that belong to the same line comment section are put on the 3486 // same line since later we might want to reflow content between them. 3487 // Additional fine-grained breaking of line comment sections is controlled 3488 // by the class BreakableLineCommentSection in case it is desirable to keep 3489 // several line comment sections in the same unwrapped line. 3490 // 3491 // FIXME: Consider putting separate line comment sections as children to the 3492 // unwrapped line instead. 3493 Tok->ContinuesLineCommentSection = 3494 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 3495 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 3496 addUnwrappedLine(); 3497 pushToken(Tok); 3498 } 3499 if (NewlineBeforeNext && JustComments) 3500 addUnwrappedLine(); 3501 CommentsBeforeNextToken.clear(); 3502 } 3503 3504 void UnwrappedLineParser::nextToken(int LevelDifference) { 3505 if (eof()) 3506 return; 3507 flushComments(isOnNewLine(*FormatTok)); 3508 pushToken(FormatTok); 3509 FormatToken *Previous = FormatTok; 3510 if (!Style.isJavaScript()) 3511 readToken(LevelDifference); 3512 else 3513 readTokenWithJavaScriptASI(); 3514 FormatTok->Previous = Previous; 3515 } 3516 3517 void UnwrappedLineParser::distributeComments( 3518 const SmallVectorImpl<FormatToken *> &Comments, 3519 const FormatToken *NextTok) { 3520 // Whether or not a line comment token continues a line is controlled by 3521 // the method continuesLineCommentSection, with the following caveat: 3522 // 3523 // Define a trail of Comments to be a nonempty proper postfix of Comments such 3524 // that each comment line from the trail is aligned with the next token, if 3525 // the next token exists. If a trail exists, the beginning of the maximal 3526 // trail is marked as a start of a new comment section. 3527 // 3528 // For example in this code: 3529 // 3530 // int a; // line about a 3531 // // line 1 about b 3532 // // line 2 about b 3533 // int b; 3534 // 3535 // the two lines about b form a maximal trail, so there are two sections, the 3536 // first one consisting of the single comment "// line about a" and the 3537 // second one consisting of the next two comments. 3538 if (Comments.empty()) 3539 return; 3540 bool ShouldPushCommentsInCurrentLine = true; 3541 bool HasTrailAlignedWithNextToken = false; 3542 unsigned StartOfTrailAlignedWithNextToken = 0; 3543 if (NextTok) { 3544 // We are skipping the first element intentionally. 3545 for (unsigned i = Comments.size() - 1; i > 0; --i) { 3546 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 3547 HasTrailAlignedWithNextToken = true; 3548 StartOfTrailAlignedWithNextToken = i; 3549 } 3550 } 3551 } 3552 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 3553 FormatToken *FormatTok = Comments[i]; 3554 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 3555 FormatTok->ContinuesLineCommentSection = false; 3556 } else { 3557 FormatTok->ContinuesLineCommentSection = 3558 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 3559 } 3560 if (!FormatTok->ContinuesLineCommentSection && 3561 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 3562 ShouldPushCommentsInCurrentLine = false; 3563 } 3564 if (ShouldPushCommentsInCurrentLine) { 3565 pushToken(FormatTok); 3566 } else { 3567 CommentsBeforeNextToken.push_back(FormatTok); 3568 } 3569 } 3570 } 3571 3572 void UnwrappedLineParser::readToken(int LevelDifference) { 3573 SmallVector<FormatToken *, 1> Comments; 3574 do { 3575 FormatTok = Tokens->getNextToken(); 3576 assert(FormatTok); 3577 while (FormatTok->getType() == TT_ConflictStart || 3578 FormatTok->getType() == TT_ConflictEnd || 3579 FormatTok->getType() == TT_ConflictAlternative) { 3580 if (FormatTok->getType() == TT_ConflictStart) { 3581 conditionalCompilationStart(/*Unreachable=*/false); 3582 } else if (FormatTok->getType() == TT_ConflictAlternative) { 3583 conditionalCompilationAlternative(); 3584 } else if (FormatTok->getType() == TT_ConflictEnd) { 3585 conditionalCompilationEnd(); 3586 } 3587 FormatTok = Tokens->getNextToken(); 3588 FormatTok->MustBreakBefore = true; 3589 } 3590 3591 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 3592 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 3593 distributeComments(Comments, FormatTok); 3594 Comments.clear(); 3595 // If there is an unfinished unwrapped line, we flush the preprocessor 3596 // directives only after that unwrapped line was finished later. 3597 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 3598 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 3599 assert((LevelDifference >= 0 || 3600 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 3601 "LevelDifference makes Line->Level negative"); 3602 Line->Level += LevelDifference; 3603 // Comments stored before the preprocessor directive need to be output 3604 // before the preprocessor directive, at the same level as the 3605 // preprocessor directive, as we consider them to apply to the directive. 3606 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 3607 PPBranchLevel > 0) 3608 Line->Level += PPBranchLevel; 3609 flushComments(isOnNewLine(*FormatTok)); 3610 parsePPDirective(); 3611 } 3612 3613 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 3614 !Line->InPPDirective) { 3615 continue; 3616 } 3617 3618 if (!FormatTok->Tok.is(tok::comment)) { 3619 distributeComments(Comments, FormatTok); 3620 Comments.clear(); 3621 return; 3622 } 3623 3624 Comments.push_back(FormatTok); 3625 } while (!eof()); 3626 3627 distributeComments(Comments, nullptr); 3628 Comments.clear(); 3629 } 3630 3631 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 3632 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 3633 if (MustBreakBeforeNextToken) { 3634 Line->Tokens.back().Tok->MustBreakBefore = true; 3635 MustBreakBeforeNextToken = false; 3636 } 3637 } 3638 3639 } // end namespace format 3640 } // end namespace clang 3641