1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "clang/Basic/TokenKinds.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/Support/Debug.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 #include <algorithm> 23 24 #define DEBUG_TYPE "format-parser" 25 26 namespace clang { 27 namespace format { 28 29 class FormatTokenSource { 30 public: 31 virtual ~FormatTokenSource() {} 32 virtual FormatToken *getNextToken() = 0; 33 34 virtual unsigned getPosition() = 0; 35 virtual FormatToken *setPosition(unsigned Position) = 0; 36 }; 37 38 namespace { 39 40 class ScopedDeclarationState { 41 public: 42 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 43 bool MustBeDeclaration) 44 : Line(Line), Stack(Stack) { 45 Line.MustBeDeclaration = MustBeDeclaration; 46 Stack.push_back(MustBeDeclaration); 47 } 48 ~ScopedDeclarationState() { 49 Stack.pop_back(); 50 if (!Stack.empty()) 51 Line.MustBeDeclaration = Stack.back(); 52 else 53 Line.MustBeDeclaration = true; 54 } 55 56 private: 57 UnwrappedLine &Line; 58 std::vector<bool> &Stack; 59 }; 60 61 static bool isLineComment(const FormatToken &FormatTok) { 62 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 63 } 64 65 // Checks if \p FormatTok is a line comment that continues the line comment 66 // \p Previous. 
The original column of \p MinColumnToken is used to determine 67 // whether \p FormatTok is indented enough to the right to continue \p Previous. 68 static bool continuesLineComment(const FormatToken &FormatTok, 69 const FormatToken *Previous, 70 const FormatToken *MinColumnToken) { 71 if (!Previous || !MinColumnToken) 72 return false; 73 unsigned MinContinueColumn = 74 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 75 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 76 isLineComment(*Previous) && 77 FormatTok.OriginalColumn >= MinContinueColumn; 78 } 79 80 class ScopedMacroState : public FormatTokenSource { 81 public: 82 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 83 FormatToken *&ResetToken) 84 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 85 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 86 Token(nullptr), PreviousToken(nullptr) { 87 FakeEOF.Tok.startToken(); 88 FakeEOF.Tok.setKind(tok::eof); 89 TokenSource = this; 90 Line.Level = 0; 91 Line.InPPDirective = true; 92 } 93 94 ~ScopedMacroState() override { 95 TokenSource = PreviousTokenSource; 96 ResetToken = Token; 97 Line.InPPDirective = false; 98 Line.Level = PreviousLineLevel; 99 } 100 101 FormatToken *getNextToken() override { 102 // The \c UnwrappedLineParser guards against this by never calling 103 // \c getNextToken() after it has encountered the first eof token. 
104 assert(!eof()); 105 PreviousToken = Token; 106 Token = PreviousTokenSource->getNextToken(); 107 if (eof()) 108 return &FakeEOF; 109 return Token; 110 } 111 112 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 113 114 FormatToken *setPosition(unsigned Position) override { 115 PreviousToken = nullptr; 116 Token = PreviousTokenSource->setPosition(Position); 117 return Token; 118 } 119 120 private: 121 bool eof() { 122 return Token && Token->HasUnescapedNewline && 123 !continuesLineComment(*Token, PreviousToken, 124 /*MinColumnToken=*/PreviousToken); 125 } 126 127 FormatToken FakeEOF; 128 UnwrappedLine &Line; 129 FormatTokenSource *&TokenSource; 130 FormatToken *&ResetToken; 131 unsigned PreviousLineLevel; 132 FormatTokenSource *PreviousTokenSource; 133 134 FormatToken *Token; 135 FormatToken *PreviousToken; 136 }; 137 138 } // end anonymous namespace 139 140 class ScopedLineState { 141 public: 142 ScopedLineState(UnwrappedLineParser &Parser, 143 bool SwitchToPreprocessorLines = false) 144 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 145 if (SwitchToPreprocessorLines) 146 Parser.CurrentLines = &Parser.PreprocessorDirectives; 147 else if (!Parser.Line->Tokens.empty()) 148 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 149 PreBlockLine = std::move(Parser.Line); 150 Parser.Line = std::make_unique<UnwrappedLine>(); 151 Parser.Line->Level = PreBlockLine->Level; 152 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 153 } 154 155 ~ScopedLineState() { 156 if (!Parser.Line->Tokens.empty()) { 157 Parser.addUnwrappedLine(); 158 } 159 assert(Parser.Line->Tokens.empty()); 160 Parser.Line = std::move(PreBlockLine); 161 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 162 Parser.MustBreakBeforeNextToken = true; 163 Parser.CurrentLines = OriginalLines; 164 } 165 166 private: 167 UnwrappedLineParser &Parser; 168 169 std::unique_ptr<UnwrappedLine> PreBlockLine; 170 SmallVectorImpl<UnwrappedLine> *OriginalLines; 
171 }; 172 173 class CompoundStatementIndenter { 174 public: 175 CompoundStatementIndenter(UnwrappedLineParser *Parser, 176 const FormatStyle &Style, unsigned &LineLevel) 177 : CompoundStatementIndenter(Parser, LineLevel, 178 Style.BraceWrapping.AfterControlStatement, 179 Style.BraceWrapping.IndentBraces) {} 180 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 181 bool WrapBrace, bool IndentBrace) 182 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 183 if (WrapBrace) 184 Parser->addUnwrappedLine(); 185 if (IndentBrace) 186 ++LineLevel; 187 } 188 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 189 190 private: 191 unsigned &LineLevel; 192 unsigned OldLineLevel; 193 }; 194 195 namespace { 196 197 class IndexedTokenSource : public FormatTokenSource { 198 public: 199 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 200 : Tokens(Tokens), Position(-1) {} 201 202 FormatToken *getNextToken() override { 203 ++Position; 204 return Tokens[Position]; 205 } 206 207 unsigned getPosition() override { 208 assert(Position >= 0); 209 return Position; 210 } 211 212 FormatToken *setPosition(unsigned P) override { 213 Position = P; 214 return Tokens[Position]; 215 } 216 217 void reset() { Position = -1; } 218 219 private: 220 ArrayRef<FormatToken *> Tokens; 221 int Position; 222 }; 223 224 } // end anonymous namespace 225 226 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 227 const AdditionalKeywords &Keywords, 228 unsigned FirstStartColumn, 229 ArrayRef<FormatToken *> Tokens, 230 UnwrappedLineConsumer &Callback) 231 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 232 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 233 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 234 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 235 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 236 ? 
IG_Rejected 237 : IG_Inited), 238 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 239 240 void UnwrappedLineParser::reset() { 241 PPBranchLevel = -1; 242 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 243 ? IG_Rejected 244 : IG_Inited; 245 IncludeGuardToken = nullptr; 246 Line.reset(new UnwrappedLine); 247 CommentsBeforeNextToken.clear(); 248 FormatTok = nullptr; 249 MustBreakBeforeNextToken = false; 250 PreprocessorDirectives.clear(); 251 CurrentLines = &Lines; 252 DeclarationScopeStack.clear(); 253 PPStack.clear(); 254 Line->FirstStartColumn = FirstStartColumn; 255 } 256 257 void UnwrappedLineParser::parse() { 258 IndexedTokenSource TokenSource(AllTokens); 259 Line->FirstStartColumn = FirstStartColumn; 260 do { 261 LLVM_DEBUG(llvm::dbgs() << "----\n"); 262 reset(); 263 Tokens = &TokenSource; 264 TokenSource.reset(); 265 266 readToken(); 267 parseFile(); 268 269 // If we found an include guard then all preprocessor directives (other than 270 // the guard) are over-indented by one. 271 if (IncludeGuard == IG_Found) 272 for (auto &Line : Lines) 273 if (Line.InPPDirective && Line.Level > 0) 274 --Line.Level; 275 276 // Create line with eof token. 
277 pushToken(FormatTok); 278 addUnwrappedLine(); 279 280 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 281 E = Lines.end(); 282 I != E; ++I) { 283 Callback.consumeUnwrappedLine(*I); 284 } 285 Callback.finishRun(); 286 Lines.clear(); 287 while (!PPLevelBranchIndex.empty() && 288 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 289 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 290 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 291 } 292 if (!PPLevelBranchIndex.empty()) { 293 ++PPLevelBranchIndex.back(); 294 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 295 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 296 } 297 } while (!PPLevelBranchIndex.empty()); 298 } 299 300 void UnwrappedLineParser::parseFile() { 301 // The top-level context in a file always has declarations, except for pre- 302 // processor directives and JavaScript files. 303 bool MustBeDeclaration = 304 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 305 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 306 MustBeDeclaration); 307 if (Style.Language == FormatStyle::LK_TextProto) 308 parseBracedList(); 309 else 310 parseLevel(/*HasOpeningBrace=*/false); 311 // Make sure to format the remaining tokens. 312 // 313 // LK_TextProto is special since its top-level is parsed as the body of a 314 // braced list, which does not necessarily have natural line separators such 315 // as a semicolon. Comments after the last entry that have been determined to 316 // not belong to that line, as in: 317 // key: value 318 // // endfile comment 319 // do not have a chance to be put on a line of their own until this point. 320 // Here we add this newline before end-of-file comments. 
321 if (Style.Language == FormatStyle::LK_TextProto && 322 !CommentsBeforeNextToken.empty()) 323 addUnwrappedLine(); 324 flushComments(true); 325 addUnwrappedLine(); 326 } 327 328 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 329 do { 330 switch (FormatTok->Tok.getKind()) { 331 case tok::l_brace: 332 return; 333 default: 334 if (FormatTok->is(Keywords.kw_where)) { 335 addUnwrappedLine(); 336 nextToken(); 337 parseCSharpGenericTypeConstraint(); 338 break; 339 } 340 nextToken(); 341 break; 342 } 343 } while (!eof()); 344 } 345 346 void UnwrappedLineParser::parseCSharpAttribute() { 347 int UnpairedSquareBrackets = 1; 348 do { 349 switch (FormatTok->Tok.getKind()) { 350 case tok::r_square: 351 nextToken(); 352 --UnpairedSquareBrackets; 353 if (UnpairedSquareBrackets == 0) { 354 addUnwrappedLine(); 355 return; 356 } 357 break; 358 case tok::l_square: 359 ++UnpairedSquareBrackets; 360 nextToken(); 361 break; 362 default: 363 nextToken(); 364 break; 365 } 366 } while (!eof()); 367 } 368 369 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 370 bool SwitchLabelEncountered = false; 371 do { 372 tok::TokenKind kind = FormatTok->Tok.getKind(); 373 if (FormatTok->getType() == TT_MacroBlockBegin) { 374 kind = tok::l_brace; 375 } else if (FormatTok->getType() == TT_MacroBlockEnd) { 376 kind = tok::r_brace; 377 } 378 379 switch (kind) { 380 case tok::comment: 381 nextToken(); 382 addUnwrappedLine(); 383 break; 384 case tok::l_brace: 385 // FIXME: Add parameter whether this can happen - if this happens, we must 386 // be in a non-declaration context. 
387 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 388 continue; 389 parseBlock(/*MustBeDeclaration=*/false); 390 addUnwrappedLine(); 391 break; 392 case tok::r_brace: 393 if (HasOpeningBrace) 394 return; 395 nextToken(); 396 addUnwrappedLine(); 397 break; 398 case tok::kw_default: { 399 unsigned StoredPosition = Tokens->getPosition(); 400 FormatToken *Next; 401 do { 402 Next = Tokens->getNextToken(); 403 } while (Next && Next->is(tok::comment)); 404 FormatTok = Tokens->setPosition(StoredPosition); 405 if (Next && Next->isNot(tok::colon)) { 406 // default not followed by ':' is not a case label; treat it like 407 // an identifier. 408 parseStructuralElement(); 409 break; 410 } 411 // Else, if it is 'default:', fall through to the case handling. 412 LLVM_FALLTHROUGH; 413 } 414 case tok::kw_case: 415 if (Style.Language == FormatStyle::LK_JavaScript && 416 Line->MustBeDeclaration) { 417 // A 'case: string' style field declaration. 418 parseStructuralElement(); 419 break; 420 } 421 if (!SwitchLabelEncountered && 422 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 423 ++Line->Level; 424 SwitchLabelEncountered = true; 425 parseStructuralElement(); 426 break; 427 case tok::l_square: 428 if (Style.isCSharp()) { 429 nextToken(); 430 parseCSharpAttribute(); 431 break; 432 } 433 LLVM_FALLTHROUGH; 434 default: 435 parseStructuralElement(/*IsTopLevel=*/true); 436 break; 437 } 438 } while (!eof()); 439 } 440 441 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 442 // We'll parse forward through the tokens until we hit 443 // a closing brace or eof - note that getNextToken() will 444 // parse macros, so this will magically work inside macro 445 // definitions, too. 446 unsigned StoredPosition = Tokens->getPosition(); 447 FormatToken *Tok = FormatTok; 448 const FormatToken *PrevTok = Tok->Previous; 449 // Keep a stack of positions of lbrace tokens. 
We will 450 // update information about whether an lbrace starts a 451 // braced init list or a different block during the loop. 452 SmallVector<FormatToken *, 8> LBraceStack; 453 assert(Tok->Tok.is(tok::l_brace)); 454 do { 455 // Get next non-comment token. 456 FormatToken *NextTok; 457 unsigned ReadTokens = 0; 458 do { 459 NextTok = Tokens->getNextToken(); 460 ++ReadTokens; 461 } while (NextTok->is(tok::comment)); 462 463 switch (Tok->Tok.getKind()) { 464 case tok::l_brace: 465 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 466 if (PrevTok->isOneOf(tok::colon, tok::less)) 467 // A ':' indicates this code is in a type, or a braced list 468 // following a label in an object literal ({a: {b: 1}}). 469 // A '<' could be an object used in a comparison, but that is nonsense 470 // code (can never return true), so more likely it is a generic type 471 // argument (`X<{a: string; b: number}>`). 472 // The code below could be confused by semicolons between the 473 // individual members in a type member list, which would normally 474 // trigger BK_Block. In both cases, this must be parsed as an inline 475 // braced init. 476 Tok->setBlockKind(BK_BracedInit); 477 else if (PrevTok->is(tok::r_paren)) 478 // `) { }` can only occur in function or method declarations in JS. 479 Tok->setBlockKind(BK_Block); 480 } else { 481 Tok->setBlockKind(BK_Unknown); 482 } 483 LBraceStack.push_back(Tok); 484 break; 485 case tok::r_brace: 486 if (LBraceStack.empty()) 487 break; 488 if (LBraceStack.back()->is(BK_Unknown)) { 489 bool ProbablyBracedList = false; 490 if (Style.Language == FormatStyle::LK_Proto) { 491 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 492 } else { 493 // Using OriginalColumn to distinguish between ObjC methods and 494 // binary operators is a bit hacky. 
495 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 496 NextTok->OriginalColumn == 0; 497 498 // If there is a comma, semicolon or right paren after the closing 499 // brace, we assume this is a braced initializer list. Note that 500 // regardless how we mark inner braces here, we will overwrite the 501 // BlockKind later if we parse a braced list (where all blocks 502 // inside are by default braced lists), or when we explicitly detect 503 // blocks (for example while parsing lambdas). 504 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 505 // braced list in JS. 506 ProbablyBracedList = 507 (Style.Language == FormatStyle::LK_JavaScript && 508 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 509 Keywords.kw_as)) || 510 (Style.isCpp() && NextTok->is(tok::l_paren)) || 511 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 512 tok::r_paren, tok::r_square, tok::l_brace, 513 tok::ellipsis) || 514 (NextTok->is(tok::identifier) && 515 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 516 (NextTok->is(tok::semi) && 517 (!ExpectClassBody || LBraceStack.size() != 1)) || 518 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 519 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 520 // We can have an array subscript after a braced init 521 // list, but C++11 attributes are expected after blocks. 
522 NextTok = Tokens->getNextToken(); 523 ++ReadTokens; 524 ProbablyBracedList = NextTok->isNot(tok::l_square); 525 } 526 } 527 if (ProbablyBracedList) { 528 Tok->setBlockKind(BK_BracedInit); 529 LBraceStack.back()->setBlockKind(BK_BracedInit); 530 } else { 531 Tok->setBlockKind(BK_Block); 532 LBraceStack.back()->setBlockKind(BK_Block); 533 } 534 } 535 LBraceStack.pop_back(); 536 break; 537 case tok::identifier: 538 if (!Tok->is(TT_StatementMacro)) 539 break; 540 LLVM_FALLTHROUGH; 541 case tok::at: 542 case tok::semi: 543 case tok::kw_if: 544 case tok::kw_while: 545 case tok::kw_for: 546 case tok::kw_switch: 547 case tok::kw_try: 548 case tok::kw___try: 549 if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown)) 550 LBraceStack.back()->setBlockKind(BK_Block); 551 break; 552 default: 553 break; 554 } 555 PrevTok = Tok; 556 Tok = NextTok; 557 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 558 559 // Assume other blocks for all unclosed opening braces. 560 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 561 if (LBraceStack[i]->is(BK_Unknown)) 562 LBraceStack[i]->setBlockKind(BK_Block); 563 } 564 565 FormatTok = Tokens->setPosition(StoredPosition); 566 } 567 568 template <class T> 569 static inline void hash_combine(std::size_t &seed, const T &v) { 570 std::hash<T> hasher; 571 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 572 } 573 574 size_t UnwrappedLineParser::computePPHash() const { 575 size_t h = 0; 576 for (const auto &i : PPStack) { 577 hash_combine(h, size_t(i.Kind)); 578 hash_combine(h, i.Line); 579 } 580 return h; 581 } 582 583 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels, 584 bool MunchSemi, 585 bool UnindentWhitesmithsBraces) { 586 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 587 "'{' or macro block token expected"); 588 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 589 FormatTok->setBlockKind(BK_Block); 590 591 // For Whitesmiths mode, jump to 
the next level prior to skipping over the 592 // braces. 593 if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 594 ++Line->Level; 595 596 size_t PPStartHash = computePPHash(); 597 598 unsigned InitialLevel = Line->Level; 599 nextToken(/*LevelDifference=*/AddLevels); 600 601 if (MacroBlock && FormatTok->is(tok::l_paren)) 602 parseParens(); 603 604 size_t NbPreprocessorDirectives = 605 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 606 addUnwrappedLine(); 607 size_t OpeningLineIndex = 608 CurrentLines->empty() 609 ? (UnwrappedLine::kInvalidIndex) 610 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 611 612 // Whitesmiths is weird here. The brace needs to be indented for the namespace 613 // block, but the block itself may not be indented depending on the style 614 // settings. This allows the format to back up one level in those cases. 615 if (UnindentWhitesmithsBraces) 616 --Line->Level; 617 618 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 619 MustBeDeclaration); 620 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 621 Line->Level += AddLevels; 622 parseLevel(/*HasOpeningBrace=*/true); 623 624 if (eof()) 625 return; 626 627 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 628 : !FormatTok->is(tok::r_brace)) { 629 Line->Level = InitialLevel; 630 FormatTok->setBlockKind(BK_Block); 631 return; 632 } 633 634 size_t PPEndHash = computePPHash(); 635 636 // Munch the closing brace. 637 nextToken(/*LevelDifference=*/-AddLevels); 638 639 if (MacroBlock && FormatTok->is(tok::l_paren)) 640 parseParens(); 641 642 if (FormatTok->is(tok::arrow)) { 643 // Following the } we can find a trailing return type arrow 644 // as part of an implicit conversion constraint. 
645 nextToken(); 646 parseStructuralElement(); 647 } 648 649 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 650 nextToken(); 651 652 Line->Level = InitialLevel; 653 654 if (PPStartHash == PPEndHash) { 655 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 656 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 657 // Update the opening line to add the forward reference as well 658 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 659 CurrentLines->size() - 1; 660 } 661 } 662 } 663 664 static bool isGoogScope(const UnwrappedLine &Line) { 665 // FIXME: Closure-library specific stuff should not be hard-coded but be 666 // configurable. 667 if (Line.Tokens.size() < 4) 668 return false; 669 auto I = Line.Tokens.begin(); 670 if (I->Tok->TokenText != "goog") 671 return false; 672 ++I; 673 if (I->Tok->isNot(tok::period)) 674 return false; 675 ++I; 676 if (I->Tok->TokenText != "scope") 677 return false; 678 ++I; 679 return I->Tok->is(tok::l_paren); 680 } 681 682 static bool isIIFE(const UnwrappedLine &Line, 683 const AdditionalKeywords &Keywords) { 684 // Look for the start of an immediately invoked anonymous function. 685 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 686 // This is commonly done in JavaScript to create a new, anonymous scope. 687 // Example: (function() { ... 
})() 688 if (Line.Tokens.size() < 3) 689 return false; 690 auto I = Line.Tokens.begin(); 691 if (I->Tok->isNot(tok::l_paren)) 692 return false; 693 ++I; 694 if (I->Tok->isNot(Keywords.kw_function)) 695 return false; 696 ++I; 697 return I->Tok->is(tok::l_paren); 698 } 699 700 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 701 const FormatToken &InitialToken) { 702 if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro)) 703 return Style.BraceWrapping.AfterNamespace; 704 if (InitialToken.is(tok::kw_class)) 705 return Style.BraceWrapping.AfterClass; 706 if (InitialToken.is(tok::kw_union)) 707 return Style.BraceWrapping.AfterUnion; 708 if (InitialToken.is(tok::kw_struct)) 709 return Style.BraceWrapping.AfterStruct; 710 return false; 711 } 712 713 void UnwrappedLineParser::parseChildBlock() { 714 FormatTok->setBlockKind(BK_Block); 715 nextToken(); 716 { 717 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && 718 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 719 ScopedLineState LineState(*this); 720 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 721 /*MustBeDeclaration=*/false); 722 Line->Level += SkipIndent ? 0 : 1; 723 parseLevel(/*HasOpeningBrace=*/true); 724 flushComments(isOnNewLine(*FormatTok)); 725 Line->Level -= SkipIndent ? 
0 : 1; 726 } 727 nextToken(); 728 } 729 730 void UnwrappedLineParser::parsePPDirective() { 731 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 732 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 733 734 nextToken(); 735 736 if (!FormatTok->Tok.getIdentifierInfo()) { 737 parsePPUnknown(); 738 return; 739 } 740 741 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 742 case tok::pp_define: 743 parsePPDefine(); 744 return; 745 case tok::pp_if: 746 parsePPIf(/*IfDef=*/false); 747 break; 748 case tok::pp_ifdef: 749 case tok::pp_ifndef: 750 parsePPIf(/*IfDef=*/true); 751 break; 752 case tok::pp_else: 753 parsePPElse(); 754 break; 755 case tok::pp_elifdef: 756 case tok::pp_elifndef: 757 case tok::pp_elif: 758 parsePPElIf(); 759 break; 760 case tok::pp_endif: 761 parsePPEndIf(); 762 break; 763 default: 764 parsePPUnknown(); 765 break; 766 } 767 } 768 769 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 770 size_t Line = CurrentLines->size(); 771 if (CurrentLines == &PreprocessorDirectives) 772 Line += Lines.size(); 773 774 if (Unreachable || 775 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 776 PPStack.push_back({PP_Unreachable, Line}); 777 else 778 PPStack.push_back({PP_Conditional, Line}); 779 } 780 781 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 782 ++PPBranchLevel; 783 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 784 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 785 PPLevelBranchIndex.push_back(0); 786 PPLevelBranchCount.push_back(0); 787 } 788 PPChainBranchIndex.push(0); 789 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 790 conditionalCompilationCondition(Unreachable || Skip); 791 } 792 793 void UnwrappedLineParser::conditionalCompilationAlternative() { 794 if (!PPStack.empty()) 795 PPStack.pop_back(); 796 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 797 if (!PPChainBranchIndex.empty()) 798 
++PPChainBranchIndex.top(); 799 conditionalCompilationCondition( 800 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 801 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 802 } 803 804 void UnwrappedLineParser::conditionalCompilationEnd() { 805 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 806 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 807 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 808 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 809 } 810 } 811 // Guard against #endif's without #if. 812 if (PPBranchLevel > -1) 813 --PPBranchLevel; 814 if (!PPChainBranchIndex.empty()) 815 PPChainBranchIndex.pop(); 816 if (!PPStack.empty()) 817 PPStack.pop_back(); 818 } 819 820 void UnwrappedLineParser::parsePPIf(bool IfDef) { 821 bool IfNDef = FormatTok->is(tok::pp_ifndef); 822 nextToken(); 823 bool Unreachable = false; 824 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 825 Unreachable = true; 826 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 827 Unreachable = true; 828 conditionalCompilationStart(Unreachable); 829 FormatToken *IfCondition = FormatTok; 830 // If there's a #ifndef on the first line, and the only lines before it are 831 // comments, it could be an include guard. 832 bool MaybeIncludeGuard = IfNDef; 833 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 834 for (auto &Line : Lines) { 835 if (!Line.Tokens.front().Tok->is(tok::comment)) { 836 MaybeIncludeGuard = false; 837 IncludeGuard = IG_Rejected; 838 break; 839 } 840 } 841 --PPBranchLevel; 842 parsePPUnknown(); 843 ++PPBranchLevel; 844 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 845 IncludeGuard = IG_IfNdefed; 846 IncludeGuardToken = IfCondition; 847 } 848 } 849 850 void UnwrappedLineParser::parsePPElse() { 851 // If a potential include guard has an #else, it's not an include guard. 
852 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 853 IncludeGuard = IG_Rejected; 854 conditionalCompilationAlternative(); 855 if (PPBranchLevel > -1) 856 --PPBranchLevel; 857 parsePPUnknown(); 858 ++PPBranchLevel; 859 } 860 861 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 862 863 void UnwrappedLineParser::parsePPEndIf() { 864 conditionalCompilationEnd(); 865 parsePPUnknown(); 866 // If the #endif of a potential include guard is the last thing in the file, 867 // then we found an include guard. 868 unsigned TokenPosition = Tokens->getPosition(); 869 FormatToken *PeekNext = AllTokens[TokenPosition]; 870 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && 871 PeekNext->is(tok::eof) && 872 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 873 IncludeGuard = IG_Found; 874 } 875 876 void UnwrappedLineParser::parsePPDefine() { 877 nextToken(); 878 879 if (!FormatTok->Tok.getIdentifierInfo()) { 880 IncludeGuard = IG_Rejected; 881 IncludeGuardToken = nullptr; 882 parsePPUnknown(); 883 return; 884 } 885 886 if (IncludeGuard == IG_IfNdefed && 887 IncludeGuardToken->TokenText == FormatTok->TokenText) { 888 IncludeGuard = IG_Defined; 889 IncludeGuardToken = nullptr; 890 for (auto &Line : Lines) { 891 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 892 IncludeGuard = IG_Rejected; 893 break; 894 } 895 } 896 } 897 898 nextToken(); 899 if (FormatTok->Tok.getKind() == tok::l_paren && 900 FormatTok->WhitespaceRange.getBegin() == 901 FormatTok->WhitespaceRange.getEnd()) { 902 parseParens(); 903 } 904 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 905 Line->Level += PPBranchLevel + 1; 906 addUnwrappedLine(); 907 ++Line->Level; 908 909 // Errors during a preprocessor directive can only affect the layout of the 910 // preprocessor directive, and thus we ignore them. 
An alternative approach 911 // would be to use the same approach we use on the file level (no 912 // re-indentation if there was a structural error) within the macro 913 // definition. 914 parseFile(); 915 } 916 917 void UnwrappedLineParser::parsePPUnknown() { 918 do { 919 nextToken(); 920 } while (!eof()); 921 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 922 Line->Level += PPBranchLevel + 1; 923 addUnwrappedLine(); 924 } 925 926 // Here we exclude certain tokens that are not usually the first token in an 927 // unwrapped line. This is used in attempt to distinguish macro calls without 928 // trailing semicolons from other constructs split to several lines. 929 static bool tokenCanStartNewLine(const FormatToken &Tok) { 930 // Semicolon can be a null-statement, l_square can be a start of a macro or 931 // a C++11 attribute, but this doesn't seem to be common. 932 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 933 Tok.isNot(TT_AttributeSquare) && 934 // Tokens that can only be used as binary operators and a part of 935 // overloaded operator names. 936 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 937 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 938 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 939 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 940 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 941 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 942 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 943 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 944 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 945 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 946 Tok.isNot(tok::lesslessequal) && 947 // Colon is used in labels, base class lists, initializer lists, 948 // range-based for loops, ternary operator, but should never be the 949 // first token in an unwrapped line. 950 Tok.isNot(tok::colon) && 951 // 'noexcept' is a trailing annotation. 
952 Tok.isNot(tok::kw_noexcept); 953 } 954 955 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 956 const FormatToken *FormatTok) { 957 // FIXME: This returns true for C/C++ keywords like 'struct'. 958 return FormatTok->is(tok::identifier) && 959 (FormatTok->Tok.getIdentifierInfo() == nullptr || 960 !FormatTok->isOneOf( 961 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 962 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 963 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 964 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 965 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 966 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, 967 Keywords.kw_from)); 968 } 969 970 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 971 const FormatToken *FormatTok) { 972 return FormatTok->Tok.isLiteral() || 973 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 974 mustBeJSIdent(Keywords, FormatTok); 975 } 976 977 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 978 // when encountered after a value (see mustBeJSIdentOrValue). 
979 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 980 const FormatToken *FormatTok) { 981 return FormatTok->isOneOf( 982 tok::kw_return, Keywords.kw_yield, 983 // conditionals 984 tok::kw_if, tok::kw_else, 985 // loops 986 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 987 // switch/case 988 tok::kw_switch, tok::kw_case, 989 // exceptions 990 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 991 // declaration 992 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 993 Keywords.kw_async, Keywords.kw_function, 994 // import/export 995 Keywords.kw_import, tok::kw_export); 996 } 997 998 // This function checks whether a token starts the first parameter declaration 999 // in a K&R C (aka C78) function definition, e.g.: 1000 // int f(a, b) 1001 // short a, b; 1002 // { 1003 // return a + b; 1004 // } 1005 static bool isC78ParameterDecl(const FormatToken *Tok) { 1006 if (!Tok) 1007 return false; 1008 1009 if (!Tok->isOneOf(tok::kw_int, tok::kw_char, tok::kw_float, tok::kw_double, 1010 tok::kw_struct, tok::kw_union, tok::kw_long, tok::kw_short, 1011 tok::kw_unsigned, tok::kw_register)) 1012 return false; 1013 1014 Tok = Tok->Previous; 1015 if (!Tok || Tok->isNot(tok::r_paren)) 1016 return false; 1017 1018 Tok = Tok->Previous; 1019 if (!Tok || Tok->isNot(tok::identifier)) 1020 return false; 1021 1022 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1023 } 1024 1025 // readTokenWithJavaScriptASI reads the next token and terminates the current 1026 // line if JavaScript Automatic Semicolon Insertion must 1027 // happen between the current token and the next token. 1028 // 1029 // This method is conservative - it cannot cover all edge cases of JavaScript, 1030 // but only aims to correctly handle certain well known cases. It *must not* 1031 // return true in speculative cases. 
1032 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1033 FormatToken *Previous = FormatTok; 1034 readToken(); 1035 FormatToken *Next = FormatTok; 1036 1037 bool IsOnSameLine = 1038 CommentsBeforeNextToken.empty() 1039 ? Next->NewlinesBefore == 0 1040 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 1041 if (IsOnSameLine) 1042 return; 1043 1044 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 1045 bool PreviousStartsTemplateExpr = 1046 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 1047 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 1048 // If the line contains an '@' sign, the previous token might be an 1049 // annotation, which can precede another identifier/value. 1050 bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(), 1051 [](UnwrappedLineNode &LineNode) { 1052 return LineNode.Tok->is(tok::at); 1053 }) != Line->Tokens.end(); 1054 if (HasAt) 1055 return; 1056 } 1057 if (Next->is(tok::exclaim) && PreviousMustBeValue) 1058 return addUnwrappedLine(); 1059 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 1060 bool NextEndsTemplateExpr = 1061 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 1062 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 1063 (PreviousMustBeValue || 1064 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 1065 tok::minusminus))) 1066 return addUnwrappedLine(); 1067 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 1068 isJSDeclOrStmt(Keywords, Next)) 1069 return addUnwrappedLine(); 1070 } 1071 1072 void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) { 1073 assert(!FormatTok->is(tok::l_brace)); 1074 if (Style.Language == FormatStyle::LK_TableGen && 1075 FormatTok->is(tok::pp_include)) { 1076 nextToken(); 1077 if (FormatTok->is(tok::string_literal)) 1078 nextToken(); 1079 addUnwrappedLine(); 1080 return; 1081 } 1082 switch (FormatTok->Tok.getKind()) { 1083 case 
tok::kw_asm: 1084 nextToken(); 1085 if (FormatTok->is(tok::l_brace)) { 1086 FormatTok->setType(TT_InlineASMBrace); 1087 nextToken(); 1088 while (FormatTok && FormatTok->isNot(tok::eof)) { 1089 if (FormatTok->is(tok::r_brace)) { 1090 FormatTok->setType(TT_InlineASMBrace); 1091 nextToken(); 1092 addUnwrappedLine(); 1093 break; 1094 } 1095 FormatTok->Finalized = true; 1096 nextToken(); 1097 } 1098 } 1099 break; 1100 case tok::kw_namespace: 1101 parseNamespace(); 1102 return; 1103 case tok::kw_public: 1104 case tok::kw_protected: 1105 case tok::kw_private: 1106 if (Style.Language == FormatStyle::LK_Java || 1107 Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) 1108 nextToken(); 1109 else 1110 parseAccessSpecifier(); 1111 return; 1112 case tok::kw_if: 1113 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1114 // field/method declaration. 1115 break; 1116 parseIfThenElse(); 1117 return; 1118 case tok::kw_for: 1119 case tok::kw_while: 1120 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1121 // field/method declaration. 1122 break; 1123 parseForOrWhileLoop(); 1124 return; 1125 case tok::kw_do: 1126 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1127 // field/method declaration. 1128 break; 1129 parseDoWhile(); 1130 return; 1131 case tok::kw_switch: 1132 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1133 // 'switch: string' field declaration. 1134 break; 1135 parseSwitch(); 1136 return; 1137 case tok::kw_default: 1138 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1139 // 'default: string' field declaration. 1140 break; 1141 nextToken(); 1142 if (FormatTok->is(tok::colon)) { 1143 parseLabel(); 1144 return; 1145 } 1146 // e.g. "default void f() {}" in a Java interface. 
1147 break; 1148 case tok::kw_case: 1149 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1150 // 'case: string' field declaration. 1151 break; 1152 parseCaseLabel(); 1153 return; 1154 case tok::kw_try: 1155 case tok::kw___try: 1156 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1157 // field/method declaration. 1158 break; 1159 parseTryCatch(); 1160 return; 1161 case tok::kw_extern: 1162 nextToken(); 1163 if (FormatTok->Tok.is(tok::string_literal)) { 1164 nextToken(); 1165 if (FormatTok->Tok.is(tok::l_brace)) { 1166 if (!Style.IndentExternBlock) { 1167 if (Style.BraceWrapping.AfterExternBlock) { 1168 addUnwrappedLine(); 1169 } 1170 unsigned AddLevels = Style.BraceWrapping.AfterExternBlock ? 1u : 0u; 1171 parseBlock(/*MustBeDeclaration=*/true, AddLevels); 1172 } else { 1173 unsigned AddLevels = 1174 Style.IndentExternBlock == FormatStyle::IEBS_Indent ? 1u : 0u; 1175 parseBlock(/*MustBeDeclaration=*/true, AddLevels); 1176 } 1177 addUnwrappedLine(); 1178 return; 1179 } 1180 } 1181 break; 1182 case tok::kw_export: 1183 if (Style.Language == FormatStyle::LK_JavaScript) { 1184 parseJavaScriptEs6ImportExport(); 1185 return; 1186 } 1187 if (!Style.isCpp()) 1188 break; 1189 // Handle C++ "(inline|export) namespace". 
1190 LLVM_FALLTHROUGH; 1191 case tok::kw_inline: 1192 nextToken(); 1193 if (FormatTok->Tok.is(tok::kw_namespace)) { 1194 parseNamespace(); 1195 return; 1196 } 1197 break; 1198 case tok::identifier: 1199 if (FormatTok->is(TT_ForEachMacro)) { 1200 parseForOrWhileLoop(); 1201 return; 1202 } 1203 if (FormatTok->is(TT_MacroBlockBegin)) { 1204 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 1205 /*MunchSemi=*/false); 1206 return; 1207 } 1208 if (FormatTok->is(Keywords.kw_import)) { 1209 if (Style.Language == FormatStyle::LK_JavaScript) { 1210 parseJavaScriptEs6ImportExport(); 1211 return; 1212 } 1213 if (Style.Language == FormatStyle::LK_Proto) { 1214 nextToken(); 1215 if (FormatTok->is(tok::kw_public)) 1216 nextToken(); 1217 if (!FormatTok->is(tok::string_literal)) 1218 return; 1219 nextToken(); 1220 if (FormatTok->is(tok::semi)) 1221 nextToken(); 1222 addUnwrappedLine(); 1223 return; 1224 } 1225 } 1226 if (Style.isCpp() && 1227 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1228 Keywords.kw_slots, Keywords.kw_qslots)) { 1229 nextToken(); 1230 if (FormatTok->is(tok::colon)) { 1231 nextToken(); 1232 addUnwrappedLine(); 1233 return; 1234 } 1235 } 1236 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1237 parseStatementMacro(); 1238 return; 1239 } 1240 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) { 1241 parseNamespace(); 1242 return; 1243 } 1244 // In all other cases, parse the declaration. 
1245 break; 1246 default: 1247 break; 1248 } 1249 do { 1250 const FormatToken *Previous = FormatTok->Previous; 1251 switch (FormatTok->Tok.getKind()) { 1252 case tok::at: 1253 nextToken(); 1254 if (FormatTok->Tok.is(tok::l_brace)) { 1255 nextToken(); 1256 parseBracedList(); 1257 break; 1258 } else if (Style.Language == FormatStyle::LK_Java && 1259 FormatTok->is(Keywords.kw_interface)) { 1260 nextToken(); 1261 break; 1262 } 1263 switch (FormatTok->Tok.getObjCKeywordID()) { 1264 case tok::objc_public: 1265 case tok::objc_protected: 1266 case tok::objc_package: 1267 case tok::objc_private: 1268 return parseAccessSpecifier(); 1269 case tok::objc_interface: 1270 case tok::objc_implementation: 1271 return parseObjCInterfaceOrImplementation(); 1272 case tok::objc_protocol: 1273 if (parseObjCProtocol()) 1274 return; 1275 break; 1276 case tok::objc_end: 1277 return; // Handled by the caller. 1278 case tok::objc_optional: 1279 case tok::objc_required: 1280 nextToken(); 1281 addUnwrappedLine(); 1282 return; 1283 case tok::objc_autoreleasepool: 1284 nextToken(); 1285 if (FormatTok->Tok.is(tok::l_brace)) { 1286 if (Style.BraceWrapping.AfterControlStatement == 1287 FormatStyle::BWACS_Always) 1288 addUnwrappedLine(); 1289 parseBlock(/*MustBeDeclaration=*/false); 1290 } 1291 addUnwrappedLine(); 1292 return; 1293 case tok::objc_synchronized: 1294 nextToken(); 1295 if (FormatTok->Tok.is(tok::l_paren)) 1296 // Skip synchronization object 1297 parseParens(); 1298 if (FormatTok->Tok.is(tok::l_brace)) { 1299 if (Style.BraceWrapping.AfterControlStatement == 1300 FormatStyle::BWACS_Always) 1301 addUnwrappedLine(); 1302 parseBlock(/*MustBeDeclaration=*/false); 1303 } 1304 addUnwrappedLine(); 1305 return; 1306 case tok::objc_try: 1307 // This branch isn't strictly necessary (the kw_try case below would 1308 // do this too after the tok::at is parsed above). But be explicit. 
1309 parseTryCatch(); 1310 return; 1311 default: 1312 break; 1313 } 1314 break; 1315 case tok::kw_concept: 1316 parseConcept(); 1317 break; 1318 case tok::kw_requires: 1319 parseRequires(); 1320 break; 1321 case tok::kw_enum: 1322 // Ignore if this is part of "template <enum ...". 1323 if (Previous && Previous->is(tok::less)) { 1324 nextToken(); 1325 break; 1326 } 1327 1328 // parseEnum falls through and does not yet add an unwrapped line as an 1329 // enum definition can start a structural element. 1330 if (!parseEnum()) 1331 break; 1332 // This only applies for C++. 1333 if (!Style.isCpp()) { 1334 addUnwrappedLine(); 1335 return; 1336 } 1337 break; 1338 case tok::kw_typedef: 1339 nextToken(); 1340 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1341 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS, 1342 Keywords.kw_CF_CLOSED_ENUM, 1343 Keywords.kw_NS_CLOSED_ENUM)) 1344 parseEnum(); 1345 break; 1346 case tok::kw_struct: 1347 case tok::kw_union: 1348 case tok::kw_class: 1349 if (parseStructLike()) { 1350 return; 1351 } 1352 break; 1353 case tok::period: 1354 nextToken(); 1355 // In Java, classes have an implicit static member "class". 1356 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1357 FormatTok->is(tok::kw_class)) 1358 nextToken(); 1359 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 1360 FormatTok->Tok.getIdentifierInfo()) 1361 // JavaScript only has pseudo keywords, all keywords are allowed to 1362 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1363 nextToken(); 1364 break; 1365 case tok::semi: 1366 nextToken(); 1367 addUnwrappedLine(); 1368 return; 1369 case tok::r_brace: 1370 addUnwrappedLine(); 1371 return; 1372 case tok::l_paren: 1373 parseParens(); 1374 // Break the unwrapped line if a K&R C function definition has a parameter 1375 // declaration. 
1376 if (!IsTopLevel || !Style.isCpp()) 1377 break; 1378 if (!Previous || Previous->isNot(tok::identifier)) 1379 break; 1380 if (Previous->Previous && Previous->Previous->is(tok::at)) 1381 break; 1382 if (!Line->Tokens.begin()->Tok->is(tok::kw_typedef) && 1383 isC78ParameterDecl(FormatTok)) { 1384 addUnwrappedLine(); 1385 return; 1386 } 1387 break; 1388 case tok::kw_operator: 1389 nextToken(); 1390 if (FormatTok->isBinaryOperator()) 1391 nextToken(); 1392 break; 1393 case tok::caret: 1394 nextToken(); 1395 if (FormatTok->Tok.isAnyIdentifier() || 1396 FormatTok->isSimpleTypeSpecifier()) 1397 nextToken(); 1398 if (FormatTok->is(tok::l_paren)) 1399 parseParens(); 1400 if (FormatTok->is(tok::l_brace)) 1401 parseChildBlock(); 1402 break; 1403 case tok::l_brace: 1404 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { 1405 // A block outside of parentheses must be the last part of a 1406 // structural element. 1407 // FIXME: Figure out cases where this is not true, and add projections 1408 // for them (the one we know is missing are lambdas). 1409 if (Style.BraceWrapping.AfterFunction) 1410 addUnwrappedLine(); 1411 FormatTok->setType(TT_FunctionLBrace); 1412 parseBlock(/*MustBeDeclaration=*/false); 1413 addUnwrappedLine(); 1414 return; 1415 } 1416 // Otherwise this was a braced init list, and the structural 1417 // element continues. 1418 break; 1419 case tok::kw_try: 1420 if (Style.Language == FormatStyle::LK_JavaScript && 1421 Line->MustBeDeclaration) { 1422 // field/method declaration. 1423 nextToken(); 1424 break; 1425 } 1426 // We arrive here when parsing function-try blocks. 
1427 if (Style.BraceWrapping.AfterFunction) 1428 addUnwrappedLine(); 1429 parseTryCatch(); 1430 return; 1431 case tok::identifier: { 1432 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) && 1433 Line->MustBeDeclaration) { 1434 addUnwrappedLine(); 1435 parseCSharpGenericTypeConstraint(); 1436 break; 1437 } 1438 if (FormatTok->is(TT_MacroBlockEnd)) { 1439 addUnwrappedLine(); 1440 return; 1441 } 1442 1443 // Function declarations (as opposed to function expressions) are parsed 1444 // on their own unwrapped line by continuing this loop. Function 1445 // expressions (functions that are not on their own line) must not create 1446 // a new unwrapped line, so they are special cased below. 1447 size_t TokenCount = Line->Tokens.size(); 1448 if (Style.Language == FormatStyle::LK_JavaScript && 1449 FormatTok->is(Keywords.kw_function) && 1450 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1451 Keywords.kw_async)))) { 1452 tryToParseJSFunction(); 1453 break; 1454 } 1455 if ((Style.Language == FormatStyle::LK_JavaScript || 1456 Style.Language == FormatStyle::LK_Java) && 1457 FormatTok->is(Keywords.kw_interface)) { 1458 if (Style.Language == FormatStyle::LK_JavaScript) { 1459 // In JavaScript/TypeScript, "interface" can be used as a standalone 1460 // identifier, e.g. in `var interface = 1;`. If "interface" is 1461 // followed by another identifier, it is very like to be an actual 1462 // interface declaration. 
1463 unsigned StoredPosition = Tokens->getPosition(); 1464 FormatToken *Next = Tokens->getNextToken(); 1465 FormatTok = Tokens->setPosition(StoredPosition); 1466 if (Next && !mustBeJSIdent(Keywords, Next)) { 1467 nextToken(); 1468 break; 1469 } 1470 } 1471 parseRecord(); 1472 addUnwrappedLine(); 1473 return; 1474 } 1475 1476 if (FormatTok->is(Keywords.kw_interface)) { 1477 if (parseStructLike()) { 1478 return; 1479 } 1480 break; 1481 } 1482 1483 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1484 parseStatementMacro(); 1485 return; 1486 } 1487 1488 // See if the following token should start a new unwrapped line. 1489 StringRef Text = FormatTok->TokenText; 1490 nextToken(); 1491 1492 // JS doesn't have macros, and within classes colons indicate fields, not 1493 // labels. 1494 if (Style.Language == FormatStyle::LK_JavaScript) 1495 break; 1496 1497 TokenCount = Line->Tokens.size(); 1498 if (TokenCount == 1 || 1499 (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) { 1500 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1501 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1502 parseLabel(!Style.IndentGotoLabels); 1503 return; 1504 } 1505 // Recognize function-like macro usages without trailing semicolon as 1506 // well as free-standing macros like Q_OBJECT. 1507 bool FunctionLike = FormatTok->is(tok::l_paren); 1508 if (FunctionLike) 1509 parseParens(); 1510 1511 bool FollowedByNewline = 1512 CommentsBeforeNextToken.empty() 1513 ? FormatTok->NewlinesBefore > 0 1514 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1515 1516 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1517 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { 1518 addUnwrappedLine(); 1519 return; 1520 } 1521 } 1522 break; 1523 } 1524 case tok::equal: 1525 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 1526 // TT_FatArrow. They always start an expression or a child block if 1527 // followed by a curly brace. 
1528 if (FormatTok->is(TT_FatArrow)) { 1529 nextToken(); 1530 if (FormatTok->is(tok::l_brace)) { 1531 // C# may break after => if the next character is a newline. 1532 if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) { 1533 // calling `addUnwrappedLine()` here causes odd parsing errors. 1534 FormatTok->MustBreakBefore = true; 1535 } 1536 parseChildBlock(); 1537 } 1538 break; 1539 } 1540 1541 nextToken(); 1542 if (FormatTok->Tok.is(tok::l_brace)) { 1543 // Block kind should probably be set to BK_BracedInit for any language. 1544 // C# needs this change to ensure that array initialisers and object 1545 // initialisers are indented the same way. 1546 if (Style.isCSharp()) 1547 FormatTok->setBlockKind(BK_BracedInit); 1548 nextToken(); 1549 parseBracedList(); 1550 } else if (Style.Language == FormatStyle::LK_Proto && 1551 FormatTok->Tok.is(tok::less)) { 1552 nextToken(); 1553 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 1554 /*ClosingBraceKind=*/tok::greater); 1555 } 1556 break; 1557 case tok::l_square: 1558 parseSquare(); 1559 break; 1560 case tok::kw_new: 1561 parseNew(); 1562 break; 1563 default: 1564 nextToken(); 1565 break; 1566 } 1567 } while (!eof()); 1568 } 1569 1570 bool UnwrappedLineParser::tryToParsePropertyAccessor() { 1571 assert(FormatTok->is(tok::l_brace)); 1572 if (!Style.isCSharp()) 1573 return false; 1574 // See if it's a property accessor. 1575 if (FormatTok->Previous->isNot(tok::identifier)) 1576 return false; 1577 1578 // See if we are inside a property accessor. 1579 // 1580 // Record the current tokenPosition so that we can advance and 1581 // reset the current token. `Next` is not set yet so we need 1582 // another way to advance along the token stream. 
1583 unsigned int StoredPosition = Tokens->getPosition(); 1584 FormatToken *Tok = Tokens->getNextToken(); 1585 1586 // A trivial property accessor is of the form: 1587 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] } 1588 // Track these as they do not require line breaks to be introduced. 1589 bool HasGetOrSet = false; 1590 bool IsTrivialPropertyAccessor = true; 1591 while (!eof()) { 1592 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private, 1593 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get, 1594 Keywords.kw_set)) { 1595 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set)) 1596 HasGetOrSet = true; 1597 Tok = Tokens->getNextToken(); 1598 continue; 1599 } 1600 if (Tok->isNot(tok::r_brace)) 1601 IsTrivialPropertyAccessor = false; 1602 break; 1603 } 1604 1605 if (!HasGetOrSet) { 1606 Tokens->setPosition(StoredPosition); 1607 return false; 1608 } 1609 1610 // Try to parse the property accessor: 1611 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties 1612 Tokens->setPosition(StoredPosition); 1613 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true) 1614 addUnwrappedLine(); 1615 nextToken(); 1616 do { 1617 switch (FormatTok->Tok.getKind()) { 1618 case tok::r_brace: 1619 nextToken(); 1620 if (FormatTok->is(tok::equal)) { 1621 while (!eof() && FormatTok->isNot(tok::semi)) 1622 nextToken(); 1623 nextToken(); 1624 } 1625 addUnwrappedLine(); 1626 return true; 1627 case tok::l_brace: 1628 ++Line->Level; 1629 parseBlock(/*MustBeDeclaration=*/true); 1630 addUnwrappedLine(); 1631 --Line->Level; 1632 break; 1633 case tok::equal: 1634 if (FormatTok->is(TT_FatArrow)) { 1635 ++Line->Level; 1636 do { 1637 nextToken(); 1638 } while (!eof() && FormatTok->isNot(tok::semi)); 1639 nextToken(); 1640 addUnwrappedLine(); 1641 --Line->Level; 1642 break; 1643 } 1644 nextToken(); 1645 break; 1646 default: 1647 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) && 1648 
!IsTrivialPropertyAccessor) { 1649 // Non-trivial get/set needs to be on its own line. 1650 addUnwrappedLine(); 1651 } 1652 nextToken(); 1653 } 1654 } while (!eof()); 1655 1656 // Unreachable for well-formed code (paired '{' and '}'). 1657 return true; 1658 } 1659 1660 bool UnwrappedLineParser::tryToParseLambda() { 1661 if (!Style.isCpp()) { 1662 nextToken(); 1663 return false; 1664 } 1665 assert(FormatTok->is(tok::l_square)); 1666 FormatToken &LSquare = *FormatTok; 1667 if (!tryToParseLambdaIntroducer()) 1668 return false; 1669 1670 bool SeenArrow = false; 1671 1672 while (FormatTok->isNot(tok::l_brace)) { 1673 if (FormatTok->isSimpleTypeSpecifier()) { 1674 nextToken(); 1675 continue; 1676 } 1677 switch (FormatTok->Tok.getKind()) { 1678 case tok::l_brace: 1679 break; 1680 case tok::l_paren: 1681 parseParens(); 1682 break; 1683 case tok::amp: 1684 case tok::star: 1685 case tok::kw_const: 1686 case tok::comma: 1687 case tok::less: 1688 case tok::greater: 1689 case tok::identifier: 1690 case tok::numeric_constant: 1691 case tok::coloncolon: 1692 case tok::kw_class: 1693 case tok::kw_mutable: 1694 case tok::kw_noexcept: 1695 case tok::kw_template: 1696 case tok::kw_typename: 1697 nextToken(); 1698 break; 1699 // Specialization of a template with an integer parameter can contain 1700 // arithmetic, logical, comparison and ternary operators. 1701 // 1702 // FIXME: This also accepts sequences of operators that are not in the scope 1703 // of a template argument list. 1704 // 1705 // In a C++ lambda a template type can only occur after an arrow. We use 1706 // this as an heuristic to distinguish between Objective-C expressions 1707 // followed by an `a->b` expression, such as: 1708 // ([obj func:arg] + a->b) 1709 // Otherwise the code below would parse as a lambda. 
1710 // 1711 // FIXME: This heuristic is incorrect for C++20 generic lambdas with 1712 // explicit template lists: []<bool b = true && false>(U &&u){} 1713 case tok::plus: 1714 case tok::minus: 1715 case tok::exclaim: 1716 case tok::tilde: 1717 case tok::slash: 1718 case tok::percent: 1719 case tok::lessless: 1720 case tok::pipe: 1721 case tok::pipepipe: 1722 case tok::ampamp: 1723 case tok::caret: 1724 case tok::equalequal: 1725 case tok::exclaimequal: 1726 case tok::greaterequal: 1727 case tok::lessequal: 1728 case tok::question: 1729 case tok::colon: 1730 case tok::ellipsis: 1731 case tok::kw_true: 1732 case tok::kw_false: 1733 if (SeenArrow) { 1734 nextToken(); 1735 break; 1736 } 1737 return true; 1738 case tok::arrow: 1739 // This might or might not actually be a lambda arrow (this could be an 1740 // ObjC method invocation followed by a dereferencing arrow). We might 1741 // reset this back to TT_Unknown in TokenAnnotator. 1742 FormatTok->setType(TT_LambdaArrow); 1743 SeenArrow = true; 1744 nextToken(); 1745 break; 1746 default: 1747 return true; 1748 } 1749 } 1750 FormatTok->setType(TT_LambdaLBrace); 1751 LSquare.setType(TT_LambdaLSquare); 1752 parseChildBlock(); 1753 return true; 1754 } 1755 1756 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1757 const FormatToken *Previous = FormatTok->Previous; 1758 if (Previous && 1759 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1760 tok::kw_delete, tok::l_square) || 1761 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1762 Previous->isSimpleTypeSpecifier())) { 1763 nextToken(); 1764 return false; 1765 } 1766 nextToken(); 1767 if (FormatTok->is(tok::l_square)) { 1768 return false; 1769 } 1770 parseSquare(/*LambdaIntroducer=*/true); 1771 return true; 1772 } 1773 1774 void UnwrappedLineParser::tryToParseJSFunction() { 1775 assert(FormatTok->is(Keywords.kw_function) || 1776 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1777 if 
(FormatTok->is(Keywords.kw_async)) 1778 nextToken(); 1779 // Consume "function". 1780 nextToken(); 1781 1782 // Consume * (generator function). Treat it like C++'s overloaded operators. 1783 if (FormatTok->is(tok::star)) { 1784 FormatTok->setType(TT_OverloadedOperator); 1785 nextToken(); 1786 } 1787 1788 // Consume function name. 1789 if (FormatTok->is(tok::identifier)) 1790 nextToken(); 1791 1792 if (FormatTok->isNot(tok::l_paren)) 1793 return; 1794 1795 // Parse formal parameter list. 1796 parseParens(); 1797 1798 if (FormatTok->is(tok::colon)) { 1799 // Parse a type definition. 1800 nextToken(); 1801 1802 // Eat the type declaration. For braced inline object types, balance braces, 1803 // otherwise just parse until finding an l_brace for the function body. 1804 if (FormatTok->is(tok::l_brace)) 1805 tryToParseBracedList(); 1806 else 1807 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1808 nextToken(); 1809 } 1810 1811 if (FormatTok->is(tok::semi)) 1812 return; 1813 1814 parseChildBlock(); 1815 } 1816 1817 bool UnwrappedLineParser::tryToParseBracedList() { 1818 if (FormatTok->is(BK_Unknown)) 1819 calculateBraceTypes(); 1820 assert(FormatTok->isNot(BK_Unknown)); 1821 if (FormatTok->is(BK_Block)) 1822 return false; 1823 nextToken(); 1824 parseBracedList(); 1825 return true; 1826 } 1827 1828 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1829 bool IsEnum, 1830 tok::TokenKind ClosingBraceKind) { 1831 bool HasError = false; 1832 1833 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1834 // replace this by using parseAssignmentExpression() inside. 1835 do { 1836 if (Style.isCSharp()) { 1837 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 1838 // TT_FatArrow. They always start an expression or a child block if 1839 // followed by a curly brace. 
1840 if (FormatTok->is(TT_FatArrow)) { 1841 nextToken(); 1842 if (FormatTok->is(tok::l_brace)) { 1843 // C# may break after => if the next character is a newline. 1844 if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) { 1845 // calling `addUnwrappedLine()` here causes odd parsing errors. 1846 FormatTok->MustBreakBefore = true; 1847 } 1848 parseChildBlock(); 1849 continue; 1850 } 1851 } 1852 } 1853 if (Style.Language == FormatStyle::LK_JavaScript) { 1854 if (FormatTok->is(Keywords.kw_function) || 1855 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1856 tryToParseJSFunction(); 1857 continue; 1858 } 1859 if (FormatTok->is(TT_FatArrow)) { 1860 nextToken(); 1861 // Fat arrows can be followed by simple expressions or by child blocks 1862 // in curly braces. 1863 if (FormatTok->is(tok::l_brace)) { 1864 parseChildBlock(); 1865 continue; 1866 } 1867 } 1868 if (FormatTok->is(tok::l_brace)) { 1869 // Could be a method inside of a braced list `{a() { return 1; }}`. 1870 if (tryToParseBracedList()) 1871 continue; 1872 parseChildBlock(); 1873 } 1874 } 1875 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1876 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 1877 addUnwrappedLine(); 1878 nextToken(); 1879 return !HasError; 1880 } 1881 switch (FormatTok->Tok.getKind()) { 1882 case tok::caret: 1883 nextToken(); 1884 if (FormatTok->is(tok::l_brace)) { 1885 parseChildBlock(); 1886 } 1887 break; 1888 case tok::l_square: 1889 if (Style.isCSharp()) 1890 parseSquare(); 1891 else 1892 tryToParseLambda(); 1893 break; 1894 case tok::l_paren: 1895 parseParens(); 1896 // JavaScript can just have free standing methods and getters/setters in 1897 // object literals. Detect them by a "{" following ")". 
1898 if (Style.Language == FormatStyle::LK_JavaScript) { 1899 if (FormatTok->is(tok::l_brace)) 1900 parseChildBlock(); 1901 break; 1902 } 1903 break; 1904 case tok::l_brace: 1905 // Assume there are no blocks inside a braced init list apart 1906 // from the ones we explicitly parse out (like lambdas). 1907 FormatTok->setBlockKind(BK_BracedInit); 1908 nextToken(); 1909 parseBracedList(); 1910 break; 1911 case tok::less: 1912 if (Style.Language == FormatStyle::LK_Proto) { 1913 nextToken(); 1914 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 1915 /*ClosingBraceKind=*/tok::greater); 1916 } else { 1917 nextToken(); 1918 } 1919 break; 1920 case tok::semi: 1921 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1922 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1923 // used for error recovery if we have otherwise determined that this is 1924 // a braced list. 1925 if (Style.Language == FormatStyle::LK_JavaScript) { 1926 nextToken(); 1927 break; 1928 } 1929 HasError = true; 1930 if (!ContinueOnSemicolons) 1931 return !HasError; 1932 nextToken(); 1933 break; 1934 case tok::comma: 1935 nextToken(); 1936 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 1937 addUnwrappedLine(); 1938 break; 1939 default: 1940 nextToken(); 1941 break; 1942 } 1943 } while (!eof()); 1944 return false; 1945 } 1946 1947 void UnwrappedLineParser::parseParens() { 1948 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1949 nextToken(); 1950 do { 1951 switch (FormatTok->Tok.getKind()) { 1952 case tok::l_paren: 1953 parseParens(); 1954 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1955 parseChildBlock(); 1956 break; 1957 case tok::r_paren: 1958 nextToken(); 1959 return; 1960 case tok::r_brace: 1961 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
1962 return; 1963 case tok::l_square: 1964 tryToParseLambda(); 1965 break; 1966 case tok::l_brace: 1967 if (!tryToParseBracedList()) 1968 parseChildBlock(); 1969 break; 1970 case tok::at: 1971 nextToken(); 1972 if (FormatTok->Tok.is(tok::l_brace)) { 1973 nextToken(); 1974 parseBracedList(); 1975 } 1976 break; 1977 case tok::equal: 1978 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 1979 parseStructuralElement(); 1980 else 1981 nextToken(); 1982 break; 1983 case tok::kw_class: 1984 if (Style.Language == FormatStyle::LK_JavaScript) 1985 parseRecord(/*ParseAsExpr=*/true); 1986 else 1987 nextToken(); 1988 break; 1989 case tok::identifier: 1990 if (Style.Language == FormatStyle::LK_JavaScript && 1991 (FormatTok->is(Keywords.kw_function) || 1992 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1993 tryToParseJSFunction(); 1994 else 1995 nextToken(); 1996 break; 1997 default: 1998 nextToken(); 1999 break; 2000 } 2001 } while (!eof()); 2002 } 2003 2004 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2005 if (!LambdaIntroducer) { 2006 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 2007 if (tryToParseLambda()) 2008 return; 2009 } 2010 do { 2011 switch (FormatTok->Tok.getKind()) { 2012 case tok::l_paren: 2013 parseParens(); 2014 break; 2015 case tok::r_square: 2016 nextToken(); 2017 return; 2018 case tok::r_brace: 2019 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2020 return; 2021 case tok::l_square: 2022 parseSquare(); 2023 break; 2024 case tok::l_brace: { 2025 if (!tryToParseBracedList()) 2026 parseChildBlock(); 2027 break; 2028 } 2029 case tok::at: 2030 nextToken(); 2031 if (FormatTok->Tok.is(tok::l_brace)) { 2032 nextToken(); 2033 parseBracedList(); 2034 } 2035 break; 2036 default: 2037 nextToken(); 2038 break; 2039 } 2040 } while (!eof()); 2041 } 2042 2043 void UnwrappedLineParser::parseIfThenElse() { 2044 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 2045 nextToken(); 2046 if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier)) 2047 nextToken(); 2048 if (FormatTok->Tok.is(tok::l_paren)) 2049 parseParens(); 2050 // handle [[likely]] / [[unlikely]] 2051 if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) 2052 parseSquare(); 2053 bool NeedsUnwrappedLine = false; 2054 if (FormatTok->Tok.is(tok::l_brace)) { 2055 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2056 parseBlock(/*MustBeDeclaration=*/false); 2057 if (Style.BraceWrapping.BeforeElse) 2058 addUnwrappedLine(); 2059 else 2060 NeedsUnwrappedLine = true; 2061 } else { 2062 addUnwrappedLine(); 2063 ++Line->Level; 2064 parseStructuralElement(); 2065 --Line->Level; 2066 } 2067 if (FormatTok->Tok.is(tok::kw_else)) { 2068 nextToken(); 2069 // handle [[likely]] / [[unlikely]] 2070 if (FormatTok->Tok.is(tok::l_square) && tryToParseSimpleAttribute()) 2071 parseSquare(); 2072 if (FormatTok->Tok.is(tok::l_brace)) { 2073 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2074 parseBlock(/*MustBeDeclaration=*/false); 2075 addUnwrappedLine(); 2076 } else if (FormatTok->Tok.is(tok::kw_if)) { 2077 FormatToken *Previous = AllTokens[Tokens->getPosition() - 1]; 2078 bool PrecededByComment = Previous->is(tok::comment); 2079 if (PrecededByComment) { 2080 addUnwrappedLine(); 2081 ++Line->Level; 2082 } 2083 parseIfThenElse(); 2084 if (PrecededByComment) 2085 --Line->Level; 2086 } else { 2087 addUnwrappedLine(); 2088 ++Line->Level; 
2089 parseStructuralElement(); 2090 if (FormatTok->is(tok::eof)) 2091 addUnwrappedLine(); 2092 --Line->Level; 2093 } 2094 } else if (NeedsUnwrappedLine) { 2095 addUnwrappedLine(); 2096 } 2097 } 2098 2099 void UnwrappedLineParser::parseTryCatch() { 2100 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2101 nextToken(); 2102 bool NeedsUnwrappedLine = false; 2103 if (FormatTok->is(tok::colon)) { 2104 // We are in a function try block, what comes is an initializer list. 2105 nextToken(); 2106 2107 // In case identifiers were removed by clang-tidy, what might follow is 2108 // multiple commas in sequence - before the first identifier. 2109 while (FormatTok->is(tok::comma)) 2110 nextToken(); 2111 2112 while (FormatTok->is(tok::identifier)) { 2113 nextToken(); 2114 if (FormatTok->is(tok::l_paren)) 2115 parseParens(); 2116 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2117 FormatTok->is(tok::l_brace)) { 2118 do { 2119 nextToken(); 2120 } while (!FormatTok->is(tok::r_brace)); 2121 nextToken(); 2122 } 2123 2124 // In case identifiers were removed by clang-tidy, what might follow is 2125 // multiple commas in sequence - after the first identifier. 2126 while (FormatTok->is(tok::comma)) 2127 nextToken(); 2128 } 2129 } 2130 // Parse try with resource. 2131 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 2132 parseParens(); 2133 } 2134 if (FormatTok->is(tok::l_brace)) { 2135 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2136 parseBlock(/*MustBeDeclaration=*/false); 2137 if (Style.BraceWrapping.BeforeCatch) { 2138 addUnwrappedLine(); 2139 } else { 2140 NeedsUnwrappedLine = true; 2141 } 2142 } else if (!FormatTok->is(tok::kw_catch)) { 2143 // The C++ standard requires a compound-statement after a try. 2144 // If there's none, we try to assume there's a structuralElement 2145 // and try to continue. 
2146 addUnwrappedLine(); 2147 ++Line->Level; 2148 parseStructuralElement(); 2149 --Line->Level; 2150 } 2151 while (1) { 2152 if (FormatTok->is(tok::at)) 2153 nextToken(); 2154 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 2155 tok::kw___finally) || 2156 ((Style.Language == FormatStyle::LK_Java || 2157 Style.Language == FormatStyle::LK_JavaScript) && 2158 FormatTok->is(Keywords.kw_finally)) || 2159 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 2160 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 2161 break; 2162 nextToken(); 2163 while (FormatTok->isNot(tok::l_brace)) { 2164 if (FormatTok->is(tok::l_paren)) { 2165 parseParens(); 2166 continue; 2167 } 2168 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 2169 return; 2170 nextToken(); 2171 } 2172 NeedsUnwrappedLine = false; 2173 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2174 parseBlock(/*MustBeDeclaration=*/false); 2175 if (Style.BraceWrapping.BeforeCatch) 2176 addUnwrappedLine(); 2177 else 2178 NeedsUnwrappedLine = true; 2179 } 2180 if (NeedsUnwrappedLine) 2181 addUnwrappedLine(); 2182 } 2183 2184 void UnwrappedLineParser::parseNamespace() { 2185 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 2186 "'namespace' expected"); 2187 2188 const FormatToken &InitialToken = *FormatTok; 2189 nextToken(); 2190 if (InitialToken.is(TT_NamespaceMacro)) { 2191 parseParens(); 2192 } else { 2193 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, 2194 tok::l_square)) { 2195 if (FormatTok->is(tok::l_square)) 2196 parseSquare(); 2197 else 2198 nextToken(); 2199 } 2200 } 2201 if (FormatTok->Tok.is(tok::l_brace)) { 2202 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2203 addUnwrappedLine(); 2204 2205 unsigned AddLevels = 2206 Style.NamespaceIndentation == FormatStyle::NI_All || 2207 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 2208 DeclarationScopeStack.size() > 1) 2209 ? 
1u 2210 : 0u; 2211 bool ManageWhitesmithsBraces = 2212 AddLevels == 0u && 2213 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 2214 2215 // If we're in Whitesmiths mode, indent the brace if we're not indenting 2216 // the whole block. 2217 if (ManageWhitesmithsBraces) 2218 ++Line->Level; 2219 2220 parseBlock(/*MustBeDeclaration=*/true, AddLevels, 2221 /*MunchSemi=*/true, 2222 /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces); 2223 2224 // Munch the semicolon after a namespace. This is more common than one would 2225 // think. Putting the semicolon into its own line is very ugly. 2226 if (FormatTok->Tok.is(tok::semi)) 2227 nextToken(); 2228 2229 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); 2230 2231 if (ManageWhitesmithsBraces) 2232 --Line->Level; 2233 } 2234 // FIXME: Add error handling. 2235 } 2236 2237 void UnwrappedLineParser::parseNew() { 2238 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 2239 nextToken(); 2240 2241 if (Style.isCSharp()) { 2242 do { 2243 if (FormatTok->is(tok::l_brace)) 2244 parseBracedList(); 2245 2246 if (FormatTok->isOneOf(tok::semi, tok::comma)) 2247 return; 2248 2249 nextToken(); 2250 } while (!eof()); 2251 } 2252 2253 if (Style.Language != FormatStyle::LK_Java) 2254 return; 2255 2256 // In Java, we can parse everything up to the parens, which aren't optional. 2257 do { 2258 // There should not be a ;, { or } before the new's open paren. 2259 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 2260 return; 2261 2262 // Consume the parens. 2263 if (FormatTok->is(tok::l_paren)) { 2264 parseParens(); 2265 2266 // If there is a class body of an anonymous class, consume that as child. 
2267 if (FormatTok->is(tok::l_brace)) 2268 parseChildBlock(); 2269 return; 2270 } 2271 nextToken(); 2272 } while (!eof()); 2273 } 2274 2275 void UnwrappedLineParser::parseForOrWhileLoop() { 2276 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 2277 "'for', 'while' or foreach macro expected"); 2278 nextToken(); 2279 // JS' for await ( ... 2280 if (Style.Language == FormatStyle::LK_JavaScript && 2281 FormatTok->is(Keywords.kw_await)) 2282 nextToken(); 2283 if (FormatTok->Tok.is(tok::l_paren)) 2284 parseParens(); 2285 if (FormatTok->Tok.is(tok::l_brace)) { 2286 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2287 parseBlock(/*MustBeDeclaration=*/false); 2288 addUnwrappedLine(); 2289 } else { 2290 addUnwrappedLine(); 2291 ++Line->Level; 2292 parseStructuralElement(); 2293 --Line->Level; 2294 } 2295 } 2296 2297 void UnwrappedLineParser::parseDoWhile() { 2298 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 2299 nextToken(); 2300 if (FormatTok->Tok.is(tok::l_brace)) { 2301 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2302 parseBlock(/*MustBeDeclaration=*/false); 2303 if (Style.BraceWrapping.BeforeWhile) 2304 addUnwrappedLine(); 2305 } else { 2306 addUnwrappedLine(); 2307 ++Line->Level; 2308 parseStructuralElement(); 2309 --Line->Level; 2310 } 2311 2312 // FIXME: Add error handling. 2313 if (!FormatTok->Tok.is(tok::kw_while)) { 2314 addUnwrappedLine(); 2315 return; 2316 } 2317 2318 // If in Whitesmiths mode, the line with the while() needs to be indented 2319 // to the same level as the block. 
2320 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 2321 ++Line->Level; 2322 2323 nextToken(); 2324 parseStructuralElement(); 2325 } 2326 2327 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 2328 nextToken(); 2329 unsigned OldLineLevel = Line->Level; 2330 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 2331 --Line->Level; 2332 if (LeftAlignLabel) 2333 Line->Level = 0; 2334 2335 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 2336 FormatTok->Tok.is(tok::l_brace)) { 2337 2338 CompoundStatementIndenter Indenter(this, Line->Level, 2339 Style.BraceWrapping.AfterCaseLabel, 2340 Style.BraceWrapping.IndentBraces); 2341 parseBlock(/*MustBeDeclaration=*/false); 2342 if (FormatTok->Tok.is(tok::kw_break)) { 2343 if (Style.BraceWrapping.AfterControlStatement == 2344 FormatStyle::BWACS_Always) { 2345 addUnwrappedLine(); 2346 if (!Style.IndentCaseBlocks && 2347 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 2348 Line->Level++; 2349 } 2350 } 2351 parseStructuralElement(); 2352 } 2353 addUnwrappedLine(); 2354 } else { 2355 if (FormatTok->is(tok::semi)) 2356 nextToken(); 2357 addUnwrappedLine(); 2358 } 2359 Line->Level = OldLineLevel; 2360 if (FormatTok->isNot(tok::l_brace)) { 2361 parseStructuralElement(); 2362 addUnwrappedLine(); 2363 } 2364 } 2365 2366 void UnwrappedLineParser::parseCaseLabel() { 2367 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 2368 2369 // FIXME: fix handling of complex expressions here. 
2370 do { 2371 nextToken(); 2372 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 2373 parseLabel(); 2374 } 2375 2376 void UnwrappedLineParser::parseSwitch() { 2377 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 2378 nextToken(); 2379 if (FormatTok->Tok.is(tok::l_paren)) 2380 parseParens(); 2381 if (FormatTok->Tok.is(tok::l_brace)) { 2382 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2383 parseBlock(/*MustBeDeclaration=*/false); 2384 addUnwrappedLine(); 2385 } else { 2386 addUnwrappedLine(); 2387 ++Line->Level; 2388 parseStructuralElement(); 2389 --Line->Level; 2390 } 2391 } 2392 2393 void UnwrappedLineParser::parseAccessSpecifier() { 2394 nextToken(); 2395 // Understand Qt's slots. 2396 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 2397 nextToken(); 2398 // Otherwise, we don't know what it is, and we'd better keep the next token. 2399 if (FormatTok->Tok.is(tok::colon)) 2400 nextToken(); 2401 addUnwrappedLine(); 2402 } 2403 2404 void UnwrappedLineParser::parseConcept() { 2405 assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected"); 2406 nextToken(); 2407 if (!FormatTok->Tok.is(tok::identifier)) 2408 return; 2409 nextToken(); 2410 if (!FormatTok->Tok.is(tok::equal)) 2411 return; 2412 nextToken(); 2413 if (FormatTok->Tok.is(tok::kw_requires)) { 2414 nextToken(); 2415 parseRequiresExpression(Line->Level); 2416 } else { 2417 parseConstraintExpression(Line->Level); 2418 } 2419 } 2420 2421 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) { 2422 // requires (R range) 2423 if (FormatTok->Tok.is(tok::l_paren)) { 2424 parseParens(); 2425 if (Style.IndentRequires && OriginalLevel != Line->Level) { 2426 addUnwrappedLine(); 2427 --Line->Level; 2428 } 2429 } 2430 2431 if (FormatTok->Tok.is(tok::l_brace)) { 2432 if (Style.BraceWrapping.AfterFunction) 2433 addUnwrappedLine(); 2434 FormatTok->setType(TT_FunctionLBrace); 2435 parseBlock(/*MustBeDeclaration=*/false); 2436 
addUnwrappedLine(); 2437 } else { 2438 parseConstraintExpression(OriginalLevel); 2439 } 2440 } 2441 2442 void UnwrappedLineParser::parseConstraintExpression( 2443 unsigned int OriginalLevel) { 2444 // requires Id<T> && Id<T> || Id<T> 2445 while ( 2446 FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) { 2447 nextToken(); 2448 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less, 2449 tok::greater, tok::comma, tok::ellipsis)) { 2450 if (FormatTok->Tok.is(tok::less)) { 2451 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2452 /*ClosingBraceKind=*/tok::greater); 2453 continue; 2454 } 2455 nextToken(); 2456 } 2457 if (FormatTok->Tok.is(tok::kw_requires)) { 2458 parseRequiresExpression(OriginalLevel); 2459 } 2460 if (FormatTok->Tok.is(tok::less)) { 2461 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2462 /*ClosingBraceKind=*/tok::greater); 2463 } 2464 2465 if (FormatTok->Tok.is(tok::l_paren)) { 2466 parseParens(); 2467 } 2468 if (FormatTok->Tok.is(tok::l_brace)) { 2469 if (Style.BraceWrapping.AfterFunction) 2470 addUnwrappedLine(); 2471 FormatTok->setType(TT_FunctionLBrace); 2472 parseBlock(/*MustBeDeclaration=*/false); 2473 } 2474 if (FormatTok->Tok.is(tok::semi)) { 2475 // Eat any trailing semi. 
2476 nextToken(); 2477 addUnwrappedLine(); 2478 } 2479 if (FormatTok->Tok.is(tok::colon)) { 2480 return; 2481 } 2482 if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) { 2483 if (FormatTok->Previous && 2484 !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires, 2485 tok::coloncolon)) { 2486 addUnwrappedLine(); 2487 } 2488 if (Style.IndentRequires && OriginalLevel != Line->Level) { 2489 --Line->Level; 2490 } 2491 break; 2492 } else { 2493 FormatTok->setType(TT_ConstraintJunctions); 2494 } 2495 2496 nextToken(); 2497 } 2498 } 2499 2500 void UnwrappedLineParser::parseRequires() { 2501 assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected"); 2502 2503 unsigned OriginalLevel = Line->Level; 2504 if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) { 2505 addUnwrappedLine(); 2506 if (Style.IndentRequires) { 2507 Line->Level++; 2508 } 2509 } 2510 nextToken(); 2511 2512 parseRequiresExpression(OriginalLevel); 2513 } 2514 2515 bool UnwrappedLineParser::parseEnum() { 2516 // Won't be 'enum' for NS_ENUMs. 2517 if (FormatTok->Tok.is(tok::kw_enum)) 2518 nextToken(); 2519 2520 const FormatToken &InitialToken = *FormatTok; 2521 2522 // In TypeScript, "enum" can also be used as property name, e.g. in interface 2523 // declarations. An "enum" keyword followed by a colon would be a syntax 2524 // error and thus assume it is just an identifier. 2525 if (Style.Language == FormatStyle::LK_JavaScript && 2526 FormatTok->isOneOf(tok::colon, tok::question)) 2527 return false; 2528 2529 // In protobuf, "enum" can be used as a field name. 2530 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 2531 return false; 2532 2533 // Eat up enum class ... 
2534 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 2535 nextToken(); 2536 2537 while (FormatTok->Tok.getIdentifierInfo() || 2538 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 2539 tok::greater, tok::comma, tok::question)) { 2540 nextToken(); 2541 // We can have macros or attributes in between 'enum' and the enum name. 2542 if (FormatTok->is(tok::l_paren)) 2543 parseParens(); 2544 if (FormatTok->is(tok::identifier)) { 2545 nextToken(); 2546 // If there are two identifiers in a row, this is likely an elaborate 2547 // return type. In Java, this can be "implements", etc. 2548 if (Style.isCpp() && FormatTok->is(tok::identifier)) 2549 return false; 2550 } 2551 } 2552 2553 // Just a declaration or something is wrong. 2554 if (FormatTok->isNot(tok::l_brace)) 2555 return true; 2556 FormatTok->setBlockKind(BK_Block); 2557 2558 if (Style.Language == FormatStyle::LK_Java) { 2559 // Java enums are different. 2560 parseJavaEnumBody(); 2561 return true; 2562 } 2563 if (Style.Language == FormatStyle::LK_Proto) { 2564 parseBlock(/*MustBeDeclaration=*/true); 2565 return true; 2566 } 2567 2568 if (!Style.AllowShortEnumsOnASingleLine && 2569 ShouldBreakBeforeBrace(Style, InitialToken)) 2570 addUnwrappedLine(); 2571 // Parse enum body. 2572 nextToken(); 2573 if (!Style.AllowShortEnumsOnASingleLine) { 2574 addUnwrappedLine(); 2575 Line->Level += 1; 2576 } 2577 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true, 2578 /*IsEnum=*/true); 2579 if (!Style.AllowShortEnumsOnASingleLine) 2580 Line->Level -= 1; 2581 if (HasError) { 2582 if (FormatTok->is(tok::semi)) 2583 nextToken(); 2584 addUnwrappedLine(); 2585 } 2586 return true; 2587 2588 // There is no addUnwrappedLine() here so that we fall through to parsing a 2589 // structural element afterwards. Thus, in "enum A {} n, m;", 2590 // "} n, m;" will end up in one unwrapped line. 
2591 } 2592 2593 bool UnwrappedLineParser::parseStructLike() { 2594 // parseRecord falls through and does not yet add an unwrapped line as a 2595 // record declaration or definition can start a structural element. 2596 parseRecord(); 2597 // This does not apply to Java, JavaScript and C#. 2598 if (Style.Language == FormatStyle::LK_Java || 2599 Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) { 2600 if (FormatTok->is(tok::semi)) 2601 nextToken(); 2602 addUnwrappedLine(); 2603 return true; 2604 } 2605 return false; 2606 } 2607 2608 namespace { 2609 // A class used to set and restore the Token position when peeking 2610 // ahead in the token source. 2611 class ScopedTokenPosition { 2612 unsigned StoredPosition; 2613 FormatTokenSource *Tokens; 2614 2615 public: 2616 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 2617 assert(Tokens && "Tokens expected to not be null"); 2618 StoredPosition = Tokens->getPosition(); 2619 } 2620 2621 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 2622 }; 2623 } // namespace 2624 2625 // Look to see if we have [[ by looking ahead, if 2626 // its not then rewind to the original position. 2627 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 2628 ScopedTokenPosition AutoPosition(Tokens); 2629 FormatToken *Tok = Tokens->getNextToken(); 2630 // We already read the first [ check for the second. 2631 if (Tok && !Tok->is(tok::l_square)) { 2632 return false; 2633 } 2634 // Double check that the attribute is just something 2635 // fairly simple. 2636 while (Tok) { 2637 if (Tok->is(tok::r_square)) { 2638 break; 2639 } 2640 Tok = Tokens->getNextToken(); 2641 } 2642 Tok = Tokens->getNextToken(); 2643 if (Tok && !Tok->is(tok::r_square)) { 2644 return false; 2645 } 2646 Tok = Tokens->getNextToken(); 2647 if (Tok && Tok->is(tok::semi)) { 2648 return false; 2649 } 2650 return true; 2651 } 2652 2653 void UnwrappedLineParser::parseJavaEnumBody() { 2654 // Determine whether the enum is simple, i.e. 
does not have a semicolon or 2655 // constants with class bodies. Simple enums can be formatted like braced 2656 // lists, contracted to a single line, etc. 2657 unsigned StoredPosition = Tokens->getPosition(); 2658 bool IsSimple = true; 2659 FormatToken *Tok = Tokens->getNextToken(); 2660 while (Tok) { 2661 if (Tok->is(tok::r_brace)) 2662 break; 2663 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2664 IsSimple = false; 2665 break; 2666 } 2667 // FIXME: This will also mark enums with braces in the arguments to enum 2668 // constants as "not simple". This is probably fine in practice, though. 2669 Tok = Tokens->getNextToken(); 2670 } 2671 FormatTok = Tokens->setPosition(StoredPosition); 2672 2673 if (IsSimple) { 2674 nextToken(); 2675 parseBracedList(); 2676 addUnwrappedLine(); 2677 return; 2678 } 2679 2680 // Parse the body of a more complex enum. 2681 // First add a line for everything up to the "{". 2682 nextToken(); 2683 addUnwrappedLine(); 2684 ++Line->Level; 2685 2686 // Parse the enum constants. 2687 while (FormatTok) { 2688 if (FormatTok->is(tok::l_brace)) { 2689 // Parse the constant's class body. 2690 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 2691 /*MunchSemi=*/false); 2692 } else if (FormatTok->is(tok::l_paren)) { 2693 parseParens(); 2694 } else if (FormatTok->is(tok::comma)) { 2695 nextToken(); 2696 addUnwrappedLine(); 2697 } else if (FormatTok->is(tok::semi)) { 2698 nextToken(); 2699 addUnwrappedLine(); 2700 break; 2701 } else if (FormatTok->is(tok::r_brace)) { 2702 addUnwrappedLine(); 2703 break; 2704 } else { 2705 nextToken(); 2706 } 2707 } 2708 2709 // Parse the class body after the enum's ";" if any. 
2710 parseLevel(/*HasOpeningBrace=*/true); 2711 nextToken(); 2712 --Line->Level; 2713 addUnwrappedLine(); 2714 } 2715 2716 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 2717 const FormatToken &InitialToken = *FormatTok; 2718 nextToken(); 2719 2720 // The actual identifier can be a nested name specifier, and in macros 2721 // it is often token-pasted. 2722 // An [[attribute]] can be before the identifier. 2723 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 2724 tok::kw___attribute, tok::kw___declspec, 2725 tok::kw_alignas, tok::l_square, tok::r_square) || 2726 ((Style.Language == FormatStyle::LK_Java || 2727 Style.Language == FormatStyle::LK_JavaScript) && 2728 FormatTok->isOneOf(tok::period, tok::comma))) { 2729 if (Style.Language == FormatStyle::LK_JavaScript && 2730 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 2731 // JavaScript/TypeScript supports inline object types in 2732 // extends/implements positions: 2733 // class Foo implements {bar: number} { } 2734 nextToken(); 2735 if (FormatTok->is(tok::l_brace)) { 2736 tryToParseBracedList(); 2737 continue; 2738 } 2739 } 2740 bool IsNonMacroIdentifier = 2741 FormatTok->is(tok::identifier) && 2742 FormatTok->TokenText != FormatTok->TokenText.upper(); 2743 nextToken(); 2744 // We can have macros or attributes in between 'class' and the class name. 2745 if (!IsNonMacroIdentifier) { 2746 if (FormatTok->Tok.is(tok::l_paren)) { 2747 parseParens(); 2748 } else if (FormatTok->is(TT_AttributeSquare)) { 2749 parseSquare(); 2750 // Consume the closing TT_AttributeSquare. 2751 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare)) 2752 nextToken(); 2753 } 2754 } 2755 } 2756 2757 // Note that parsing away template declarations here leads to incorrectly 2758 // accepting function declarations as record declarations. 2759 // In general, we cannot solve this problem. 
Consider: 2760 // class A<int> B() {} 2761 // which can be a function definition or a class definition when B() is a 2762 // macro. If we find enough real-world cases where this is a problem, we 2763 // can parse for the 'template' keyword in the beginning of the statement, 2764 // and thus rule out the record production in case there is no template 2765 // (this would still leave us with an ambiguity between template function 2766 // and class declarations). 2767 if (FormatTok->isOneOf(tok::colon, tok::less)) { 2768 while (!eof()) { 2769 if (FormatTok->is(tok::l_brace)) { 2770 calculateBraceTypes(/*ExpectClassBody=*/true); 2771 if (!tryToParseBracedList()) 2772 break; 2773 } 2774 if (FormatTok->Tok.is(tok::semi)) 2775 return; 2776 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 2777 addUnwrappedLine(); 2778 nextToken(); 2779 parseCSharpGenericTypeConstraint(); 2780 break; 2781 } 2782 nextToken(); 2783 } 2784 } 2785 if (FormatTok->Tok.is(tok::l_brace)) { 2786 if (ParseAsExpr) { 2787 parseChildBlock(); 2788 } else { 2789 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2790 addUnwrappedLine(); 2791 2792 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 2793 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 2794 } 2795 } 2796 // There is no addUnwrappedLine() here so that we fall through to parsing a 2797 // structural element afterwards. Thus, in "class A {} n, m;", 2798 // "} n, m;" will end up in one unwrapped line. 
}

// Parses an Objective-C method declaration or definition: consumes tokens
// until a `;` (declaration) or a braced body (definition) has been handled,
// then ends the line. Stops at eof if neither is found.
void UnwrappedLineParser::parseObjCMethod() {
  assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
         "'(' or identifier expected.");
  do {
    if (FormatTok->Tok.is(tok::semi)) {
      nextToken();
      addUnwrappedLine();
      return;
    } else if (FormatTok->Tok.is(tok::l_brace)) {
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseBlock(/*MustBeDeclaration=*/false);
      addUnwrappedLine();
      return;
    } else {
      nextToken();
    }
  } while (!eof());
}

// Parses an Objective-C protocol list `<...>`, starting at the `<`. Returns
// without consuming the offending token if a `;`, `{` or `@end` is seen
// before the closing `>`.
void UnwrappedLineParser::parseObjCProtocolList() {
  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
  do {
    nextToken();
    // Early exit in case someone forgot a close angle.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
        FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
      return;
  } while (!eof() && FormatTok->Tok.isNot(tok::greater));
  nextToken(); // Skip '>'.
}

// Parses the contents of an Objective-C container up to and including the
// terminating `@end` (or up to eof if there is none).
void UnwrappedLineParser::parseObjCUntilAtEnd() {
  do {
    if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
      nextToken();
      addUnwrappedLine();
      break;
    }
    if (FormatTok->is(tok::l_brace)) {
      parseBlock(/*MustBeDeclaration=*/false);
      // In ObjC interfaces, nothing should be following the "}".
      addUnwrappedLine();
    } else if (FormatTok->is(tok::r_brace)) {
      // Ignore stray "}". parseStructuralElement doesn't consume them.
      nextToken();
      addUnwrappedLine();
    } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
      // `-` / `+` introduce an Objective-C method.
      nextToken();
      parseObjCMethod();
    } else {
      parseStructuralElement();
    }
  } while (!eof());
}

// Parses an `@interface` or `@implementation`: the header (name, optional
// lightweight generics, base class or category, protocol list), an optional
// braced block, and then everything up to `@end`.
void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
         FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
  nextToken();
  nextToken(); // interface name

  // @interface can be followed by a lightweight generic
  // specialization list, then either a base class or a category.
  if (FormatTok->Tok.is(tok::less)) {
    parseObjCLightweightGenerics();
  }
  if (FormatTok->Tok.is(tok::colon)) {
    nextToken();
    nextToken(); // base class name
    // The base class can also have lightweight generics applied to it.
    if (FormatTok->Tok.is(tok::less)) {
      parseObjCLightweightGenerics();
    }
  } else if (FormatTok->Tok.is(tok::l_paren))
    // Skip category, if present.
    parseParens();

  if (FormatTok->Tok.is(tok::less))
    parseObjCProtocolList();

  if (FormatTok->Tok.is(tok::l_brace)) {
    if (Style.BraceWrapping.AfterObjCDeclaration)
      addUnwrappedLine();
    parseBlock(/*MustBeDeclaration=*/true);
  }

  // With instance variables, this puts '}' on its own line. Without instance
  // variables, this ends the @interface line.
  addUnwrappedLine();

  parseObjCUntilAtEnd();
}

// Parses an Objective-C lightweight generics list `<...>`, starting at the
// `<` and tracking nested angle brackets.
void UnwrappedLineParser::parseObjCLightweightGenerics() {
  assert(FormatTok->Tok.is(tok::less));
  // Unlike protocol lists, generic parameterizations support
  // nested angles:
  //
  // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
  //     NSObject <NSCopying, NSSecureCoding>
  //
  // so we need to count how many open angles we have left.
  unsigned NumOpenAngles = 1;
  do {
    nextToken();
    // Early exit in case someone forgot a close angle.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
        FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
      break;
    if (FormatTok->Tok.is(tok::less))
      ++NumOpenAngles;
    else if (FormatTok->Tok.is(tok::greater)) {
      assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
      --NumOpenAngles;
    }
  } while (!eof() && NumOpenAngles != 0);
  // NOTE(review): this also runs after the early exit above, in which case
  // the token consumed is not a '>' - confirm that this is intended.
  nextToken(); // Skip '>'.
}

// Returns true for the declaration/definition form of @protocol,
// false for the expression form.
bool UnwrappedLineParser::parseObjCProtocol() {
  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
  nextToken();

  if (FormatTok->is(tok::l_paren))
    // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
    return false;

  // The definition/declaration form,
  // @protocol Foo
  // - (int)someMethod;
  // @end

  nextToken(); // protocol name

  if (FormatTok->Tok.is(tok::less))
    parseObjCProtocolList();

  // Check for protocol declaration.
  if (FormatTok->Tok.is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return true;
  }

  addUnwrappedLine();
  parseObjCUntilAtEnd();
  return true;
}

// Parses the start of a JavaScript ES6 `import` or `export` statement. For
// function exports and for exports that are neither `export *`, `export {...}`
// nor followed by a string literal, only the leading keywords are consumed
// and the caller continues with the declaration or expression that follows.
void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
  bool IsImport = FormatTok->is(Keywords.kw_import);
  assert(IsImport || FormatTok->is(tok::kw_export));
  nextToken();

  // Consume the "default" in "export default class/function".
  if (FormatTok->is(tok::kw_default))
    nextToken();

  // Consume "async function", "function" and "default function", so that these
  // get parsed as free-standing JS functions, i.e. do not require a trailing
  // semicolon.
  if (FormatTok->is(Keywords.kw_async))
    nextToken();
  if (FormatTok->is(Keywords.kw_function)) {
    nextToken();
    return;
  }

  // For imports, `export *`, `export {...}`, consume the rest of the line up
  // to the terminating `;`. For everything else, just return and continue
  // parsing the structural element, i.e. the declaration or expression for
  // `export default`.
  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
      !FormatTok->isStringLiteral())
    return;

  while (!eof()) {
    if (FormatTok->is(tok::semi))
      return;
    if (Line->Tokens.empty()) {
      // Common issue: Automatic Semicolon Insertion wrapped the line, so the
      // import statement should terminate.
      return;
    }
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setBlockKind(BK_Block);
      nextToken();
      parseBracedList();
    } else {
      nextToken();
    }
  }
}

// Parses a statement macro invocation: the macro name, an optional
// parenthesized argument list and an optional `;`, then ends the line.
void UnwrappedLineParser::parseStatementMacro() {
  nextToken();
  if (FormatTok->is(tok::l_paren))
    parseParens();
  if (FormatTok->is(tok::semi))
    nextToken();
  addUnwrappedLine();
}

// Debug helper: writes \p Line to llvm::dbgs() - its level, first start
// column, a " MACRO" marker for preprocessor lines, and for every token its
// name, type and original column - followed, recursively, by the child lines
// of every token. \p Prefix is printed before the line.
LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
                                                 StringRef Prefix = "") {
  llvm::dbgs() << Prefix << "Line(" << Line.Level
               << ", FSC=" << Line.FirstStartColumn << ")"
               << (Line.InPPDirective ? " MACRO" : "") << ": ";
  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
                                                    E = Line.Tokens.end();
       I != E; ++I) {
    llvm::dbgs() << I->Tok->Tok.getName() << "["
                 << "T=" << (unsigned)I->Tok->getType()
                 << ", OC=" << I->Tok->OriginalColumn << "] ";
  }
  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
                                                    E = Line.Tokens.end();
       I != E; ++I) {
    const UnwrappedLineNode &Node = *I;
    // Note: the inner I/E intentionally shadow the outer iterators.
    for (SmallVectorImpl<UnwrappedLine>::const_iterator
             I = Node.Children.begin(),
             E = Node.Children.end();
         I != E; ++I) {
      printDebugInfo(*I, "\nChild: ");
    }
  }
  llvm::dbgs() << "\n";
}

// Moves the line built so far into CurrentLines and resets the per-line
// state. Does nothing if the current line holds no tokens. \p AdjustLevel
// controls whether the level is decreased after a line that closes a block in
// Whitesmiths mode.
void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
  if (Line->Tokens.empty())
    return;
  LLVM_DEBUG({
    if (CurrentLines == &Lines)
      printDebugInfo(*Line);
  });

  // If this line closes a block when in Whitesmiths mode, remember that
  // information so that the level can be decreased after the line is added.
  // This has to happen after the addition of the line since the line itself
  // needs to be indented.
  bool ClosesWhitesmithsBlock =
      Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
      Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;

  CurrentLines->push_back(std::move(*Line));
  Line->Tokens.clear();
  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
  Line->FirstStartColumn = 0;

  if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
    --Line->Level;
  // When adding to the top-level line list, append any pending preprocessor
  // directive lines right after this line.
  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
    CurrentLines->append(
        std::make_move_iterator(PreprocessorDirectives.begin()),
        std::make_move_iterator(PreprocessorDirectives.end()));
    PreprocessorDirectives.clear();
  }
  // Disconnect the current token from the last token on the previous line.
  FormatTok->Previous = nullptr;
}

// Returns true if the current token is the end-of-file token.
bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }

// Returns true if \p FormatTok has at least one newline before it and either
// the current line is a preprocessor directive or the token has an unescaped
// newline.
bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
         FormatTok.NewlinesBefore > 0;
}

// Checks if \p FormatTok is a line comment that continues the line comment
// section on \p Line. Returns false if \p Line is empty or if the comment
// text (without its leading "//" or "/*") matches \p CommentPragmasRegex.
static bool
continuesLineCommentSection(const FormatToken &FormatTok,
                            const UnwrappedLine &Line,
                            const llvm::Regex &CommentPragmasRegex) {
  if (Line.Tokens.empty())
    return false;

  StringRef IndentContent = FormatTok.TokenText;
  if (FormatTok.TokenText.startswith("//") ||
      FormatTok.TokenText.startswith("/*"))
    IndentContent = FormatTok.TokenText.substr(2);
  if (CommentPragmasRegex.match(IndentContent))
    return false;

  // If Line starts with a line comment, then FormatTok continues the comment
  // section if its original column is greater or equal to the original start
  // column of the line.
  //
  // Define the min column token of a line as follows: if a line ends in '{' or
  // contains a '{' followed by a line comment, then the min column token is
  // that '{'. Otherwise, the min column token of the line is the first token of
  // the line.
  //
  // If Line starts with a token other than a line comment, then FormatTok
  // continues the comment section if its original column is greater than the
  // original start column of the min column token of the line.
  //
  // For example, the second line comment continues the first in these cases
  // (the columns of the comments are significant):
  //
  // // first line
  // // second line
  //
  // and:
  //
  // // first line
  //  // second line
  //
  // and:
  //
  // int i; // first line
  //  // second line
  //
  // and:
  //
  // do { // first line
  //      // second line
  //   int i;
  // } while (true);
  //
  // and:
  //
  // enum {
  //   a, // first line
  //    // second line
  //   b
  // };
  //
  // The second line comment doesn't continue the first in these cases:
  //
  //   // first line
  //  // second line
  //
  // and:
  //
  // int i; // first line
  // // second line
  //
  // and:
  //
  // do { // first line
  //   // second line
  //   int i;
  // } while (true);
  //
  // and:
  //
  // enum {
  //   a, // first line
  //   // second line
  // };
  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;

  // Scan for '{//'. If found, use the column of '{' as a min column for line
  // comment section continuation.
  const FormatToken *PreviousToken = nullptr;
  for (const UnwrappedLineNode &Node : Line.Tokens) {
    if (PreviousToken && PreviousToken->is(tok::l_brace) &&
        isLineComment(*Node.Tok)) {
      MinColumnToken = PreviousToken;
      break;
    }
    PreviousToken = Node.Tok;

    // Grab the last newline preceding a token in this unwrapped line.
    if (Node.Tok->NewlinesBefore > 0) {
      MinColumnToken = Node.Tok;
    }
  }
  // The line ends in '{' (the loop finished without finding '{//').
  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
    MinColumnToken = PreviousToken;
  }

  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
                              MinColumnToken);
}

void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
  bool JustComments = Line->Tokens.empty();
  for (SmallVectorImpl<FormatToken *>::const_iterator
           I = CommentsBeforeNextToken.begin(),
           E = CommentsBeforeNextToken.end();
       I != E; ++I) {
    // Line comments that belong to the same line comment section are put on the
    // same line since later we might want to reflow content between them.
    // Additional fine-grained breaking of line comment sections is controlled
    // by the class BreakableLineCommentSection in case it is desirable to keep
    // several line comment sections in the same unwrapped line.
    //
    // FIXME: Consider putting separate line comment sections as children to the
    // unwrapped line instead.
3195 (*I)->ContinuesLineCommentSection = 3196 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 3197 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 3198 addUnwrappedLine(); 3199 pushToken(*I); 3200 } 3201 if (NewlineBeforeNext && JustComments) 3202 addUnwrappedLine(); 3203 CommentsBeforeNextToken.clear(); 3204 } 3205 3206 void UnwrappedLineParser::nextToken(int LevelDifference) { 3207 if (eof()) 3208 return; 3209 flushComments(isOnNewLine(*FormatTok)); 3210 pushToken(FormatTok); 3211 FormatToken *Previous = FormatTok; 3212 if (Style.Language != FormatStyle::LK_JavaScript) 3213 readToken(LevelDifference); 3214 else 3215 readTokenWithJavaScriptASI(); 3216 FormatTok->Previous = Previous; 3217 } 3218 3219 void UnwrappedLineParser::distributeComments( 3220 const SmallVectorImpl<FormatToken *> &Comments, 3221 const FormatToken *NextTok) { 3222 // Whether or not a line comment token continues a line is controlled by 3223 // the method continuesLineCommentSection, with the following caveat: 3224 // 3225 // Define a trail of Comments to be a nonempty proper postfix of Comments such 3226 // that each comment line from the trail is aligned with the next token, if 3227 // the next token exists. If a trail exists, the beginning of the maximal 3228 // trail is marked as a start of a new comment section. 3229 // 3230 // For example in this code: 3231 // 3232 // int a; // line about a 3233 // // line 1 about b 3234 // // line 2 about b 3235 // int b; 3236 // 3237 // the two lines about b form a maximal trail, so there are two sections, the 3238 // first one consisting of the single comment "// line about a" and the 3239 // second one consisting of the next two comments. 3240 if (Comments.empty()) 3241 return; 3242 bool ShouldPushCommentsInCurrentLine = true; 3243 bool HasTrailAlignedWithNextToken = false; 3244 unsigned StartOfTrailAlignedWithNextToken = 0; 3245 if (NextTok) { 3246 // We are skipping the first element intentionally. 
3247 for (unsigned i = Comments.size() - 1; i > 0; --i) { 3248 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 3249 HasTrailAlignedWithNextToken = true; 3250 StartOfTrailAlignedWithNextToken = i; 3251 } 3252 } 3253 } 3254 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 3255 FormatToken *FormatTok = Comments[i]; 3256 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 3257 FormatTok->ContinuesLineCommentSection = false; 3258 } else { 3259 FormatTok->ContinuesLineCommentSection = 3260 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 3261 } 3262 if (!FormatTok->ContinuesLineCommentSection && 3263 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 3264 ShouldPushCommentsInCurrentLine = false; 3265 } 3266 if (ShouldPushCommentsInCurrentLine) { 3267 pushToken(FormatTok); 3268 } else { 3269 CommentsBeforeNextToken.push_back(FormatTok); 3270 } 3271 } 3272 } 3273 3274 void UnwrappedLineParser::readToken(int LevelDifference) { 3275 SmallVector<FormatToken *, 1> Comments; 3276 do { 3277 FormatTok = Tokens->getNextToken(); 3278 assert(FormatTok); 3279 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 3280 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 3281 distributeComments(Comments, FormatTok); 3282 Comments.clear(); 3283 // If there is an unfinished unwrapped line, we flush the preprocessor 3284 // directives only after that unwrapped line was finished later. 
3285 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 3286 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 3287 assert((LevelDifference >= 0 || 3288 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 3289 "LevelDifference makes Line->Level negative"); 3290 Line->Level += LevelDifference; 3291 // Comments stored before the preprocessor directive need to be output 3292 // before the preprocessor directive, at the same level as the 3293 // preprocessor directive, as we consider them to apply to the directive. 3294 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 3295 PPBranchLevel > 0) 3296 Line->Level += PPBranchLevel; 3297 flushComments(isOnNewLine(*FormatTok)); 3298 parsePPDirective(); 3299 } 3300 while (FormatTok->getType() == TT_ConflictStart || 3301 FormatTok->getType() == TT_ConflictEnd || 3302 FormatTok->getType() == TT_ConflictAlternative) { 3303 if (FormatTok->getType() == TT_ConflictStart) { 3304 conditionalCompilationStart(/*Unreachable=*/false); 3305 } else if (FormatTok->getType() == TT_ConflictAlternative) { 3306 conditionalCompilationAlternative(); 3307 } else if (FormatTok->getType() == TT_ConflictEnd) { 3308 conditionalCompilationEnd(); 3309 } 3310 FormatTok = Tokens->getNextToken(); 3311 FormatTok->MustBreakBefore = true; 3312 } 3313 3314 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 3315 !Line->InPPDirective) { 3316 continue; 3317 } 3318 3319 if (!FormatTok->Tok.is(tok::comment)) { 3320 distributeComments(Comments, FormatTok); 3321 Comments.clear(); 3322 return; 3323 } 3324 3325 Comments.push_back(FormatTok); 3326 } while (!eof()); 3327 3328 distributeComments(Comments, nullptr); 3329 Comments.clear(); 3330 } 3331 3332 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 3333 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 3334 if (MustBreakBeforeNextToken) { 3335 Line->Tokens.back().Tok->MustBreakBefore = true; 3336 MustBreakBeforeNextToken = false; 3337 } 3338 } 3339 3340 } // 
end namespace format 3341 } // end namespace clang 3342