1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/Support/Debug.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 #include <algorithm> 21 22 #define DEBUG_TYPE "format-parser" 23 24 namespace clang { 25 namespace format { 26 27 class FormatTokenSource { 28 public: 29 virtual ~FormatTokenSource() {} 30 virtual FormatToken *getNextToken() = 0; 31 32 virtual unsigned getPosition() = 0; 33 virtual FormatToken *setPosition(unsigned Position) = 0; 34 }; 35 36 namespace { 37 38 class ScopedDeclarationState { 39 public: 40 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 41 bool MustBeDeclaration) 42 : Line(Line), Stack(Stack) { 43 Line.MustBeDeclaration = MustBeDeclaration; 44 Stack.push_back(MustBeDeclaration); 45 } 46 ~ScopedDeclarationState() { 47 Stack.pop_back(); 48 if (!Stack.empty()) 49 Line.MustBeDeclaration = Stack.back(); 50 else 51 Line.MustBeDeclaration = true; 52 } 53 54 private: 55 UnwrappedLine &Line; 56 std::vector<bool> &Stack; 57 }; 58 59 static bool isLineComment(const FormatToken &FormatTok) { 60 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 61 } 62 63 // Checks if \p FormatTok is a line comment that continues the line comment 64 // \p Previous. 
The original column of \p MinColumnToken is used to determine 65 // whether \p FormatTok is indented enough to the right to continue \p Previous. 66 static bool continuesLineComment(const FormatToken &FormatTok, 67 const FormatToken *Previous, 68 const FormatToken *MinColumnToken) { 69 if (!Previous || !MinColumnToken) 70 return false; 71 unsigned MinContinueColumn = 72 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 73 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 74 isLineComment(*Previous) && 75 FormatTok.OriginalColumn >= MinContinueColumn; 76 } 77 78 class ScopedMacroState : public FormatTokenSource { 79 public: 80 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 81 FormatToken *&ResetToken) 82 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 83 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 84 Token(nullptr), PreviousToken(nullptr) { 85 FakeEOF.Tok.startToken(); 86 FakeEOF.Tok.setKind(tok::eof); 87 TokenSource = this; 88 Line.Level = 0; 89 Line.InPPDirective = true; 90 } 91 92 ~ScopedMacroState() override { 93 TokenSource = PreviousTokenSource; 94 ResetToken = Token; 95 Line.InPPDirective = false; 96 Line.Level = PreviousLineLevel; 97 } 98 99 FormatToken *getNextToken() override { 100 // The \c UnwrappedLineParser guards against this by never calling 101 // \c getNextToken() after it has encountered the first eof token. 
102 assert(!eof()); 103 PreviousToken = Token; 104 Token = PreviousTokenSource->getNextToken(); 105 if (eof()) 106 return &FakeEOF; 107 return Token; 108 } 109 110 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 111 112 FormatToken *setPosition(unsigned Position) override { 113 PreviousToken = nullptr; 114 Token = PreviousTokenSource->setPosition(Position); 115 return Token; 116 } 117 118 private: 119 bool eof() { 120 return Token && Token->HasUnescapedNewline && 121 !continuesLineComment(*Token, PreviousToken, 122 /*MinColumnToken=*/PreviousToken); 123 } 124 125 FormatToken FakeEOF; 126 UnwrappedLine &Line; 127 FormatTokenSource *&TokenSource; 128 FormatToken *&ResetToken; 129 unsigned PreviousLineLevel; 130 FormatTokenSource *PreviousTokenSource; 131 132 FormatToken *Token; 133 FormatToken *PreviousToken; 134 }; 135 136 } // end anonymous namespace 137 138 class ScopedLineState { 139 public: 140 ScopedLineState(UnwrappedLineParser &Parser, 141 bool SwitchToPreprocessorLines = false) 142 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 143 if (SwitchToPreprocessorLines) 144 Parser.CurrentLines = &Parser.PreprocessorDirectives; 145 else if (!Parser.Line->Tokens.empty()) 146 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 147 PreBlockLine = std::move(Parser.Line); 148 Parser.Line = std::make_unique<UnwrappedLine>(); 149 Parser.Line->Level = PreBlockLine->Level; 150 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 151 } 152 153 ~ScopedLineState() { 154 if (!Parser.Line->Tokens.empty()) { 155 Parser.addUnwrappedLine(); 156 } 157 assert(Parser.Line->Tokens.empty()); 158 Parser.Line = std::move(PreBlockLine); 159 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 160 Parser.MustBreakBeforeNextToken = true; 161 Parser.CurrentLines = OriginalLines; 162 } 163 164 private: 165 UnwrappedLineParser &Parser; 166 167 std::unique_ptr<UnwrappedLine> PreBlockLine; 168 SmallVectorImpl<UnwrappedLine> *OriginalLines; 
169 }; 170 171 class CompoundStatementIndenter { 172 public: 173 CompoundStatementIndenter(UnwrappedLineParser *Parser, 174 const FormatStyle &Style, unsigned &LineLevel) 175 : CompoundStatementIndenter(Parser, LineLevel, 176 Style.BraceWrapping.AfterControlStatement, 177 Style.BraceWrapping.IndentBraces) {} 178 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 179 bool WrapBrace, bool IndentBrace) 180 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 181 if (WrapBrace) 182 Parser->addUnwrappedLine(); 183 if (IndentBrace) 184 ++LineLevel; 185 } 186 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 187 188 private: 189 unsigned &LineLevel; 190 unsigned OldLineLevel; 191 }; 192 193 namespace { 194 195 class IndexedTokenSource : public FormatTokenSource { 196 public: 197 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 198 : Tokens(Tokens), Position(-1) {} 199 200 FormatToken *getNextToken() override { 201 ++Position; 202 return Tokens[Position]; 203 } 204 205 unsigned getPosition() override { 206 assert(Position >= 0); 207 return Position; 208 } 209 210 FormatToken *setPosition(unsigned P) override { 211 Position = P; 212 return Tokens[Position]; 213 } 214 215 void reset() { Position = -1; } 216 217 private: 218 ArrayRef<FormatToken *> Tokens; 219 int Position; 220 }; 221 222 } // end anonymous namespace 223 224 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 225 const AdditionalKeywords &Keywords, 226 unsigned FirstStartColumn, 227 ArrayRef<FormatToken *> Tokens, 228 UnwrappedLineConsumer &Callback) 229 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 230 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 231 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 232 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 233 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 234 ? 
IG_Rejected 235 : IG_Inited), 236 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 237 238 void UnwrappedLineParser::reset() { 239 PPBranchLevel = -1; 240 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 241 ? IG_Rejected 242 : IG_Inited; 243 IncludeGuardToken = nullptr; 244 Line.reset(new UnwrappedLine); 245 CommentsBeforeNextToken.clear(); 246 FormatTok = nullptr; 247 MustBreakBeforeNextToken = false; 248 PreprocessorDirectives.clear(); 249 CurrentLines = &Lines; 250 DeclarationScopeStack.clear(); 251 PPStack.clear(); 252 Line->FirstStartColumn = FirstStartColumn; 253 } 254 255 void UnwrappedLineParser::parse() { 256 IndexedTokenSource TokenSource(AllTokens); 257 Line->FirstStartColumn = FirstStartColumn; 258 do { 259 LLVM_DEBUG(llvm::dbgs() << "----\n"); 260 reset(); 261 Tokens = &TokenSource; 262 TokenSource.reset(); 263 264 readToken(); 265 parseFile(); 266 267 // If we found an include guard then all preprocessor directives (other than 268 // the guard) are over-indented by one. 269 if (IncludeGuard == IG_Found) 270 for (auto &Line : Lines) 271 if (Line.InPPDirective && Line.Level > 0) 272 --Line.Level; 273 274 // Create line with eof token. 
275 pushToken(FormatTok); 276 addUnwrappedLine(); 277 278 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 279 E = Lines.end(); 280 I != E; ++I) { 281 Callback.consumeUnwrappedLine(*I); 282 } 283 Callback.finishRun(); 284 Lines.clear(); 285 while (!PPLevelBranchIndex.empty() && 286 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 287 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 288 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 289 } 290 if (!PPLevelBranchIndex.empty()) { 291 ++PPLevelBranchIndex.back(); 292 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 293 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 294 } 295 } while (!PPLevelBranchIndex.empty()); 296 } 297 298 void UnwrappedLineParser::parseFile() { 299 // The top-level context in a file always has declarations, except for pre- 300 // processor directives and JavaScript files. 301 bool MustBeDeclaration = 302 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 303 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 304 MustBeDeclaration); 305 if (Style.Language == FormatStyle::LK_TextProto) 306 parseBracedList(); 307 else 308 parseLevel(/*HasOpeningBrace=*/false); 309 // Make sure to format the remaining tokens. 310 // 311 // LK_TextProto is special since its top-level is parsed as the body of a 312 // braced list, which does not necessarily have natural line separators such 313 // as a semicolon. Comments after the last entry that have been determined to 314 // not belong to that line, as in: 315 // key: value 316 // // endfile comment 317 // do not have a chance to be put on a line of their own until this point. 318 // Here we add this newline before end-of-file comments. 
319 if (Style.Language == FormatStyle::LK_TextProto && 320 !CommentsBeforeNextToken.empty()) 321 addUnwrappedLine(); 322 flushComments(true); 323 addUnwrappedLine(); 324 } 325 326 void UnwrappedLineParser::parseCSharpAttribute() { 327 do { 328 switch (FormatTok->Tok.getKind()) { 329 case tok::r_square: 330 nextToken(); 331 addUnwrappedLine(); 332 return; 333 default: 334 nextToken(); 335 break; 336 } 337 } while (!eof()); 338 } 339 340 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 341 bool SwitchLabelEncountered = false; 342 do { 343 tok::TokenKind kind = FormatTok->Tok.getKind(); 344 if (FormatTok->Type == TT_MacroBlockBegin) { 345 kind = tok::l_brace; 346 } else if (FormatTok->Type == TT_MacroBlockEnd) { 347 kind = tok::r_brace; 348 } 349 350 switch (kind) { 351 case tok::comment: 352 nextToken(); 353 addUnwrappedLine(); 354 break; 355 case tok::l_brace: 356 // FIXME: Add parameter whether this can happen - if this happens, we must 357 // be in a non-declaration context. 358 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 359 continue; 360 parseBlock(/*MustBeDeclaration=*/false); 361 addUnwrappedLine(); 362 break; 363 case tok::r_brace: 364 if (HasOpeningBrace) 365 return; 366 nextToken(); 367 addUnwrappedLine(); 368 break; 369 case tok::kw_default: { 370 unsigned StoredPosition = Tokens->getPosition(); 371 FormatToken *Next; 372 do { 373 Next = Tokens->getNextToken(); 374 } while (Next && Next->is(tok::comment)); 375 FormatTok = Tokens->setPosition(StoredPosition); 376 if (Next && Next->isNot(tok::colon)) { 377 // default not followed by ':' is not a case label; treat it like 378 // an identifier. 379 parseStructuralElement(); 380 break; 381 } 382 // Else, if it is 'default:', fall through to the case handling. 383 LLVM_FALLTHROUGH; 384 } 385 case tok::kw_case: 386 if (Style.Language == FormatStyle::LK_JavaScript && 387 Line->MustBeDeclaration) { 388 // A 'case: string' style field declaration. 
389 parseStructuralElement(); 390 break; 391 } 392 if (!SwitchLabelEncountered && 393 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 394 ++Line->Level; 395 SwitchLabelEncountered = true; 396 parseStructuralElement(); 397 break; 398 case tok::l_square: 399 if (Style.isCSharp()) { 400 nextToken(); 401 parseCSharpAttribute(); 402 break; 403 } 404 LLVM_FALLTHROUGH; 405 default: 406 parseStructuralElement(); 407 break; 408 } 409 } while (!eof()); 410 } 411 412 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 413 // We'll parse forward through the tokens until we hit 414 // a closing brace or eof - note that getNextToken() will 415 // parse macros, so this will magically work inside macro 416 // definitions, too. 417 unsigned StoredPosition = Tokens->getPosition(); 418 FormatToken *Tok = FormatTok; 419 const FormatToken *PrevTok = Tok->Previous; 420 // Keep a stack of positions of lbrace tokens. We will 421 // update information about whether an lbrace starts a 422 // braced init list or a different block during the loop. 423 SmallVector<FormatToken *, 8> LBraceStack; 424 assert(Tok->Tok.is(tok::l_brace)); 425 do { 426 // Get next non-comment token. 427 FormatToken *NextTok; 428 unsigned ReadTokens = 0; 429 do { 430 NextTok = Tokens->getNextToken(); 431 ++ReadTokens; 432 } while (NextTok->is(tok::comment)); 433 434 switch (Tok->Tok.getKind()) { 435 case tok::l_brace: 436 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 437 if (PrevTok->isOneOf(tok::colon, tok::less)) 438 // A ':' indicates this code is in a type, or a braced list 439 // following a label in an object literal ({a: {b: 1}}). 440 // A '<' could be an object used in a comparison, but that is nonsense 441 // code (can never return true), so more likely it is a generic type 442 // argument (`X<{a: string; b: number}>`). 
443 // The code below could be confused by semicolons between the 444 // individual members in a type member list, which would normally 445 // trigger BK_Block. In both cases, this must be parsed as an inline 446 // braced init. 447 Tok->BlockKind = BK_BracedInit; 448 else if (PrevTok->is(tok::r_paren)) 449 // `) { }` can only occur in function or method declarations in JS. 450 Tok->BlockKind = BK_Block; 451 } else { 452 Tok->BlockKind = BK_Unknown; 453 } 454 LBraceStack.push_back(Tok); 455 break; 456 case tok::r_brace: 457 if (LBraceStack.empty()) 458 break; 459 if (LBraceStack.back()->BlockKind == BK_Unknown) { 460 bool ProbablyBracedList = false; 461 if (Style.Language == FormatStyle::LK_Proto) { 462 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 463 } else { 464 // Using OriginalColumn to distinguish between ObjC methods and 465 // binary operators is a bit hacky. 466 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 467 NextTok->OriginalColumn == 0; 468 469 // If there is a comma, semicolon or right paren after the closing 470 // brace, we assume this is a braced initializer list. Note that 471 // regardless how we mark inner braces here, we will overwrite the 472 // BlockKind later if we parse a braced list (where all blocks 473 // inside are by default braced lists), or when we explicitly detect 474 // blocks (for example while parsing lambdas). 475 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 476 // braced list in JS. 
477 ProbablyBracedList = 478 (Style.Language == FormatStyle::LK_JavaScript && 479 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 480 Keywords.kw_as)) || 481 (Style.isCpp() && NextTok->is(tok::l_paren)) || 482 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 483 tok::r_paren, tok::r_square, tok::l_brace, 484 tok::ellipsis) || 485 (NextTok->is(tok::identifier) && 486 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 487 (NextTok->is(tok::semi) && 488 (!ExpectClassBody || LBraceStack.size() != 1)) || 489 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 490 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 491 // We can have an array subscript after a braced init 492 // list, but C++11 attributes are expected after blocks. 493 NextTok = Tokens->getNextToken(); 494 ++ReadTokens; 495 ProbablyBracedList = NextTok->isNot(tok::l_square); 496 } 497 } 498 if (ProbablyBracedList) { 499 Tok->BlockKind = BK_BracedInit; 500 LBraceStack.back()->BlockKind = BK_BracedInit; 501 } else { 502 Tok->BlockKind = BK_Block; 503 LBraceStack.back()->BlockKind = BK_Block; 504 } 505 } 506 LBraceStack.pop_back(); 507 break; 508 case tok::identifier: 509 if (!Tok->is(TT_StatementMacro)) 510 break; 511 LLVM_FALLTHROUGH; 512 case tok::at: 513 case tok::semi: 514 case tok::kw_if: 515 case tok::kw_while: 516 case tok::kw_for: 517 case tok::kw_switch: 518 case tok::kw_try: 519 case tok::kw___try: 520 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 521 LBraceStack.back()->BlockKind = BK_Block; 522 break; 523 default: 524 break; 525 } 526 PrevTok = Tok; 527 Tok = NextTok; 528 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 529 530 // Assume other blocks for all unclosed opening braces. 
531 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 532 if (LBraceStack[i]->BlockKind == BK_Unknown) 533 LBraceStack[i]->BlockKind = BK_Block; 534 } 535 536 FormatTok = Tokens->setPosition(StoredPosition); 537 } 538 539 template <class T> 540 static inline void hash_combine(std::size_t &seed, const T &v) { 541 std::hash<T> hasher; 542 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 543 } 544 545 size_t UnwrappedLineParser::computePPHash() const { 546 size_t h = 0; 547 for (const auto &i : PPStack) { 548 hash_combine(h, size_t(i.Kind)); 549 hash_combine(h, i.Line); 550 } 551 return h; 552 } 553 554 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 555 bool MunchSemi) { 556 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 557 "'{' or macro block token expected"); 558 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 559 FormatTok->BlockKind = BK_Block; 560 561 size_t PPStartHash = computePPHash(); 562 563 unsigned InitialLevel = Line->Level; 564 nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); 565 566 if (MacroBlock && FormatTok->is(tok::l_paren)) 567 parseParens(); 568 569 size_t NbPreprocessorDirectives = 570 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 571 addUnwrappedLine(); 572 size_t OpeningLineIndex = 573 CurrentLines->empty() 574 ? (UnwrappedLine::kInvalidIndex) 575 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 576 577 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 578 MustBeDeclaration); 579 if (AddLevel) 580 ++Line->Level; 581 parseLevel(/*HasOpeningBrace=*/true); 582 583 if (eof()) 584 return; 585 586 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 587 : !FormatTok->is(tok::r_brace)) { 588 Line->Level = InitialLevel; 589 FormatTok->BlockKind = BK_Block; 590 return; 591 } 592 593 size_t PPEndHash = computePPHash(); 594 595 // Munch the closing brace. 596 nextToken(/*LevelDifference=*/AddLevel ? 
-1 : 0); 597 598 if (MacroBlock && FormatTok->is(tok::l_paren)) 599 parseParens(); 600 601 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 602 nextToken(); 603 Line->Level = InitialLevel; 604 605 if (PPStartHash == PPEndHash) { 606 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 607 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 608 // Update the opening line to add the forward reference as well 609 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 610 CurrentLines->size() - 1; 611 } 612 } 613 } 614 615 static bool isGoogScope(const UnwrappedLine &Line) { 616 // FIXME: Closure-library specific stuff should not be hard-coded but be 617 // configurable. 618 if (Line.Tokens.size() < 4) 619 return false; 620 auto I = Line.Tokens.begin(); 621 if (I->Tok->TokenText != "goog") 622 return false; 623 ++I; 624 if (I->Tok->isNot(tok::period)) 625 return false; 626 ++I; 627 if (I->Tok->TokenText != "scope") 628 return false; 629 ++I; 630 return I->Tok->is(tok::l_paren); 631 } 632 633 static bool isIIFE(const UnwrappedLine &Line, 634 const AdditionalKeywords &Keywords) { 635 // Look for the start of an immediately invoked anonymous function. 636 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 637 // This is commonly done in JavaScript to create a new, anonymous scope. 638 // Example: (function() { ... 
})() 639 if (Line.Tokens.size() < 3) 640 return false; 641 auto I = Line.Tokens.begin(); 642 if (I->Tok->isNot(tok::l_paren)) 643 return false; 644 ++I; 645 if (I->Tok->isNot(Keywords.kw_function)) 646 return false; 647 ++I; 648 return I->Tok->is(tok::l_paren); 649 } 650 651 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 652 const FormatToken &InitialToken) { 653 if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro)) 654 return Style.BraceWrapping.AfterNamespace; 655 if (InitialToken.is(tok::kw_class)) 656 return Style.BraceWrapping.AfterClass; 657 if (InitialToken.is(tok::kw_union)) 658 return Style.BraceWrapping.AfterUnion; 659 if (InitialToken.is(tok::kw_struct)) 660 return Style.BraceWrapping.AfterStruct; 661 return false; 662 } 663 664 void UnwrappedLineParser::parseChildBlock() { 665 FormatTok->BlockKind = BK_Block; 666 nextToken(); 667 { 668 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && 669 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 670 ScopedLineState LineState(*this); 671 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 672 /*MustBeDeclaration=*/false); 673 Line->Level += SkipIndent ? 0 : 1; 674 parseLevel(/*HasOpeningBrace=*/true); 675 flushComments(isOnNewLine(*FormatTok)); 676 Line->Level -= SkipIndent ? 
0 : 1; 677 } 678 nextToken(); 679 } 680 681 void UnwrappedLineParser::parsePPDirective() { 682 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 683 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 684 685 nextToken(); 686 687 if (!FormatTok->Tok.getIdentifierInfo()) { 688 parsePPUnknown(); 689 return; 690 } 691 692 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 693 case tok::pp_define: 694 parsePPDefine(); 695 return; 696 case tok::pp_if: 697 parsePPIf(/*IfDef=*/false); 698 break; 699 case tok::pp_ifdef: 700 case tok::pp_ifndef: 701 parsePPIf(/*IfDef=*/true); 702 break; 703 case tok::pp_else: 704 parsePPElse(); 705 break; 706 case tok::pp_elif: 707 parsePPElIf(); 708 break; 709 case tok::pp_endif: 710 parsePPEndIf(); 711 break; 712 default: 713 parsePPUnknown(); 714 break; 715 } 716 } 717 718 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 719 size_t Line = CurrentLines->size(); 720 if (CurrentLines == &PreprocessorDirectives) 721 Line += Lines.size(); 722 723 if (Unreachable || 724 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 725 PPStack.push_back({PP_Unreachable, Line}); 726 else 727 PPStack.push_back({PP_Conditional, Line}); 728 } 729 730 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 731 ++PPBranchLevel; 732 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 733 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 734 PPLevelBranchIndex.push_back(0); 735 PPLevelBranchCount.push_back(0); 736 } 737 PPChainBranchIndex.push(0); 738 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 739 conditionalCompilationCondition(Unreachable || Skip); 740 } 741 742 void UnwrappedLineParser::conditionalCompilationAlternative() { 743 if (!PPStack.empty()) 744 PPStack.pop_back(); 745 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 746 if (!PPChainBranchIndex.empty()) 747 ++PPChainBranchIndex.top(); 748 conditionalCompilationCondition( 749 
PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 750 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 751 } 752 753 void UnwrappedLineParser::conditionalCompilationEnd() { 754 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 755 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 756 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 757 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 758 } 759 } 760 // Guard against #endif's without #if. 761 if (PPBranchLevel > -1) 762 --PPBranchLevel; 763 if (!PPChainBranchIndex.empty()) 764 PPChainBranchIndex.pop(); 765 if (!PPStack.empty()) 766 PPStack.pop_back(); 767 } 768 769 void UnwrappedLineParser::parsePPIf(bool IfDef) { 770 bool IfNDef = FormatTok->is(tok::pp_ifndef); 771 nextToken(); 772 bool Unreachable = false; 773 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 774 Unreachable = true; 775 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 776 Unreachable = true; 777 conditionalCompilationStart(Unreachable); 778 FormatToken *IfCondition = FormatTok; 779 // If there's a #ifndef on the first line, and the only lines before it are 780 // comments, it could be an include guard. 781 bool MaybeIncludeGuard = IfNDef; 782 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 783 for (auto &Line : Lines) { 784 if (!Line.Tokens.front().Tok->is(tok::comment)) { 785 MaybeIncludeGuard = false; 786 IncludeGuard = IG_Rejected; 787 break; 788 } 789 } 790 --PPBranchLevel; 791 parsePPUnknown(); 792 ++PPBranchLevel; 793 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 794 IncludeGuard = IG_IfNdefed; 795 IncludeGuardToken = IfCondition; 796 } 797 } 798 799 void UnwrappedLineParser::parsePPElse() { 800 // If a potential include guard has an #else, it's not an include guard. 
801 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 802 IncludeGuard = IG_Rejected; 803 conditionalCompilationAlternative(); 804 if (PPBranchLevel > -1) 805 --PPBranchLevel; 806 parsePPUnknown(); 807 ++PPBranchLevel; 808 } 809 810 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 811 812 void UnwrappedLineParser::parsePPEndIf() { 813 conditionalCompilationEnd(); 814 parsePPUnknown(); 815 // If the #endif of a potential include guard is the last thing in the file, 816 // then we found an include guard. 817 unsigned TokenPosition = Tokens->getPosition(); 818 FormatToken *PeekNext = AllTokens[TokenPosition]; 819 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && 820 PeekNext->is(tok::eof) && 821 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 822 IncludeGuard = IG_Found; 823 } 824 825 void UnwrappedLineParser::parsePPDefine() { 826 nextToken(); 827 828 if (!FormatTok->Tok.getIdentifierInfo()) { 829 IncludeGuard = IG_Rejected; 830 IncludeGuardToken = nullptr; 831 parsePPUnknown(); 832 return; 833 } 834 835 if (IncludeGuard == IG_IfNdefed && 836 IncludeGuardToken->TokenText == FormatTok->TokenText) { 837 IncludeGuard = IG_Defined; 838 IncludeGuardToken = nullptr; 839 for (auto &Line : Lines) { 840 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 841 IncludeGuard = IG_Rejected; 842 break; 843 } 844 } 845 } 846 847 nextToken(); 848 if (FormatTok->Tok.getKind() == tok::l_paren && 849 FormatTok->WhitespaceRange.getBegin() == 850 FormatTok->WhitespaceRange.getEnd()) { 851 parseParens(); 852 } 853 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 854 Line->Level += PPBranchLevel + 1; 855 addUnwrappedLine(); 856 ++Line->Level; 857 858 // Errors during a preprocessor directive can only affect the layout of the 859 // preprocessor directive, and thus we ignore them. 
An alternative approach 860 // would be to use the same approach we use on the file level (no 861 // re-indentation if there was a structural error) within the macro 862 // definition. 863 parseFile(); 864 } 865 866 void UnwrappedLineParser::parsePPUnknown() { 867 do { 868 nextToken(); 869 } while (!eof()); 870 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 871 Line->Level += PPBranchLevel + 1; 872 addUnwrappedLine(); 873 } 874 875 // Here we blacklist certain tokens that are not usually the first token in an 876 // unwrapped line. This is used in attempt to distinguish macro calls without 877 // trailing semicolons from other constructs split to several lines. 878 static bool tokenCanStartNewLine(const clang::Token &Tok) { 879 // Semicolon can be a null-statement, l_square can be a start of a macro or 880 // a C++11 attribute, but this doesn't seem to be common. 881 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 882 Tok.isNot(tok::l_square) && 883 // Tokens that can only be used as binary operators and a part of 884 // overloaded operator names. 885 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 886 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 887 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 888 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 889 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 890 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 891 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 892 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 893 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 894 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 895 Tok.isNot(tok::lesslessequal) && 896 // Colon is used in labels, base class lists, initializer lists, 897 // range-based for loops, ternary operator, but should never be the 898 // first token in an unwrapped line. 899 Tok.isNot(tok::colon) && 900 // 'noexcept' is a trailing annotation. 
901 Tok.isNot(tok::kw_noexcept); 902 } 903 904 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 905 const FormatToken *FormatTok) { 906 // FIXME: This returns true for C/C++ keywords like 'struct'. 907 return FormatTok->is(tok::identifier) && 908 (FormatTok->Tok.getIdentifierInfo() == nullptr || 909 !FormatTok->isOneOf( 910 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 911 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 912 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 913 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 914 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 915 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, 916 Keywords.kw_from)); 917 } 918 919 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 920 const FormatToken *FormatTok) { 921 return FormatTok->Tok.isLiteral() || 922 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 923 mustBeJSIdent(Keywords, FormatTok); 924 } 925 926 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 927 // when encountered after a value (see mustBeJSIdentOrValue). 
928 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 929 const FormatToken *FormatTok) { 930 return FormatTok->isOneOf( 931 tok::kw_return, Keywords.kw_yield, 932 // conditionals 933 tok::kw_if, tok::kw_else, 934 // loops 935 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 936 // switch/case 937 tok::kw_switch, tok::kw_case, 938 // exceptions 939 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 940 // declaration 941 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 942 Keywords.kw_async, Keywords.kw_function, 943 // import/export 944 Keywords.kw_import, tok::kw_export); 945 } 946 947 // readTokenWithJavaScriptASI reads the next token and terminates the current 948 // line if JavaScript Automatic Semicolon Insertion must 949 // happen between the current token and the next token. 950 // 951 // This method is conservative - it cannot cover all edge cases of JavaScript, 952 // but only aims to correctly handle certain well known cases. It *must not* 953 // return true in speculative cases. 954 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 955 FormatToken *Previous = FormatTok; 956 readToken(); 957 FormatToken *Next = FormatTok; 958 959 bool IsOnSameLine = 960 CommentsBeforeNextToken.empty() 961 ? Next->NewlinesBefore == 0 962 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 963 if (IsOnSameLine) 964 return; 965 966 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 967 bool PreviousStartsTemplateExpr = 968 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 969 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 970 // If the line contains an '@' sign, the previous token might be an 971 // annotation, which can precede another identifier/value. 
bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
                              [](UnwrappedLineNode &LineNode) {
                                return LineNode.Tok->is(tok::at);
                              }) != Line->Tokens.end();
    if (HasAt)
      return;
  }
  // A '!' that starts a new line after a value begins a new unwrapped line.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // A value on a new line after a value, ']', ')', '++' or '--' is split off,
  // unless the previous token opens a template expression ("${") or the next
  // token closes one ("}").
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus)))
    return addUnwrappedLine();
  // A declaration/statement keyword after a value or ')' is split off as well.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next))
    return addUnwrappedLine();
}

// Parses one structural element starting at the current token.
//
// The first switch dispatches on the kind of the leading token to the
// dedicated parsers (namespace, control flow, labels, extern blocks, ...),
// most of which return directly. Everything that breaks out of that switch is
// consumed token by token in the loop below, until one of its cases ends the
// element or eof is reached.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include token, optionally followed by a string literal, forms
  // its own line.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Mark every token up to the closing brace as finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# the keyword is only consumed here; otherwise
    // it is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only 'extern "<string>" {' is handled here; the block is indented one
    // level only when AfterExternBlock is set.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: import [public] "<string>" [;]
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // Qt's signals/slots keywords followed by ':' form their own line.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic path: consume tokens until one of the cases below terminates the
  // structural element.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above). But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the next token, then rewind the token source.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // Only consider the identifier if it is the first token on the line,
      // optionally preceded by a single comment.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Treated as a macro only if the identifier is all upper case and is
        // either function-like or at least five characters long.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      // After '=', a '{' starts a braced list; in Proto a '<' starts a list
      // closed by '>'.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

// Tries to parse a C++ lambda starting at the current '['.
//
// Returns false if the language is not C++ (after consuming one token) or if
// tryToParseLambdaIntroducer() rejects the introducer. Otherwise returns true:
// either the tokens up to a '{' were accepted and the body was parsed as a
// child block (the '[' and '{' are then typed as lambda tokens), or an
// unexpected token was hit and parsing stopped early.
bool UnwrappedLineParser::tryToParseLambda() {
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  assert(FormatTok->is(tok::l_square));
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  // Set once a "->" has been seen; operators are only accepted after it.
  bool SeenArrow = false;

  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_class:
    case tok::kw_mutable:
    case tok::kw_noexcept:
    case tok::kw_template:
    case tok::kw_typename:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as a heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow) {
        nextToken();
        break;
      }
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->Type = TT_LambdaArrow;
      SeenArrow = true;
      nextToken();
      break;
    default:
      return true;
    }
  }
  // The loop only exits on '{': mark it and the introducing '[' as lambda
  // tokens and parse the body as a child block.
  FormatTok->Type = TT_LambdaLBrace;
  LSquare.Type = TT_LambdaLSquare;
  parseChildBlock();
  return true;
}

// Decides whether the current '[' can introduce a lambda and, if so, parses
// the introducer via parseSquare(/*LambdaIntroducer=*/true).
//
// Returns false (after consuming the '[') if the previous token, or a
// structured binding at the current token, rules a lambda out, or if another
// '[' follows immediately. Returns true otherwise.
bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
  const FormatToken *Previous = FormatTok->Previous;
  if (Previous &&
      (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
                         tok::kw_delete, tok::l_square) ||
       FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
       Previous->isSimpleTypeSpecifier())) {
    nextToken();
    return false;
  }
  nextToken();
  if (FormatTok->is(tok::l_square)) {
    return false;
  }
  parseSquare(/*LambdaIntroducer=*/true);
  return true;
}

// Parses a JavaScript function starting at 'function' or 'async function':
// optional '*', optional name, the parameter list, an optional ': type'
// annotation and finally the body as a child block. Returns early if no '('
// follows the name or if a ';' follows the signature.
void UnwrappedLineParser::tryToParseJSFunction() {
  assert(FormatTok->is(Keywords.kw_function) ||
         FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
  if (FormatTok->is(Keywords.kw_async))
    nextToken();
  // Consume "function".
  nextToken();

  // Consume * (generator function). Treat it like C++'s overloaded operators.
  if (FormatTok->is(tok::star)) {
    FormatTok->Type = TT_OverloadedOperator;
    nextToken();
  }

  // Consume function name.
  if (FormatTok->is(tok::identifier))
    nextToken();

  if (FormatTok->isNot(tok::l_paren))
    return;

  // Parse formal parameter list.
  parseParens();

  if (FormatTok->is(tok::colon)) {
    // Parse a type definition.
    nextToken();

    // Eat the type declaration. For braced inline object types, balance braces,
    // otherwise just parse until finding an l_brace for the function body.
if (FormatTok->is(tok::l_brace))
      tryToParseBracedList();
    else
      while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
        nextToken();
  }

  if (FormatTok->is(tok::semi))
    return;

  parseChildBlock();
}

// Parses the '{' at the current token as a braced list if its BlockKind says
// so (computing the brace kinds first when still unknown). Returns false,
// without consuming anything, if the brace opens a block (BK_Block).
bool UnwrappedLineParser::tryToParseBracedList() {
  if (FormatTok->BlockKind == BK_Unknown)
    calculateBraceTypes();
  assert(FormatTok->BlockKind != BK_Unknown);
  if (FormatTok->BlockKind == BK_Block)
    return false;
  nextToken();
  parseBracedList();
  return true;
}

// Parses the contents of a braced list up to and including the token of kind
// \p ClosingBraceKind; the opening token has already been consumed by the
// caller.
//
// Returns true if the closing token was found and no stray semicolon was
// seen. A semicolon (outside JavaScript) marks an error; parsing then stops
// immediately unless \p ContinueOnSemicolons is set. Returns false if eof is
// reached before the closing token.
bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
                                          tok::TokenKind ClosingBraceKind) {
  bool HasError = false;

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssignmentExpression() inside.
  do {
    if (Style.Language == FormatStyle::LK_JavaScript) {
      if (FormatTok->is(Keywords.kw_function) ||
          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
        tryToParseJSFunction();
        continue;
      }
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        // Fat arrows can be followed by simple expressions or by child blocks
        // in curly braces.
        if (FormatTok->is(tok::l_brace)) {
          parseChildBlock();
          continue;
        }
      }
      if (FormatTok->is(tok::l_brace)) {
        // Could be a method inside of a braced list `{a() { return 1; }}`.
        if (tryToParseBracedList())
          continue;
        parseChildBlock();
      }
    }
    if (FormatTok->Tok.getKind() == ClosingBraceKind) {
      nextToken();
      return !HasError;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::caret:
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        parseChildBlock();
      }
      break;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_paren:
      parseParens();
      // JavaScript can just have free standing methods and getters/setters in
      // object literals. Detect them by a "{" following ")".
      if (Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      break;
    case tok::l_brace:
      // Assume there are no blocks inside a braced init list apart
      // from the ones we explicitly parse out (like lambdas).
      FormatTok->BlockKind = BK_BracedInit;
      nextToken();
      parseBracedList();
      break;
    case tok::less:
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      } else {
        nextToken();
      }
      break;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
      if (Style.Language == FormatStyle::LK_JavaScript) {
        nextToken();
        break;
      }
      HasError = true;
      if (!ContinueOnSemicolons)
        return !HasError;
      nextToken();
      break;
    case tok::comma:
      nextToken();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
  return false;
}

// Parses a parenthesized token sequence starting at '(' through the matching
// ')', recursing into nested parens, lambdas, braced lists and child blocks.
// Stops early, without consuming it, at an unmatched '}'.
void UnwrappedLineParser::parseParens() {
  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      parseParens();
      if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::r_paren:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
      return;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::kw_class:
      if (Style.Language == FormatStyle::LK_JavaScript)
        parseRecord(/*ParseAsExpr=*/true);
      else
        nextToken();
      break;
    case tok::identifier:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          (FormatTok->is(Keywords.kw_function) ||
           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
        tryToParseJSFunction();
      else
        nextToken();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

// Parses a square-bracketed token sequence through the matching ']'. Unless
// \p LambdaIntroducer is set, the current token must be '[' and a lambda
// parse is attempted first; if that succeeds nothing else is done here.
void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
  if (!LambdaIntroducer) {
    assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
    if (tryToParseLambda())
      return;
  }
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      parseParens();
break;
    case tok::r_square:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside square brackets is an error if there wasn't a matching
      // "{".
      return;
    case tok::l_square:
      parseSquare();
      break;
    case tok::l_brace: {
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    }
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

// Parses an if statement starting at 'if', including any else / else-if
// chain. A braced body is parsed as a block; an unbraced body is parsed as a
// nested structural element one level deeper.
void UnwrappedLineParser::parseIfThenElse() {
  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
  nextToken();
  // Skip a single 'constexpr' or identifier token between 'if' and '('.
  if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
    nextToken();
  if (FormatTok->Tok.is(tok::l_paren))
    parseParens();
  // Set when the line after the closing '}' is left open; it is terminated at
  // the end of this function only if no 'else' follows.
  bool NeedsUnwrappedLine = false;
  if (FormatTok->Tok.is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else {
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  if (FormatTok->Tok.is(tok::kw_else)) {
    nextToken();
    if (FormatTok->Tok.is(tok::l_brace)) {
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      parseBlock(/*MustBeDeclaration=*/false);
      addUnwrappedLine();
    } else if (FormatTok->Tok.is(tok::kw_if)) {
      parseIfThenElse();
    } else {
      addUnwrappedLine();
      ++Line->Level;
      parseStructuralElement();
      if (FormatTok->is(tok::eof))
        addUnwrappedLine();
      --Line->Level;
    }
  } else if (NeedsUnwrappedLine) {
    addUnwrappedLine();
  }
}

// Parses a try statement starting at 'try' or '__try', followed by any number
// of catch / __except / __finally / finally (Java, JavaScript) / @catch /
// @finally handlers. Also handles the constructor initializer list of a
// function-try-block and Java's try-with-resources parentheses.
void UnwrappedLineParser::parseTryCatch() {
  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  nextToken();
  // Set when the line after a closing '}' is left open so that a following
  // handler keyword can continue it.
  bool NeedsUnwrappedLine = false;
  if (FormatTok->is(tok::colon)) {
    // We are in a function try block, what comes is an initializer list.
    nextToken();
    while (FormatTok->is(tok::identifier)) {
      nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::comma))
        nextToken();
    }
  }
  // Parse try with resource.
  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
    parseParens();
  }
  if (FormatTok->is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    if (Style.BraceWrapping.BeforeCatch) {
      addUnwrappedLine();
    } else {
      NeedsUnwrappedLine = true;
    }
  } else if (!FormatTok->is(tok::kw_catch)) {
    // The C++ standard requires a compound-statement after a try.
    // If there's none, we try to assume there's a structuralElement
    // and try to continue.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  // One iteration per handler; stops at the first token that does not start
  // one.
  while (1) {
    if (FormatTok->is(tok::at))
      nextToken();
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
                             tok::kw___finally) ||
          ((Style.Language == FormatStyle::LK_Java ||
            Style.Language == FormatStyle::LK_JavaScript) &&
           FormatTok->is(Keywords.kw_finally)) ||
          (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
           FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
      break;
    nextToken();
    // Skip ahead to the handler's '{'; give up on ';', '}' or eof.
    while (FormatTok->isNot(tok::l_brace)) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        continue;
      }
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
        return;
      nextToken();
    }
    NeedsUnwrappedLine = false;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
NeedsUnwrappedLine = true;
  }
  if (NeedsUnwrappedLine)
    addUnwrappedLine();
}

// Parses a namespace starting at 'namespace' or at a namespace macro: the
// name (or the macro's parenthesized arguments) followed by the braced body.
// Nothing further is done if no '{' follows.
void UnwrappedLineParser::parseNamespace() {
  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
         "'namespace' expected");

  const FormatToken &InitialToken = *FormatTok;
  nextToken();
  if (InitialToken.is(TT_NamespaceMacro)) {
    parseParens();
  } else {
    // Consume the name: identifiers, '::', 'inline' and bracketed sequences.
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
                              tok::l_square)) {
      if (FormatTok->is(tok::l_square))
        parseSquare();
      else
        nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ShouldBreakBeforeBrace(Style, InitialToken))
      addUnwrappedLine();

    // Indent the body for NI_All, or for NI_Inner when the declaration scope
    // stack has more than one entry.
    bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
                    (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
                     DeclarationScopeStack.size() > 1);
    parseBlock(/*MustBeDeclaration=*/true, AddLevel);
    // Munch the semicolon after a namespace. This is more common than one would
    // think. Putting the semicolon into its own line is very ugly.
    if (FormatTok->Tok.is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  // FIXME: Add error handling.
}

// Parses a 'new' expression. Outside Java only the keyword is consumed; in
// Java, tokens are consumed up to the argument parens and an optional
// anonymous class body that follows them.
void UnwrappedLineParser::parseNew() {
  assert(FormatTok->is(tok::kw_new) && "'new' expected");
  nextToken();
  if (Style.Language != FormatStyle::LK_Java)
    return;

  // In Java, we can parse everything up to the parens, which aren't optional.
  do {
    // There should not be a ;, { or } before the new's open paren.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
      return;

    // Consume the parens.
    if (FormatTok->is(tok::l_paren)) {
      parseParens();

      // If there is a class body of an anonymous class, consume that as child.
if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      return;
    }
    nextToken();
  } while (!eof());
}

// Parses a loop starting at 'for', 'while' or a foreach macro: the optional
// parenthesized header, then either a braced block or an unbraced body parsed
// as a nested structural element one level deeper.
void UnwrappedLineParser::parseForOrWhileLoop() {
  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
         "'for', 'while' or foreach macro expected");
  nextToken();
  // JS' for await ( ...
  if (Style.Language == FormatStyle::LK_JavaScript &&
      FormatTok->is(Keywords.kw_await))
    nextToken();
  if (FormatTok->Tok.is(tok::l_paren))
    parseParens();
  if (FormatTok->Tok.is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    addUnwrappedLine();
  } else {
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
}

// Parses a do-while statement starting at 'do': the body (braced block or
// nested structural element), then the trailing 'while' part as a structural
// element. If no 'while' follows the body, the current line is terminated and
// parsing stops.
void UnwrappedLineParser::parseDoWhile() {
  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
  nextToken();
  if (FormatTok->Tok.is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    // The line after '}' is only terminated when IndentBraces is set;
    // otherwise it stays open for the following 'while'.
    if (Style.BraceWrapping.IndentBraces)
      addUnwrappedLine();
  } else {
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }

  // FIXME: Add error handling.
if (!FormatTok->Tok.is(tok::kw_while)) {
    addUnwrappedLine();
    return;
  }

  nextToken();
  parseStructuralElement();
}

// Parses the remainder of a label; the current token (the ':' at the visible
// call sites) is consumed first. The label line is emitted one level to the
// left of the current level (or at level 0 when \p LeftAlignLabel is set),
// after which the previous level is restored.
void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
  nextToken();
  unsigned OldLineLevel = Line->Level;
  // Outdent by one, but never below level 1 inside a preprocessor directive.
  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
    --Line->Level;
  if (LeftAlignLabel)
    Line->Level = 0;
  // A '{' directly after the label (no comments in between) is parsed as the
  // label's own block, unless IndentCaseBlocks is set.
  if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
      FormatTok->Tok.is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Line->Level,
                                       Style.BraceWrapping.AfterCaseLabel,
                                       Style.BraceWrapping.IndentBraces);
    parseBlock(/*MustBeDeclaration=*/false);
    if (FormatTok->Tok.is(tok::kw_break)) {
      if (Style.BraceWrapping.AfterControlStatement ==
          FormatStyle::BWACS_Always)
        addUnwrappedLine();
      parseStructuralElement();
    }
    addUnwrappedLine();
  } else {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  Line->Level = OldLineLevel;
  // Parse whatever follows the label at the restored level, unless it is a
  // '{'.
  if (FormatTok->isNot(tok::l_brace)) {
    parseStructuralElement();
    addUnwrappedLine();
  }
}

// Parses a case label starting at 'case': skips ahead to the next ':' (or
// eof) and hands over to parseLabel().
void UnwrappedLineParser::parseCaseLabel() {
  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
  // FIXME: fix handling of complex expressions here.
2055 do { 2056 nextToken(); 2057 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 2058 parseLabel(); 2059 } 2060 2061 void UnwrappedLineParser::parseSwitch() { 2062 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 2063 nextToken(); 2064 if (FormatTok->Tok.is(tok::l_paren)) 2065 parseParens(); 2066 if (FormatTok->Tok.is(tok::l_brace)) { 2067 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2068 parseBlock(/*MustBeDeclaration=*/false); 2069 addUnwrappedLine(); 2070 } else { 2071 addUnwrappedLine(); 2072 ++Line->Level; 2073 parseStructuralElement(); 2074 --Line->Level; 2075 } 2076 } 2077 2078 void UnwrappedLineParser::parseAccessSpecifier() { 2079 nextToken(); 2080 // Understand Qt's slots. 2081 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 2082 nextToken(); 2083 // Otherwise, we don't know what it is, and we'd better keep the next token. 2084 if (FormatTok->Tok.is(tok::colon)) 2085 nextToken(); 2086 addUnwrappedLine(); 2087 } 2088 2089 bool UnwrappedLineParser::parseEnum() { 2090 // Won't be 'enum' for NS_ENUMs. 2091 if (FormatTok->Tok.is(tok::kw_enum)) 2092 nextToken(); 2093 2094 // In TypeScript, "enum" can also be used as property name, e.g. in interface 2095 // declarations. An "enum" keyword followed by a colon would be a syntax 2096 // error and thus assume it is just an identifier. 2097 if (Style.Language == FormatStyle::LK_JavaScript && 2098 FormatTok->isOneOf(tok::colon, tok::question)) 2099 return false; 2100 2101 // In protobuf, "enum" can be used as a field name. 2102 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 2103 return false; 2104 2105 // Eat up enum class ... 
2106 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 2107 nextToken(); 2108 2109 while (FormatTok->Tok.getIdentifierInfo() || 2110 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 2111 tok::greater, tok::comma, tok::question)) { 2112 nextToken(); 2113 // We can have macros or attributes in between 'enum' and the enum name. 2114 if (FormatTok->is(tok::l_paren)) 2115 parseParens(); 2116 if (FormatTok->is(tok::identifier)) { 2117 nextToken(); 2118 // If there are two identifiers in a row, this is likely an elaborate 2119 // return type. In Java, this can be "implements", etc. 2120 if (Style.isCpp() && FormatTok->is(tok::identifier)) 2121 return false; 2122 } 2123 } 2124 2125 // Just a declaration or something is wrong. 2126 if (FormatTok->isNot(tok::l_brace)) 2127 return true; 2128 FormatTok->BlockKind = BK_Block; 2129 2130 if (Style.Language == FormatStyle::LK_Java) { 2131 // Java enums are different. 2132 parseJavaEnumBody(); 2133 return true; 2134 } 2135 if (Style.Language == FormatStyle::LK_Proto) { 2136 parseBlock(/*MustBeDeclaration=*/true); 2137 return true; 2138 } 2139 2140 // Parse enum body. 2141 nextToken(); 2142 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 2143 if (HasError) { 2144 if (FormatTok->is(tok::semi)) 2145 nextToken(); 2146 addUnwrappedLine(); 2147 } 2148 return true; 2149 2150 // There is no addUnwrappedLine() here so that we fall through to parsing a 2151 // structural element afterwards. Thus, in "enum A {} n, m;", 2152 // "} n, m;" will end up in one unwrapped line. 2153 } 2154 2155 void UnwrappedLineParser::parseJavaEnumBody() { 2156 // Determine whether the enum is simple, i.e. does not have a semicolon or 2157 // constants with class bodies. Simple enums can be formatted like braced 2158 // lists, contracted to a single line, etc. 
2159 unsigned StoredPosition = Tokens->getPosition(); 2160 bool IsSimple = true; 2161 FormatToken *Tok = Tokens->getNextToken(); 2162 while (Tok) { 2163 if (Tok->is(tok::r_brace)) 2164 break; 2165 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2166 IsSimple = false; 2167 break; 2168 } 2169 // FIXME: This will also mark enums with braces in the arguments to enum 2170 // constants as "not simple". This is probably fine in practice, though. 2171 Tok = Tokens->getNextToken(); 2172 } 2173 FormatTok = Tokens->setPosition(StoredPosition); 2174 2175 if (IsSimple) { 2176 nextToken(); 2177 parseBracedList(); 2178 addUnwrappedLine(); 2179 return; 2180 } 2181 2182 // Parse the body of a more complex enum. 2183 // First add a line for everything up to the "{". 2184 nextToken(); 2185 addUnwrappedLine(); 2186 ++Line->Level; 2187 2188 // Parse the enum constants. 2189 while (FormatTok) { 2190 if (FormatTok->is(tok::l_brace)) { 2191 // Parse the constant's class body. 2192 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2193 /*MunchSemi=*/false); 2194 } else if (FormatTok->is(tok::l_paren)) { 2195 parseParens(); 2196 } else if (FormatTok->is(tok::comma)) { 2197 nextToken(); 2198 addUnwrappedLine(); 2199 } else if (FormatTok->is(tok::semi)) { 2200 nextToken(); 2201 addUnwrappedLine(); 2202 break; 2203 } else if (FormatTok->is(tok::r_brace)) { 2204 addUnwrappedLine(); 2205 break; 2206 } else { 2207 nextToken(); 2208 } 2209 } 2210 2211 // Parse the class body after the enum's ";" if any. 2212 parseLevel(/*HasOpeningBrace=*/true); 2213 nextToken(); 2214 --Line->Level; 2215 addUnwrappedLine(); 2216 } 2217 2218 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 2219 const FormatToken &InitialToken = *FormatTok; 2220 nextToken(); 2221 2222 // The actual identifier can be a nested name specifier, and in macros 2223 // it is often token-pasted. 
2224 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 2225 tok::kw___attribute, tok::kw___declspec, 2226 tok::kw_alignas) || 2227 ((Style.Language == FormatStyle::LK_Java || 2228 Style.Language == FormatStyle::LK_JavaScript) && 2229 FormatTok->isOneOf(tok::period, tok::comma))) { 2230 if (Style.Language == FormatStyle::LK_JavaScript && 2231 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 2232 // JavaScript/TypeScript supports inline object types in 2233 // extends/implements positions: 2234 // class Foo implements {bar: number} { } 2235 nextToken(); 2236 if (FormatTok->is(tok::l_brace)) { 2237 tryToParseBracedList(); 2238 continue; 2239 } 2240 } 2241 bool IsNonMacroIdentifier = 2242 FormatTok->is(tok::identifier) && 2243 FormatTok->TokenText != FormatTok->TokenText.upper(); 2244 nextToken(); 2245 // We can have macros or attributes in between 'class' and the class name. 2246 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 2247 parseParens(); 2248 } 2249 2250 // Note that parsing away template declarations here leads to incorrectly 2251 // accepting function declarations as record declarations. 2252 // In general, we cannot solve this problem. Consider: 2253 // class A<int> B() {} 2254 // which can be a function definition or a class definition when B() is a 2255 // macro. If we find enough real-world cases where this is a problem, we 2256 // can parse for the 'template' keyword in the beginning of the statement, 2257 // and thus rule out the record production in case there is no template 2258 // (this would still leave us with an ambiguity between template function 2259 // and class declarations). 
2260 if (FormatTok->isOneOf(tok::colon, tok::less)) { 2261 while (!eof()) { 2262 if (FormatTok->is(tok::l_brace)) { 2263 calculateBraceTypes(/*ExpectClassBody=*/true); 2264 if (!tryToParseBracedList()) 2265 break; 2266 } 2267 if (FormatTok->Tok.is(tok::semi)) 2268 return; 2269 nextToken(); 2270 } 2271 } 2272 if (FormatTok->Tok.is(tok::l_brace)) { 2273 if (ParseAsExpr) { 2274 parseChildBlock(); 2275 } else { 2276 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2277 addUnwrappedLine(); 2278 2279 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2280 /*MunchSemi=*/false); 2281 } 2282 } 2283 // There is no addUnwrappedLine() here so that we fall through to parsing a 2284 // structural element afterwards. Thus, in "class A {} n, m;", 2285 // "} n, m;" will end up in one unwrapped line. 2286 } 2287 2288 void UnwrappedLineParser::parseObjCMethod() { 2289 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && 2290 "'(' or identifier expected."); 2291 do { 2292 if (FormatTok->Tok.is(tok::semi)) { 2293 nextToken(); 2294 addUnwrappedLine(); 2295 return; 2296 } else if (FormatTok->Tok.is(tok::l_brace)) { 2297 if (Style.BraceWrapping.AfterFunction) 2298 addUnwrappedLine(); 2299 parseBlock(/*MustBeDeclaration=*/false); 2300 addUnwrappedLine(); 2301 return; 2302 } else { 2303 nextToken(); 2304 } 2305 } while (!eof()); 2306 } 2307 2308 void UnwrappedLineParser::parseObjCProtocolList() { 2309 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 2310 do { 2311 nextToken(); 2312 // Early exit in case someone forgot a close angle. 2313 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2314 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2315 return; 2316 } while (!eof() && FormatTok->Tok.isNot(tok::greater)); 2317 nextToken(); // Skip '>'. 
2318 } 2319 2320 void UnwrappedLineParser::parseObjCUntilAtEnd() { 2321 do { 2322 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2323 nextToken(); 2324 addUnwrappedLine(); 2325 break; 2326 } 2327 if (FormatTok->is(tok::l_brace)) { 2328 parseBlock(/*MustBeDeclaration=*/false); 2329 // In ObjC interfaces, nothing should be following the "}". 2330 addUnwrappedLine(); 2331 } else if (FormatTok->is(tok::r_brace)) { 2332 // Ignore stray "}". parseStructuralElement doesn't consume them. 2333 nextToken(); 2334 addUnwrappedLine(); 2335 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 2336 nextToken(); 2337 parseObjCMethod(); 2338 } else { 2339 parseStructuralElement(); 2340 } 2341 } while (!eof()); 2342 } 2343 2344 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2345 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 2346 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 2347 nextToken(); 2348 nextToken(); // interface name 2349 2350 // @interface can be followed by a lightweight generic 2351 // specialization list, then either a base class or a category. 2352 if (FormatTok->Tok.is(tok::less)) { 2353 // Unlike protocol lists, generic parameterizations support 2354 // nested angles: 2355 // 2356 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 2357 // NSObject <NSCopying, NSSecureCoding> 2358 // 2359 // so we need to count how many open angles we have left. 2360 unsigned NumOpenAngles = 1; 2361 do { 2362 nextToken(); 2363 // Early exit in case someone forgot a close angle. 2364 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2365 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2366 break; 2367 if (FormatTok->Tok.is(tok::less)) 2368 ++NumOpenAngles; 2369 else if (FormatTok->Tok.is(tok::greater)) { 2370 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 2371 --NumOpenAngles; 2372 } 2373 } while (!eof() && NumOpenAngles != 0); 2374 nextToken(); // Skip '>'. 
2375 } 2376 if (FormatTok->Tok.is(tok::colon)) { 2377 nextToken(); 2378 nextToken(); // base class name 2379 } else if (FormatTok->Tok.is(tok::l_paren)) 2380 // Skip category, if present. 2381 parseParens(); 2382 2383 if (FormatTok->Tok.is(tok::less)) 2384 parseObjCProtocolList(); 2385 2386 if (FormatTok->Tok.is(tok::l_brace)) { 2387 if (Style.BraceWrapping.AfterObjCDeclaration) 2388 addUnwrappedLine(); 2389 parseBlock(/*MustBeDeclaration=*/true); 2390 } 2391 2392 // With instance variables, this puts '}' on its own line. Without instance 2393 // variables, this ends the @interface line. 2394 addUnwrappedLine(); 2395 2396 parseObjCUntilAtEnd(); 2397 } 2398 2399 // Returns true for the declaration/definition form of @protocol, 2400 // false for the expression form. 2401 bool UnwrappedLineParser::parseObjCProtocol() { 2402 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 2403 nextToken(); 2404 2405 if (FormatTok->is(tok::l_paren)) 2406 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 2407 return false; 2408 2409 // The definition/declaration form, 2410 // @protocol Foo 2411 // - (int)someMethod; 2412 // @end 2413 2414 nextToken(); // protocol name 2415 2416 if (FormatTok->Tok.is(tok::less)) 2417 parseObjCProtocolList(); 2418 2419 // Check for protocol declaration. 2420 if (FormatTok->Tok.is(tok::semi)) { 2421 nextToken(); 2422 addUnwrappedLine(); 2423 return true; 2424 } 2425 2426 addUnwrappedLine(); 2427 parseObjCUntilAtEnd(); 2428 return true; 2429 } 2430 2431 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2432 bool IsImport = FormatTok->is(Keywords.kw_import); 2433 assert(IsImport || FormatTok->is(tok::kw_export)); 2434 nextToken(); 2435 2436 // Consume the "default" in "export default class/function". 2437 if (FormatTok->is(tok::kw_default)) 2438 nextToken(); 2439 2440 // Consume "async function", "function" and "default function", so that these 2441 // get parsed as free-standing JS functions, i.e. 
do not require a trailing 2442 // semicolon. 2443 if (FormatTok->is(Keywords.kw_async)) 2444 nextToken(); 2445 if (FormatTok->is(Keywords.kw_function)) { 2446 nextToken(); 2447 return; 2448 } 2449 2450 // For imports, `export *`, `export {...}`, consume the rest of the line up 2451 // to the terminating `;`. For everything else, just return and continue 2452 // parsing the structural element, i.e. the declaration or expression for 2453 // `export default`. 2454 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2455 !FormatTok->isStringLiteral()) 2456 return; 2457 2458 while (!eof()) { 2459 if (FormatTok->is(tok::semi)) 2460 return; 2461 if (Line->Tokens.empty()) { 2462 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2463 // import statement should terminate. 2464 return; 2465 } 2466 if (FormatTok->is(tok::l_brace)) { 2467 FormatTok->BlockKind = BK_Block; 2468 nextToken(); 2469 parseBracedList(); 2470 } else { 2471 nextToken(); 2472 } 2473 } 2474 } 2475 2476 void UnwrappedLineParser::parseStatementMacro() { 2477 nextToken(); 2478 if (FormatTok->is(tok::l_paren)) 2479 parseParens(); 2480 if (FormatTok->is(tok::semi)) 2481 nextToken(); 2482 addUnwrappedLine(); 2483 } 2484 2485 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2486 StringRef Prefix = "") { 2487 llvm::dbgs() << Prefix << "Line(" << Line.Level 2488 << ", FSC=" << Line.FirstStartColumn << ")" 2489 << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 2490 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2491 E = Line.Tokens.end(); 2492 I != E; ++I) { 2493 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2494 << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn 2495 << "] "; 2496 } 2497 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2498 E = Line.Tokens.end(); 2499 I != E; ++I) { 2500 const UnwrappedLineNode &Node = *I; 2501 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2502 I = Node.Children.begin(), 2503 E = Node.Children.end(); 2504 I != E; ++I) { 2505 printDebugInfo(*I, "\nChild: "); 2506 } 2507 } 2508 llvm::dbgs() << "\n"; 2509 } 2510 2511 void UnwrappedLineParser::addUnwrappedLine() { 2512 if (Line->Tokens.empty()) 2513 return; 2514 LLVM_DEBUG({ 2515 if (CurrentLines == &Lines) 2516 printDebugInfo(*Line); 2517 }); 2518 CurrentLines->push_back(std::move(*Line)); 2519 Line->Tokens.clear(); 2520 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2521 Line->FirstStartColumn = 0; 2522 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2523 CurrentLines->append( 2524 std::make_move_iterator(PreprocessorDirectives.begin()), 2525 std::make_move_iterator(PreprocessorDirectives.end())); 2526 PreprocessorDirectives.clear(); 2527 } 2528 // Disconnect the current token from the last token on the previous line. 2529 FormatTok->Previous = nullptr; 2530 } 2531 2532 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2533 2534 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2535 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2536 FormatTok.NewlinesBefore > 0; 2537 } 2538 2539 // Checks if \p FormatTok is a line comment that continues the line comment 2540 // section on \p Line. 
2541 static bool 2542 continuesLineCommentSection(const FormatToken &FormatTok, 2543 const UnwrappedLine &Line, 2544 const llvm::Regex &CommentPragmasRegex) { 2545 if (Line.Tokens.empty()) 2546 return false; 2547 2548 StringRef IndentContent = FormatTok.TokenText; 2549 if (FormatTok.TokenText.startswith("//") || 2550 FormatTok.TokenText.startswith("/*")) 2551 IndentContent = FormatTok.TokenText.substr(2); 2552 if (CommentPragmasRegex.match(IndentContent)) 2553 return false; 2554 2555 // If Line starts with a line comment, then FormatTok continues the comment 2556 // section if its original column is greater or equal to the original start 2557 // column of the line. 2558 // 2559 // Define the min column token of a line as follows: if a line ends in '{' or 2560 // contains a '{' followed by a line comment, then the min column token is 2561 // that '{'. Otherwise, the min column token of the line is the first token of 2562 // the line. 2563 // 2564 // If Line starts with a token other than a line comment, then FormatTok 2565 // continues the comment section if its original column is greater than the 2566 // original start column of the min column token of the line. 
2567 // 2568 // For example, the second line comment continues the first in these cases: 2569 // 2570 // // first line 2571 // // second line 2572 // 2573 // and: 2574 // 2575 // // first line 2576 // // second line 2577 // 2578 // and: 2579 // 2580 // int i; // first line 2581 // // second line 2582 // 2583 // and: 2584 // 2585 // do { // first line 2586 // // second line 2587 // int i; 2588 // } while (true); 2589 // 2590 // and: 2591 // 2592 // enum { 2593 // a, // first line 2594 // // second line 2595 // b 2596 // }; 2597 // 2598 // The second line comment doesn't continue the first in these cases: 2599 // 2600 // // first line 2601 // // second line 2602 // 2603 // and: 2604 // 2605 // int i; // first line 2606 // // second line 2607 // 2608 // and: 2609 // 2610 // do { // first line 2611 // // second line 2612 // int i; 2613 // } while (true); 2614 // 2615 // and: 2616 // 2617 // enum { 2618 // a, // first line 2619 // // second line 2620 // }; 2621 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2622 2623 // Scan for '{//'. If found, use the column of '{' as a min column for line 2624 // comment section continuation. 2625 const FormatToken *PreviousToken = nullptr; 2626 for (const UnwrappedLineNode &Node : Line.Tokens) { 2627 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2628 isLineComment(*Node.Tok)) { 2629 MinColumnToken = PreviousToken; 2630 break; 2631 } 2632 PreviousToken = Node.Tok; 2633 2634 // Grab the last newline preceding a token in this unwrapped line. 
2635 if (Node.Tok->NewlinesBefore > 0) { 2636 MinColumnToken = Node.Tok; 2637 } 2638 } 2639 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2640 MinColumnToken = PreviousToken; 2641 } 2642 2643 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2644 MinColumnToken); 2645 } 2646 2647 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2648 bool JustComments = Line->Tokens.empty(); 2649 for (SmallVectorImpl<FormatToken *>::const_iterator 2650 I = CommentsBeforeNextToken.begin(), 2651 E = CommentsBeforeNextToken.end(); 2652 I != E; ++I) { 2653 // Line comments that belong to the same line comment section are put on the 2654 // same line since later we might want to reflow content between them. 2655 // Additional fine-grained breaking of line comment sections is controlled 2656 // by the class BreakableLineCommentSection in case it is desirable to keep 2657 // several line comment sections in the same unwrapped line. 2658 // 2659 // FIXME: Consider putting separate line comment sections as children to the 2660 // unwrapped line instead. 
2661 (*I)->ContinuesLineCommentSection = 2662 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2663 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2664 addUnwrappedLine(); 2665 pushToken(*I); 2666 } 2667 if (NewlineBeforeNext && JustComments) 2668 addUnwrappedLine(); 2669 CommentsBeforeNextToken.clear(); 2670 } 2671 2672 void UnwrappedLineParser::nextToken(int LevelDifference) { 2673 if (eof()) 2674 return; 2675 flushComments(isOnNewLine(*FormatTok)); 2676 pushToken(FormatTok); 2677 FormatToken *Previous = FormatTok; 2678 if (Style.Language != FormatStyle::LK_JavaScript) 2679 readToken(LevelDifference); 2680 else 2681 readTokenWithJavaScriptASI(); 2682 FormatTok->Previous = Previous; 2683 } 2684 2685 void UnwrappedLineParser::distributeComments( 2686 const SmallVectorImpl<FormatToken *> &Comments, 2687 const FormatToken *NextTok) { 2688 // Whether or not a line comment token continues a line is controlled by 2689 // the method continuesLineCommentSection, with the following caveat: 2690 // 2691 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2692 // that each comment line from the trail is aligned with the next token, if 2693 // the next token exists. If a trail exists, the beginning of the maximal 2694 // trail is marked as a start of a new comment section. 2695 // 2696 // For example in this code: 2697 // 2698 // int a; // line about a 2699 // // line 1 about b 2700 // // line 2 about b 2701 // int b; 2702 // 2703 // the two lines about b form a maximal trail, so there are two sections, the 2704 // first one consisting of the single comment "// line about a" and the 2705 // second one consisting of the next two comments. 2706 if (Comments.empty()) 2707 return; 2708 bool ShouldPushCommentsInCurrentLine = true; 2709 bool HasTrailAlignedWithNextToken = false; 2710 unsigned StartOfTrailAlignedWithNextToken = 0; 2711 if (NextTok) { 2712 // We are skipping the first element intentionally. 
2713 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2714 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2715 HasTrailAlignedWithNextToken = true; 2716 StartOfTrailAlignedWithNextToken = i; 2717 } 2718 } 2719 } 2720 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2721 FormatToken *FormatTok = Comments[i]; 2722 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 2723 FormatTok->ContinuesLineCommentSection = false; 2724 } else { 2725 FormatTok->ContinuesLineCommentSection = 2726 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2727 } 2728 if (!FormatTok->ContinuesLineCommentSection && 2729 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2730 ShouldPushCommentsInCurrentLine = false; 2731 } 2732 if (ShouldPushCommentsInCurrentLine) { 2733 pushToken(FormatTok); 2734 } else { 2735 CommentsBeforeNextToken.push_back(FormatTok); 2736 } 2737 } 2738 } 2739 2740 void UnwrappedLineParser::readToken(int LevelDifference) { 2741 SmallVector<FormatToken *, 1> Comments; 2742 do { 2743 FormatTok = Tokens->getNextToken(); 2744 assert(FormatTok); 2745 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2746 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2747 distributeComments(Comments, FormatTok); 2748 Comments.clear(); 2749 // If there is an unfinished unwrapped line, we flush the preprocessor 2750 // directives only after that unwrapped line was finished later. 
2751 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2752 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2753 assert((LevelDifference >= 0 || 2754 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 2755 "LevelDifference makes Line->Level negative"); 2756 Line->Level += LevelDifference; 2757 // Comments stored before the preprocessor directive need to be output 2758 // before the preprocessor directive, at the same level as the 2759 // preprocessor directive, as we consider them to apply to the directive. 2760 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 2761 PPBranchLevel > 0) 2762 Line->Level += PPBranchLevel; 2763 flushComments(isOnNewLine(*FormatTok)); 2764 parsePPDirective(); 2765 } 2766 while (FormatTok->Type == TT_ConflictStart || 2767 FormatTok->Type == TT_ConflictEnd || 2768 FormatTok->Type == TT_ConflictAlternative) { 2769 if (FormatTok->Type == TT_ConflictStart) { 2770 conditionalCompilationStart(/*Unreachable=*/false); 2771 } else if (FormatTok->Type == TT_ConflictAlternative) { 2772 conditionalCompilationAlternative(); 2773 } else if (FormatTok->Type == TT_ConflictEnd) { 2774 conditionalCompilationEnd(); 2775 } 2776 FormatTok = Tokens->getNextToken(); 2777 FormatTok->MustBreakBefore = true; 2778 } 2779 2780 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 2781 !Line->InPPDirective) { 2782 continue; 2783 } 2784 2785 if (!FormatTok->Tok.is(tok::comment)) { 2786 distributeComments(Comments, FormatTok); 2787 Comments.clear(); 2788 return; 2789 } 2790 2791 Comments.push_back(FormatTok); 2792 } while (!eof()); 2793 2794 distributeComments(Comments, nullptr); 2795 Comments.clear(); 2796 } 2797 2798 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2799 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2800 if (MustBreakBeforeNextToken) { 2801 Line->Tokens.back().Tok->MustBreakBefore = true; 2802 MustBreakBeforeNextToken = false; 2803 } 2804 } 2805 2806 } // end namespace format 2807 } // 
end namespace clang 2808