1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/Support/Debug.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 #include <algorithm> 21 22 #define DEBUG_TYPE "format-parser" 23 24 namespace clang { 25 namespace format { 26 27 class FormatTokenSource { 28 public: 29 virtual ~FormatTokenSource() {} 30 virtual FormatToken *getNextToken() = 0; 31 32 virtual unsigned getPosition() = 0; 33 virtual FormatToken *setPosition(unsigned Position) = 0; 34 }; 35 36 namespace { 37 38 class ScopedDeclarationState { 39 public: 40 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 41 bool MustBeDeclaration) 42 : Line(Line), Stack(Stack) { 43 Line.MustBeDeclaration = MustBeDeclaration; 44 Stack.push_back(MustBeDeclaration); 45 } 46 ~ScopedDeclarationState() { 47 Stack.pop_back(); 48 if (!Stack.empty()) 49 Line.MustBeDeclaration = Stack.back(); 50 else 51 Line.MustBeDeclaration = true; 52 } 53 54 private: 55 UnwrappedLine &Line; 56 std::vector<bool> &Stack; 57 }; 58 59 static bool isLineComment(const FormatToken &FormatTok) { 60 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 61 } 62 63 // Checks if \p FormatTok is a line comment that continues the line comment 64 // \p Previous. The original column of \p MinColumnToken is used to determine 65 // whether \p FormatTok is indented enough to the right to continue \p Previous. 66 static bool continuesLineComment(const FormatToken &FormatTok, 67 const FormatToken *Previous, 68 const FormatToken *MinColumnToken) { 69 if (!Previous || !MinColumnToken) 70 return false; 71 unsigned MinContinueColumn = 72 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 73 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 74 isLineComment(*Previous) && 75 FormatTok.OriginalColumn >= MinContinueColumn; 76 } 77 78 class ScopedMacroState : public FormatTokenSource { 79 public: 80 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 81 FormatToken *&ResetToken) 82 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 83 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 84 Token(nullptr), PreviousToken(nullptr) { 85 FakeEOF.Tok.startToken(); 86 FakeEOF.Tok.setKind(tok::eof); 87 TokenSource = this; 88 Line.Level = 0; 89 Line.InPPDirective = true; 90 } 91 92 ~ScopedMacroState() override { 93 TokenSource = PreviousTokenSource; 94 ResetToken = Token; 95 Line.InPPDirective = false; 96 Line.Level = PreviousLineLevel; 97 } 98 99 FormatToken *getNextToken() override { 100 // The \c UnwrappedLineParser guards against this by never calling 101 // \c getNextToken() after it has encountered the first eof token. 102 assert(!eof()); 103 PreviousToken = Token; 104 Token = PreviousTokenSource->getNextToken(); 105 if (eof()) 106 return &FakeEOF; 107 return Token; 108 } 109 110 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 111 112 FormatToken *setPosition(unsigned Position) override { 113 PreviousToken = nullptr; 114 Token = PreviousTokenSource->setPosition(Position); 115 return Token; 116 } 117 118 private: 119 bool eof() { 120 return Token && Token->HasUnescapedNewline && 121 !continuesLineComment(*Token, PreviousToken, 122 /*MinColumnToken=*/PreviousToken); 123 } 124 125 FormatToken FakeEOF; 126 UnwrappedLine &Line; 127 FormatTokenSource *&TokenSource; 128 FormatToken *&ResetToken; 129 unsigned PreviousLineLevel; 130 FormatTokenSource *PreviousTokenSource; 131 132 FormatToken *Token; 133 FormatToken *PreviousToken; 134 }; 135 136 } // end anonymous namespace 137 138 class ScopedLineState { 139 public: 140 ScopedLineState(UnwrappedLineParser &Parser, 141 bool SwitchToPreprocessorLines = false) 142 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 143 if (SwitchToPreprocessorLines) 144 Parser.CurrentLines = &Parser.PreprocessorDirectives; 145 else if (!Parser.Line->Tokens.empty()) 146 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 147 PreBlockLine = std::move(Parser.Line); 148 Parser.Line = llvm::make_unique<UnwrappedLine>(); 149 Parser.Line->Level = PreBlockLine->Level; 150 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 151 } 152 153 ~ScopedLineState() { 154 if (!Parser.Line->Tokens.empty()) { 155 Parser.addUnwrappedLine(); 156 } 157 assert(Parser.Line->Tokens.empty()); 158 Parser.Line = std::move(PreBlockLine); 159 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 160 Parser.MustBreakBeforeNextToken = true; 161 Parser.CurrentLines = OriginalLines; 162 } 163 164 private: 165 UnwrappedLineParser &Parser; 166 167 std::unique_ptr<UnwrappedLine> PreBlockLine; 168 SmallVectorImpl<UnwrappedLine> *OriginalLines; 169 }; 170 171 class CompoundStatementIndenter { 172 public: 173 CompoundStatementIndenter(UnwrappedLineParser *Parser, 174 const FormatStyle &Style, unsigned &LineLevel) 175 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 176 if (Style.BraceWrapping.AfterControlStatement) 177 Parser->addUnwrappedLine(); 178 if (Style.BraceWrapping.IndentBraces) 179 ++LineLevel; 180 } 181 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 182 183 private: 184 unsigned &LineLevel; 185 unsigned OldLineLevel; 186 }; 187 188 namespace { 189 190 class IndexedTokenSource : public FormatTokenSource { 191 public: 192 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 193 : Tokens(Tokens), Position(-1) {} 194 195 FormatToken *getNextToken() override { 196 ++Position; 197 return Tokens[Position]; 198 } 199 200 unsigned getPosition() override { 201 assert(Position >= 0); 202 return Position; 203 } 204 205 FormatToken *setPosition(unsigned P) override { 206 Position = P; 207 return Tokens[Position]; 208 } 209 210 void reset() { Position = -1; } 211 212 private: 213 ArrayRef<FormatToken *> Tokens; 214 int Position; 215 }; 216 217 } // end anonymous namespace 218 219 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 220 const AdditionalKeywords &Keywords, 221 unsigned FirstStartColumn, 222 ArrayRef<FormatToken *> Tokens, 223 UnwrappedLineConsumer &Callback) 224 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 225 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 226 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 227 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 228 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 229 ? IG_Rejected 230 : IG_Inited), 231 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 232 233 void UnwrappedLineParser::reset() { 234 PPBranchLevel = -1; 235 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 236 ? IG_Rejected 237 : IG_Inited; 238 IncludeGuardToken = nullptr; 239 Line.reset(new UnwrappedLine); 240 CommentsBeforeNextToken.clear(); 241 FormatTok = nullptr; 242 MustBreakBeforeNextToken = false; 243 PreprocessorDirectives.clear(); 244 CurrentLines = &Lines; 245 DeclarationScopeStack.clear(); 246 PPStack.clear(); 247 Line->FirstStartColumn = FirstStartColumn; 248 } 249 250 void UnwrappedLineParser::parse() { 251 IndexedTokenSource TokenSource(AllTokens); 252 Line->FirstStartColumn = FirstStartColumn; 253 do { 254 LLVM_DEBUG(llvm::dbgs() << "----\n"); 255 reset(); 256 Tokens = &TokenSource; 257 TokenSource.reset(); 258 259 readToken(); 260 parseFile(); 261 262 // If we found an include guard then all preprocessor directives (other than 263 // the guard) are over-indented by one. 264 if (IncludeGuard == IG_Found) 265 for (auto &Line : Lines) 266 if (Line.InPPDirective && Line.Level > 0) 267 --Line.Level; 268 269 // Create line with eof token. 270 pushToken(FormatTok); 271 addUnwrappedLine(); 272 273 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 274 E = Lines.end(); 275 I != E; ++I) { 276 Callback.consumeUnwrappedLine(*I); 277 } 278 Callback.finishRun(); 279 Lines.clear(); 280 while (!PPLevelBranchIndex.empty() && 281 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 282 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 283 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 284 } 285 if (!PPLevelBranchIndex.empty()) { 286 ++PPLevelBranchIndex.back(); 287 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 288 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 289 } 290 } while (!PPLevelBranchIndex.empty()); 291 } 292 293 void UnwrappedLineParser::parseFile() { 294 // The top-level context in a file always has declarations, except for pre- 295 // processor directives and JavaScript files. 296 bool MustBeDeclaration = 297 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 298 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 299 MustBeDeclaration); 300 if (Style.Language == FormatStyle::LK_TextProto) 301 parseBracedList(); 302 else 303 parseLevel(/*HasOpeningBrace=*/false); 304 // Make sure to format the remaining tokens. 305 // 306 // LK_TextProto is special since its top-level is parsed as the body of a 307 // braced list, which does not necessarily have natural line separators such 308 // as a semicolon. Comments after the last entry that have been determined to 309 // not belong to that line, as in: 310 // key: value 311 // // endfile comment 312 // do not have a chance to be put on a line of their own until this point. 313 // Here we add this newline before end-of-file comments. 314 if (Style.Language == FormatStyle::LK_TextProto && 315 !CommentsBeforeNextToken.empty()) 316 addUnwrappedLine(); 317 flushComments(true); 318 addUnwrappedLine(); 319 } 320 321 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 322 bool SwitchLabelEncountered = false; 323 do { 324 tok::TokenKind kind = FormatTok->Tok.getKind(); 325 if (FormatTok->Type == TT_MacroBlockBegin) { 326 kind = tok::l_brace; 327 } else if (FormatTok->Type == TT_MacroBlockEnd) { 328 kind = tok::r_brace; 329 } 330 331 switch (kind) { 332 case tok::comment: 333 nextToken(); 334 addUnwrappedLine(); 335 break; 336 case tok::l_brace: 337 // FIXME: Add parameter whether this can happen - if this happens, we must 338 // be in a non-declaration context. 339 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 340 continue; 341 parseBlock(/*MustBeDeclaration=*/false); 342 addUnwrappedLine(); 343 break; 344 case tok::r_brace: 345 if (HasOpeningBrace) 346 return; 347 nextToken(); 348 addUnwrappedLine(); 349 break; 350 case tok::kw_default: { 351 unsigned StoredPosition = Tokens->getPosition(); 352 FormatToken *Next; 353 do { 354 Next = Tokens->getNextToken(); 355 } while (Next && Next->is(tok::comment)); 356 FormatTok = Tokens->setPosition(StoredPosition); 357 if (Next && Next->isNot(tok::colon)) { 358 // default not followed by ':' is not a case label; treat it like 359 // an identifier. 360 parseStructuralElement(); 361 break; 362 } 363 // Else, if it is 'default:', fall through to the case handling. 364 LLVM_FALLTHROUGH; 365 } 366 case tok::kw_case: 367 if (Style.Language == FormatStyle::LK_JavaScript && 368 Line->MustBeDeclaration) { 369 // A 'case: string' style field declaration. 370 parseStructuralElement(); 371 break; 372 } 373 if (!SwitchLabelEncountered && 374 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 375 ++Line->Level; 376 SwitchLabelEncountered = true; 377 parseStructuralElement(); 378 break; 379 default: 380 parseStructuralElement(); 381 break; 382 } 383 } while (!eof()); 384 } 385 386 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 387 // We'll parse forward through the tokens until we hit 388 // a closing brace or eof - note that getNextToken() will 389 // parse macros, so this will magically work inside macro 390 // definitions, too. 391 unsigned StoredPosition = Tokens->getPosition(); 392 FormatToken *Tok = FormatTok; 393 const FormatToken *PrevTok = Tok->Previous; 394 // Keep a stack of positions of lbrace tokens. We will 395 // update information about whether an lbrace starts a 396 // braced init list or a different block during the loop. 397 SmallVector<FormatToken *, 8> LBraceStack; 398 assert(Tok->Tok.is(tok::l_brace)); 399 do { 400 // Get next non-comment token. 401 FormatToken *NextTok; 402 unsigned ReadTokens = 0; 403 do { 404 NextTok = Tokens->getNextToken(); 405 ++ReadTokens; 406 } while (NextTok->is(tok::comment)); 407 408 switch (Tok->Tok.getKind()) { 409 case tok::l_brace: 410 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 411 if (PrevTok->isOneOf(tok::colon, tok::less)) 412 // A ':' indicates this code is in a type, or a braced list 413 // following a label in an object literal ({a: {b: 1}}). 414 // A '<' could be an object used in a comparison, but that is nonsense 415 // code (can never return true), so more likely it is a generic type 416 // argument (`X<{a: string; b: number}>`). 417 // The code below could be confused by semicolons between the 418 // individual members in a type member list, which would normally 419 // trigger BK_Block. In both cases, this must be parsed as an inline 420 // braced init. 421 Tok->BlockKind = BK_BracedInit; 422 else if (PrevTok->is(tok::r_paren)) 423 // `) { }` can only occur in function or method declarations in JS. 424 Tok->BlockKind = BK_Block; 425 } else { 426 Tok->BlockKind = BK_Unknown; 427 } 428 LBraceStack.push_back(Tok); 429 break; 430 case tok::r_brace: 431 if (LBraceStack.empty()) 432 break; 433 if (LBraceStack.back()->BlockKind == BK_Unknown) { 434 bool ProbablyBracedList = false; 435 if (Style.Language == FormatStyle::LK_Proto) { 436 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 437 } else { 438 // Using OriginalColumn to distinguish between ObjC methods and 439 // binary operators is a bit hacky. 440 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 441 NextTok->OriginalColumn == 0; 442 443 // If there is a comma, semicolon or right paren after the closing 444 // brace, we assume this is a braced initializer list. Note that 445 // regardless how we mark inner braces here, we will overwrite the 446 // BlockKind later if we parse a braced list (where all blocks 447 // inside are by default braced lists), or when we explicitly detect 448 // blocks (for example while parsing lambdas). 449 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 450 // braced list in JS. 451 ProbablyBracedList = 452 (Style.Language == FormatStyle::LK_JavaScript && 453 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 454 Keywords.kw_as)) || 455 (Style.isCpp() && NextTok->is(tok::l_paren)) || 456 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 457 tok::r_paren, tok::r_square, tok::l_brace, 458 tok::ellipsis) || 459 (NextTok->is(tok::identifier) && 460 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 461 (NextTok->is(tok::semi) && 462 (!ExpectClassBody || LBraceStack.size() != 1)) || 463 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 464 if (NextTok->is(tok::l_square)) { 465 // We can have an array subscript after a braced init 466 // list, but C++11 attributes are expected after blocks. 467 NextTok = Tokens->getNextToken(); 468 ++ReadTokens; 469 ProbablyBracedList = NextTok->isNot(tok::l_square); 470 } 471 } 472 if (ProbablyBracedList) { 473 Tok->BlockKind = BK_BracedInit; 474 LBraceStack.back()->BlockKind = BK_BracedInit; 475 } else { 476 Tok->BlockKind = BK_Block; 477 LBraceStack.back()->BlockKind = BK_Block; 478 } 479 } 480 LBraceStack.pop_back(); 481 break; 482 case tok::identifier: 483 if (!Tok->is(TT_StatementMacro)) 484 break; 485 LLVM_FALLTHROUGH; 486 case tok::at: 487 case tok::semi: 488 case tok::kw_if: 489 case tok::kw_while: 490 case tok::kw_for: 491 case tok::kw_switch: 492 case tok::kw_try: 493 case tok::kw___try: 494 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 495 LBraceStack.back()->BlockKind = BK_Block; 496 break; 497 default: 498 break; 499 } 500 PrevTok = Tok; 501 Tok = NextTok; 502 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 503 504 // Assume other blocks for all unclosed opening braces. 505 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 506 if (LBraceStack[i]->BlockKind == BK_Unknown) 507 LBraceStack[i]->BlockKind = BK_Block; 508 } 509 510 FormatTok = Tokens->setPosition(StoredPosition); 511 } 512 513 template <class T> 514 static inline void hash_combine(std::size_t &seed, const T &v) { 515 std::hash<T> hasher; 516 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 517 } 518 519 size_t UnwrappedLineParser::computePPHash() const { 520 size_t h = 0; 521 for (const auto &i : PPStack) { 522 hash_combine(h, size_t(i.Kind)); 523 hash_combine(h, i.Line); 524 } 525 return h; 526 } 527 528 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 529 bool MunchSemi) { 530 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 531 "'{' or macro block token expected"); 532 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 533 FormatTok->BlockKind = BK_Block; 534 535 size_t PPStartHash = computePPHash(); 536 537 unsigned InitialLevel = Line->Level; 538 nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); 539 540 if (MacroBlock && FormatTok->is(tok::l_paren)) 541 parseParens(); 542 543 size_t NbPreprocessorDirectives = 544 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 545 addUnwrappedLine(); 546 size_t OpeningLineIndex = 547 CurrentLines->empty() 548 ? (UnwrappedLine::kInvalidIndex) 549 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 550 551 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 552 MustBeDeclaration); 553 if (AddLevel) 554 ++Line->Level; 555 parseLevel(/*HasOpeningBrace=*/true); 556 557 if (eof()) 558 return; 559 560 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 561 : !FormatTok->is(tok::r_brace)) { 562 Line->Level = InitialLevel; 563 FormatTok->BlockKind = BK_Block; 564 return; 565 } 566 567 size_t PPEndHash = computePPHash(); 568 569 // Munch the closing brace. 570 nextToken(/*LevelDifference=*/AddLevel ? -1 : 0); 571 572 if (MacroBlock && FormatTok->is(tok::l_paren)) 573 parseParens(); 574 575 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 576 nextToken(); 577 Line->Level = InitialLevel; 578 579 if (PPStartHash == PPEndHash) { 580 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 581 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 582 // Update the opening line to add the forward reference as well 583 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 584 CurrentLines->size() - 1; 585 } 586 } 587 } 588 589 static bool isGoogScope(const UnwrappedLine &Line) { 590 // FIXME: Closure-library specific stuff should not be hard-coded but be 591 // configurable. 592 if (Line.Tokens.size() < 4) 593 return false; 594 auto I = Line.Tokens.begin(); 595 if (I->Tok->TokenText != "goog") 596 return false; 597 ++I; 598 if (I->Tok->isNot(tok::period)) 599 return false; 600 ++I; 601 if (I->Tok->TokenText != "scope") 602 return false; 603 ++I; 604 return I->Tok->is(tok::l_paren); 605 } 606 607 static bool isIIFE(const UnwrappedLine &Line, 608 const AdditionalKeywords &Keywords) { 609 // Look for the start of an immediately invoked anonymous function. 610 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 611 // This is commonly done in JavaScript to create a new, anonymous scope. 612 // Example: (function() { ... })() 613 if (Line.Tokens.size() < 3) 614 return false; 615 auto I = Line.Tokens.begin(); 616 if (I->Tok->isNot(tok::l_paren)) 617 return false; 618 ++I; 619 if (I->Tok->isNot(Keywords.kw_function)) 620 return false; 621 ++I; 622 return I->Tok->is(tok::l_paren); 623 } 624 625 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 626 const FormatToken &InitialToken) { 627 if (InitialToken.is(tok::kw_namespace)) 628 return Style.BraceWrapping.AfterNamespace; 629 if (InitialToken.is(tok::kw_class)) 630 return Style.BraceWrapping.AfterClass; 631 if (InitialToken.is(tok::kw_union)) 632 return Style.BraceWrapping.AfterUnion; 633 if (InitialToken.is(tok::kw_struct)) 634 return Style.BraceWrapping.AfterStruct; 635 return false; 636 } 637 638 void UnwrappedLineParser::parseChildBlock() { 639 FormatTok->BlockKind = BK_Block; 640 nextToken(); 641 { 642 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && 643 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 644 ScopedLineState LineState(*this); 645 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 646 /*MustBeDeclaration=*/false); 647 Line->Level += SkipIndent ? 0 : 1; 648 parseLevel(/*HasOpeningBrace=*/true); 649 flushComments(isOnNewLine(*FormatTok)); 650 Line->Level -= SkipIndent ? 0 : 1; 651 } 652 nextToken(); 653 } 654 655 void UnwrappedLineParser::parsePPDirective() { 656 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 657 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 658 nextToken(); 659 660 if (!FormatTok->Tok.getIdentifierInfo()) { 661 parsePPUnknown(); 662 return; 663 } 664 665 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 666 case tok::pp_define: 667 parsePPDefine(); 668 return; 669 case tok::pp_if: 670 parsePPIf(/*IfDef=*/false); 671 break; 672 case tok::pp_ifdef: 673 case tok::pp_ifndef: 674 parsePPIf(/*IfDef=*/true); 675 break; 676 case tok::pp_else: 677 parsePPElse(); 678 break; 679 case tok::pp_elif: 680 parsePPElIf(); 681 break; 682 case tok::pp_endif: 683 parsePPEndIf(); 684 break; 685 default: 686 parsePPUnknown(); 687 break; 688 } 689 } 690 691 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 692 size_t Line = CurrentLines->size(); 693 if (CurrentLines == &PreprocessorDirectives) 694 Line += Lines.size(); 695 696 if (Unreachable || 697 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 698 PPStack.push_back({PP_Unreachable, Line}); 699 else 700 PPStack.push_back({PP_Conditional, Line}); 701 } 702 703 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 704 ++PPBranchLevel; 705 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 706 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 707 PPLevelBranchIndex.push_back(0); 708 PPLevelBranchCount.push_back(0); 709 } 710 PPChainBranchIndex.push(0); 711 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 712 conditionalCompilationCondition(Unreachable || Skip); 713 } 714 715 void UnwrappedLineParser::conditionalCompilationAlternative() { 716 if (!PPStack.empty()) 717 PPStack.pop_back(); 718 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 719 if (!PPChainBranchIndex.empty()) 720 ++PPChainBranchIndex.top(); 721 conditionalCompilationCondition( 722 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 723 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 724 } 725 726 void UnwrappedLineParser::conditionalCompilationEnd() { 727 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 728 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 729 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 730 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 731 } 732 } 733 // Guard against #endif's without #if. 734 if (PPBranchLevel > -1) 735 --PPBranchLevel; 736 if (!PPChainBranchIndex.empty()) 737 PPChainBranchIndex.pop(); 738 if (!PPStack.empty()) 739 PPStack.pop_back(); 740 } 741 742 void UnwrappedLineParser::parsePPIf(bool IfDef) { 743 bool IfNDef = FormatTok->is(tok::pp_ifndef); 744 nextToken(); 745 bool Unreachable = false; 746 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 747 Unreachable = true; 748 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 749 Unreachable = true; 750 conditionalCompilationStart(Unreachable); 751 FormatToken *IfCondition = FormatTok; 752 // If there's a #ifndef on the first line, and the only lines before it are 753 // comments, it could be an include guard. 754 bool MaybeIncludeGuard = IfNDef; 755 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 756 for (auto &Line : Lines) { 757 if (!Line.Tokens.front().Tok->is(tok::comment)) { 758 MaybeIncludeGuard = false; 759 IncludeGuard = IG_Rejected; 760 break; 761 } 762 } 763 --PPBranchLevel; 764 parsePPUnknown(); 765 ++PPBranchLevel; 766 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 767 IncludeGuard = IG_IfNdefed; 768 IncludeGuardToken = IfCondition; 769 } 770 } 771 772 void UnwrappedLineParser::parsePPElse() { 773 // If a potential include guard has an #else, it's not an include guard. 774 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 775 IncludeGuard = IG_Rejected; 776 conditionalCompilationAlternative(); 777 if (PPBranchLevel > -1) 778 --PPBranchLevel; 779 parsePPUnknown(); 780 ++PPBranchLevel; 781 } 782 783 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 784 785 void UnwrappedLineParser::parsePPEndIf() { 786 conditionalCompilationEnd(); 787 parsePPUnknown(); 788 // If the #endif of a potential include guard is the last thing in the file, 789 // then we found an include guard. 790 unsigned TokenPosition = Tokens->getPosition(); 791 FormatToken *PeekNext = AllTokens[TokenPosition]; 792 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && 793 PeekNext->is(tok::eof) && 794 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 795 IncludeGuard = IG_Found; 796 } 797 798 void UnwrappedLineParser::parsePPDefine() { 799 nextToken(); 800 801 if (FormatTok->Tok.getKind() != tok::identifier) { 802 IncludeGuard = IG_Rejected; 803 IncludeGuardToken = nullptr; 804 parsePPUnknown(); 805 return; 806 } 807 808 if (IncludeGuard == IG_IfNdefed && 809 IncludeGuardToken->TokenText == FormatTok->TokenText) { 810 IncludeGuard = IG_Defined; 811 IncludeGuardToken = nullptr; 812 for (auto &Line : Lines) { 813 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 814 IncludeGuard = IG_Rejected; 815 break; 816 } 817 } 818 } 819 820 nextToken(); 821 if (FormatTok->Tok.getKind() == tok::l_paren && 822 FormatTok->WhitespaceRange.getBegin() == 823 FormatTok->WhitespaceRange.getEnd()) { 824 parseParens(); 825 } 826 if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) 827 Line->Level += PPBranchLevel + 1; 828 addUnwrappedLine(); 829 ++Line->Level; 830 831 // Errors during a preprocessor directive can only affect the layout of the 832 // preprocessor directive, and thus we ignore them. An alternative approach 833 // would be to use the same approach we use on the file level (no 834 // re-indentation if there was a structural error) within the macro 835 // definition. 836 parseFile(); 837 } 838 839 void UnwrappedLineParser::parsePPUnknown() { 840 do { 841 nextToken(); 842 } while (!eof()); 843 if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) 844 Line->Level += PPBranchLevel + 1; 845 addUnwrappedLine(); 846 } 847 848 // Here we blacklist certain tokens that are not usually the first token in an 849 // unwrapped line. This is used in attempt to distinguish macro calls without 850 // trailing semicolons from other constructs split to several lines. 851 static bool tokenCanStartNewLine(const clang::Token &Tok) { 852 // Semicolon can be a null-statement, l_square can be a start of a macro or 853 // a C++11 attribute, but this doesn't seem to be common. 854 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 855 Tok.isNot(tok::l_square) && 856 // Tokens that can only be used as binary operators and a part of 857 // overloaded operator names. 858 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 859 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 860 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 861 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 862 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 863 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 864 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 865 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 866 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 867 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 868 Tok.isNot(tok::lesslessequal) && 869 // Colon is used in labels, base class lists, initializer lists, 870 // range-based for loops, ternary operator, but should never be the 871 // first token in an unwrapped line. 872 Tok.isNot(tok::colon) && 873 // 'noexcept' is a trailing annotation. 874 Tok.isNot(tok::kw_noexcept); 875 } 876 877 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 878 const FormatToken *FormatTok) { 879 // FIXME: This returns true for C/C++ keywords like 'struct'. 880 return FormatTok->is(tok::identifier) && 881 (FormatTok->Tok.getIdentifierInfo() == nullptr || 882 !FormatTok->isOneOf( 883 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 884 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 885 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 886 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 887 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 888 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, 889 Keywords.kw_from)); 890 } 891 892 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 893 const FormatToken *FormatTok) { 894 return FormatTok->Tok.isLiteral() || 895 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 896 mustBeJSIdent(Keywords, FormatTok); 897 } 898 899 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 900 // when encountered after a value (see mustBeJSIdentOrValue). 901 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 902 const FormatToken *FormatTok) { 903 return FormatTok->isOneOf( 904 tok::kw_return, Keywords.kw_yield, 905 // conditionals 906 tok::kw_if, tok::kw_else, 907 // loops 908 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 909 // switch/case 910 tok::kw_switch, tok::kw_case, 911 // exceptions 912 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 913 // declaration 914 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 915 Keywords.kw_async, Keywords.kw_function, 916 // import/export 917 Keywords.kw_import, tok::kw_export); 918 } 919 920 // readTokenWithJavaScriptASI reads the next token and terminates the current 921 // line if JavaScript Automatic Semicolon Insertion must 922 // happen between the current token and the next token. 923 // 924 // This method is conservative - it cannot cover all edge cases of JavaScript, 925 // but only aims to correctly handle certain well known cases. It *must not* 926 // return true in speculative cases. 927 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 928 FormatToken *Previous = FormatTok; 929 readToken(); 930 FormatToken *Next = FormatTok; 931 932 bool IsOnSameLine = 933 CommentsBeforeNextToken.empty() 934 ? Next->NewlinesBefore == 0 935 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 936 if (IsOnSameLine) 937 return; 938 939 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 940 bool PreviousStartsTemplateExpr = 941 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 942 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 943 // If the line contains an '@' sign, the previous token might be an 944 // annotation, which can precede another identifier/value. 945 bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(), 946 [](UnwrappedLineNode &LineNode) { 947 return LineNode.Tok->is(tok::at); 948 }) != Line->Tokens.end(); 949 if (HasAt) 950 return; 951 } 952 if (Next->is(tok::exclaim) && PreviousMustBeValue) 953 return addUnwrappedLine(); 954 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 955 bool NextEndsTemplateExpr = 956 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 957 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 958 (PreviousMustBeValue || 959 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 960 tok::minusminus))) 961 return addUnwrappedLine(); 962 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 963 isJSDeclOrStmt(Keywords, Next)) 964 return addUnwrappedLine(); 965 } 966 967 void UnwrappedLineParser::parseStructuralElement() { 968 assert(!FormatTok->is(tok::l_brace)); 969 if (Style.Language == FormatStyle::LK_TableGen && 970 FormatTok->is(tok::pp_include)) { 971 nextToken(); 972 if (FormatTok->is(tok::string_literal)) 973 nextToken(); 974 addUnwrappedLine(); 975 return; 976 } 977 switch (FormatTok->Tok.getKind()) { 978 case tok::kw_asm: 979 nextToken(); 980 if (FormatTok->is(tok::l_brace)) { 981 FormatTok->Type = TT_InlineASMBrace; 982 nextToken(); 983 while (FormatTok && FormatTok->isNot(tok::eof)) { 984 if (FormatTok->is(tok::r_brace)) { 985 FormatTok->Type = TT_InlineASMBrace; 986 nextToken(); 987 addUnwrappedLine(); 988 break; 989 } 990 FormatTok->Finalized = true; 991 nextToken(); 992 } 993 } 994 break; 995 case tok::kw_namespace: 996 parseNamespace(); 997 return; 998 case tok::kw_public: 999 case tok::kw_protected: 1000 case tok::kw_private: 1001 if (Style.Language == FormatStyle::LK_Java || 1002 Style.Language == FormatStyle::LK_JavaScript) 1003 nextToken(); 1004 else 1005 parseAccessSpecifier(); 1006 return; 1007 case tok::kw_if: 1008 parseIfThenElse(); 1009 return; 1010 case tok::kw_for: 1011 case tok::kw_while: 1012 parseForOrWhileLoop(); 1013 return; 1014 case tok::kw_do: 1015 parseDoWhile(); 1016 return; 1017 case tok::kw_switch: 1018 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1019 // 'switch: string' field declaration. 1020 break; 1021 parseSwitch(); 1022 return; 1023 case tok::kw_default: 1024 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1025 // 'default: string' field declaration. 1026 break; 1027 nextToken(); 1028 if (FormatTok->is(tok::colon)) { 1029 parseLabel(); 1030 return; 1031 } 1032 // e.g. "default void f() {}" in a Java interface. 1033 break; 1034 case tok::kw_case: 1035 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1036 // 'case: string' field declaration. 1037 break; 1038 parseCaseLabel(); 1039 return; 1040 case tok::kw_try: 1041 case tok::kw___try: 1042 parseTryCatch(); 1043 return; 1044 case tok::kw_extern: 1045 nextToken(); 1046 if (FormatTok->Tok.is(tok::string_literal)) { 1047 nextToken(); 1048 if (FormatTok->Tok.is(tok::l_brace)) { 1049 if (Style.BraceWrapping.AfterExternBlock) { 1050 addUnwrappedLine(); 1051 parseBlock(/*MustBeDeclaration=*/true); 1052 } else { 1053 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 1054 } 1055 addUnwrappedLine(); 1056 return; 1057 } 1058 } 1059 break; 1060 case tok::kw_export: 1061 if (Style.Language == FormatStyle::LK_JavaScript) { 1062 parseJavaScriptEs6ImportExport(); 1063 return; 1064 } 1065 if (!Style.isCpp()) 1066 break; 1067 // Handle C++ "(inline|export) namespace". 1068 LLVM_FALLTHROUGH; 1069 case tok::kw_inline: 1070 nextToken(); 1071 if (FormatTok->Tok.is(tok::kw_namespace)) { 1072 parseNamespace(); 1073 return; 1074 } 1075 break; 1076 case tok::identifier: 1077 if (FormatTok->is(TT_ForEachMacro)) { 1078 parseForOrWhileLoop(); 1079 return; 1080 } 1081 if (FormatTok->is(TT_MacroBlockBegin)) { 1082 parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, 1083 /*MunchSemi=*/false); 1084 return; 1085 } 1086 if (FormatTok->is(Keywords.kw_import)) { 1087 if (Style.Language == FormatStyle::LK_JavaScript) { 1088 parseJavaScriptEs6ImportExport(); 1089 return; 1090 } 1091 if (Style.Language == FormatStyle::LK_Proto) { 1092 nextToken(); 1093 if (FormatTok->is(tok::kw_public)) 1094 nextToken(); 1095 if (!FormatTok->is(tok::string_literal)) 1096 return; 1097 nextToken(); 1098 if (FormatTok->is(tok::semi)) 1099 nextToken(); 1100 addUnwrappedLine(); 1101 return; 1102 } 1103 } 1104 if (Style.isCpp() && 1105 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1106 Keywords.kw_slots, Keywords.kw_qslots)) { 1107 nextToken(); 1108 if (FormatTok->is(tok::colon)) { 1109 nextToken(); 1110 addUnwrappedLine(); 1111 return; 1112 } 1113 } 1114 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1115 parseStatementMacro(); 1116 return; 1117 } 1118 // In all other cases, parse the declaration. 1119 break; 1120 default: 1121 break; 1122 } 1123 do { 1124 const FormatToken *Previous = FormatTok->Previous; 1125 switch (FormatTok->Tok.getKind()) { 1126 case tok::at: 1127 nextToken(); 1128 if (FormatTok->Tok.is(tok::l_brace)) { 1129 nextToken(); 1130 parseBracedList(); 1131 break; 1132 } else if (Style.Language == FormatStyle::LK_Java && 1133 FormatTok->is(Keywords.kw_interface)) { 1134 nextToken(); 1135 break; 1136 } 1137 switch (FormatTok->Tok.getObjCKeywordID()) { 1138 case tok::objc_public: 1139 case tok::objc_protected: 1140 case tok::objc_package: 1141 case tok::objc_private: 1142 return parseAccessSpecifier(); 1143 case tok::objc_interface: 1144 case tok::objc_implementation: 1145 return parseObjCInterfaceOrImplementation(); 1146 case tok::objc_protocol: 1147 if (parseObjCProtocol()) 1148 return; 1149 break; 1150 case tok::objc_end: 1151 return; // Handled by the caller. 1152 case tok::objc_optional: 1153 case tok::objc_required: 1154 nextToken(); 1155 addUnwrappedLine(); 1156 return; 1157 case tok::objc_autoreleasepool: 1158 nextToken(); 1159 if (FormatTok->Tok.is(tok::l_brace)) { 1160 if (Style.BraceWrapping.AfterControlStatement) 1161 addUnwrappedLine(); 1162 parseBlock(/*MustBeDeclaration=*/false); 1163 } 1164 addUnwrappedLine(); 1165 return; 1166 case tok::objc_synchronized: 1167 nextToken(); 1168 if (FormatTok->Tok.is(tok::l_paren)) 1169 // Skip synchronization object 1170 parseParens(); 1171 if (FormatTok->Tok.is(tok::l_brace)) { 1172 if (Style.BraceWrapping.AfterControlStatement) 1173 addUnwrappedLine(); 1174 parseBlock(/*MustBeDeclaration=*/false); 1175 } 1176 addUnwrappedLine(); 1177 return; 1178 case tok::objc_try: 1179 // This branch isn't strictly necessary (the kw_try case below would 1180 // do this too after the tok::at is parsed above). But be explicit. 1181 parseTryCatch(); 1182 return; 1183 default: 1184 break; 1185 } 1186 break; 1187 case tok::kw_enum: 1188 // Ignore if this is part of "template <enum ...". 1189 if (Previous && Previous->is(tok::less)) { 1190 nextToken(); 1191 break; 1192 } 1193 1194 // parseEnum falls through and does not yet add an unwrapped line as an 1195 // enum definition can start a structural element. 1196 if (!parseEnum()) 1197 break; 1198 // This only applies for C++. 1199 if (!Style.isCpp()) { 1200 addUnwrappedLine(); 1201 return; 1202 } 1203 break; 1204 case tok::kw_typedef: 1205 nextToken(); 1206 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1207 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 1208 parseEnum(); 1209 break; 1210 case tok::kw_struct: 1211 case tok::kw_union: 1212 case tok::kw_class: 1213 // parseRecord falls through and does not yet add an unwrapped line as a 1214 // record declaration or definition can start a structural element. 1215 parseRecord(); 1216 // This does not apply for Java and JavaScript. 1217 if (Style.Language == FormatStyle::LK_Java || 1218 Style.Language == FormatStyle::LK_JavaScript) { 1219 if (FormatTok->is(tok::semi)) 1220 nextToken(); 1221 addUnwrappedLine(); 1222 return; 1223 } 1224 break; 1225 case tok::period: 1226 nextToken(); 1227 // In Java, classes have an implicit static member "class". 1228 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1229 FormatTok->is(tok::kw_class)) 1230 nextToken(); 1231 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 1232 FormatTok->Tok.getIdentifierInfo()) 1233 // JavaScript only has pseudo keywords, all keywords are allowed to 1234 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1235 nextToken(); 1236 break; 1237 case tok::semi: 1238 nextToken(); 1239 addUnwrappedLine(); 1240 return; 1241 case tok::r_brace: 1242 addUnwrappedLine(); 1243 return; 1244 case tok::l_paren: 1245 parseParens(); 1246 break; 1247 case tok::kw_operator: 1248 nextToken(); 1249 if (FormatTok->isBinaryOperator()) 1250 nextToken(); 1251 break; 1252 case tok::caret: 1253 nextToken(); 1254 if (FormatTok->Tok.isAnyIdentifier() || 1255 FormatTok->isSimpleTypeSpecifier()) 1256 nextToken(); 1257 if (FormatTok->is(tok::l_paren)) 1258 parseParens(); 1259 if (FormatTok->is(tok::l_brace)) 1260 parseChildBlock(); 1261 break; 1262 case tok::l_brace: 1263 if (!tryToParseBracedList()) { 1264 // A block outside of parentheses must be the last part of a 1265 // structural element. 1266 // FIXME: Figure out cases where this is not true, and add projections 1267 // for them (the one we know is missing are lambdas). 1268 if (Style.BraceWrapping.AfterFunction) 1269 addUnwrappedLine(); 1270 FormatTok->Type = TT_FunctionLBrace; 1271 parseBlock(/*MustBeDeclaration=*/false); 1272 addUnwrappedLine(); 1273 return; 1274 } 1275 // Otherwise this was a braced init list, and the structural 1276 // element continues. 1277 break; 1278 case tok::kw_try: 1279 // We arrive here when parsing function-try blocks. 1280 if (Style.BraceWrapping.AfterFunction) 1281 addUnwrappedLine(); 1282 parseTryCatch(); 1283 return; 1284 case tok::identifier: { 1285 if (FormatTok->is(TT_MacroBlockEnd)) { 1286 addUnwrappedLine(); 1287 return; 1288 } 1289 1290 // Function declarations (as opposed to function expressions) are parsed 1291 // on their own unwrapped line by continuing this loop. Function 1292 // expressions (functions that are not on their own line) must not create 1293 // a new unwrapped line, so they are special cased below. 1294 size_t TokenCount = Line->Tokens.size(); 1295 if (Style.Language == FormatStyle::LK_JavaScript && 1296 FormatTok->is(Keywords.kw_function) && 1297 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1298 Keywords.kw_async)))) { 1299 tryToParseJSFunction(); 1300 break; 1301 } 1302 if ((Style.Language == FormatStyle::LK_JavaScript || 1303 Style.Language == FormatStyle::LK_Java) && 1304 FormatTok->is(Keywords.kw_interface)) { 1305 if (Style.Language == FormatStyle::LK_JavaScript) { 1306 // In JavaScript/TypeScript, "interface" can be used as a standalone 1307 // identifier, e.g. in `var interface = 1;`. If "interface" is 1308 // followed by another identifier, it is very like to be an actual 1309 // interface declaration. 1310 unsigned StoredPosition = Tokens->getPosition(); 1311 FormatToken *Next = Tokens->getNextToken(); 1312 FormatTok = Tokens->setPosition(StoredPosition); 1313 if (Next && !mustBeJSIdent(Keywords, Next)) { 1314 nextToken(); 1315 break; 1316 } 1317 } 1318 parseRecord(); 1319 addUnwrappedLine(); 1320 return; 1321 } 1322 1323 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1324 parseStatementMacro(); 1325 return; 1326 } 1327 1328 // See if the following token should start a new unwrapped line. 1329 StringRef Text = FormatTok->TokenText; 1330 nextToken(); 1331 if (Line->Tokens.size() == 1 && 1332 // JS doesn't have macros, and within classes colons indicate fields, 1333 // not labels. 1334 Style.Language != FormatStyle::LK_JavaScript) { 1335 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1336 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1337 parseLabel(); 1338 return; 1339 } 1340 // Recognize function-like macro usages without trailing semicolon as 1341 // well as free-standing macros like Q_OBJECT. 1342 bool FunctionLike = FormatTok->is(tok::l_paren); 1343 if (FunctionLike) 1344 parseParens(); 1345 1346 bool FollowedByNewline = 1347 CommentsBeforeNextToken.empty() 1348 ? FormatTok->NewlinesBefore > 0 1349 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1350 1351 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1352 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 1353 addUnwrappedLine(); 1354 return; 1355 } 1356 } 1357 break; 1358 } 1359 case tok::equal: 1360 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 1361 // TT_JsFatArrow. The always start an expression or a child block if 1362 // followed by a curly. 1363 if (FormatTok->is(TT_JsFatArrow)) { 1364 nextToken(); 1365 if (FormatTok->is(tok::l_brace)) 1366 parseChildBlock(); 1367 break; 1368 } 1369 1370 nextToken(); 1371 if (FormatTok->Tok.is(tok::l_brace)) { 1372 nextToken(); 1373 parseBracedList(); 1374 } else if (Style.Language == FormatStyle::LK_Proto && 1375 FormatTok->Tok.is(tok::less)) { 1376 nextToken(); 1377 parseBracedList(/*ContinueOnSemicolons=*/false, 1378 /*ClosingBraceKind=*/tok::greater); 1379 } 1380 break; 1381 case tok::l_square: 1382 parseSquare(); 1383 break; 1384 case tok::kw_new: 1385 parseNew(); 1386 break; 1387 default: 1388 nextToken(); 1389 break; 1390 } 1391 } while (!eof()); 1392 } 1393 1394 bool UnwrappedLineParser::tryToParseLambda() { 1395 if (!Style.isCpp()) { 1396 nextToken(); 1397 return false; 1398 } 1399 assert(FormatTok->is(tok::l_square)); 1400 FormatToken &LSquare = *FormatTok; 1401 if (!tryToParseLambdaIntroducer()) 1402 return false; 1403 1404 while (FormatTok->isNot(tok::l_brace)) { 1405 if (FormatTok->isSimpleTypeSpecifier()) { 1406 nextToken(); 1407 continue; 1408 } 1409 switch (FormatTok->Tok.getKind()) { 1410 case tok::l_brace: 1411 break; 1412 case tok::l_paren: 1413 parseParens(); 1414 break; 1415 case tok::amp: 1416 case tok::star: 1417 case tok::kw_const: 1418 case tok::comma: 1419 case tok::less: 1420 case tok::greater: 1421 case tok::identifier: 1422 case tok::numeric_constant: 1423 case tok::coloncolon: 1424 case tok::kw_mutable: 1425 case tok::kw_noexcept: 1426 nextToken(); 1427 break; 1428 case tok::arrow: 1429 FormatTok->Type = TT_LambdaArrow; 1430 nextToken(); 1431 break; 1432 default: 1433 return true; 1434 } 1435 } 1436 LSquare.Type = TT_LambdaLSquare; 1437 parseChildBlock(); 1438 return true; 1439 } 1440 1441 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1442 const FormatToken *Previous = FormatTok->Previous; 1443 if (Previous && 1444 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1445 tok::kw_delete, tok::l_square) || 1446 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1447 Previous->isSimpleTypeSpecifier())) { 1448 nextToken(); 1449 return false; 1450 } 1451 nextToken(); 1452 if (FormatTok->is(tok::l_square)) { 1453 return false; 1454 } 1455 parseSquare(/*LambdaIntroducer=*/true); 1456 return true; 1457 } 1458 1459 void UnwrappedLineParser::tryToParseJSFunction() { 1460 assert(FormatTok->is(Keywords.kw_function) || 1461 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1462 if (FormatTok->is(Keywords.kw_async)) 1463 nextToken(); 1464 // Consume "function". 1465 nextToken(); 1466 1467 // Consume * (generator function). Treat it like C++'s overloaded operators. 1468 if (FormatTok->is(tok::star)) { 1469 FormatTok->Type = TT_OverloadedOperator; 1470 nextToken(); 1471 } 1472 1473 // Consume function name. 1474 if (FormatTok->is(tok::identifier)) 1475 nextToken(); 1476 1477 if (FormatTok->isNot(tok::l_paren)) 1478 return; 1479 1480 // Parse formal parameter list. 1481 parseParens(); 1482 1483 if (FormatTok->is(tok::colon)) { 1484 // Parse a type definition. 1485 nextToken(); 1486 1487 // Eat the type declaration. For braced inline object types, balance braces, 1488 // otherwise just parse until finding an l_brace for the function body. 1489 if (FormatTok->is(tok::l_brace)) 1490 tryToParseBracedList(); 1491 else 1492 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1493 nextToken(); 1494 } 1495 1496 if (FormatTok->is(tok::semi)) 1497 return; 1498 1499 parseChildBlock(); 1500 } 1501 1502 bool UnwrappedLineParser::tryToParseBracedList() { 1503 if (FormatTok->BlockKind == BK_Unknown) 1504 calculateBraceTypes(); 1505 assert(FormatTok->BlockKind != BK_Unknown); 1506 if (FormatTok->BlockKind == BK_Block) 1507 return false; 1508 nextToken(); 1509 parseBracedList(); 1510 return true; 1511 } 1512 1513 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1514 tok::TokenKind ClosingBraceKind) { 1515 bool HasError = false; 1516 1517 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1518 // replace this by using parseAssigmentExpression() inside. 1519 do { 1520 if (Style.Language == FormatStyle::LK_JavaScript) { 1521 if (FormatTok->is(Keywords.kw_function) || 1522 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1523 tryToParseJSFunction(); 1524 continue; 1525 } 1526 if (FormatTok->is(TT_JsFatArrow)) { 1527 nextToken(); 1528 // Fat arrows can be followed by simple expressions or by child blocks 1529 // in curly braces. 1530 if (FormatTok->is(tok::l_brace)) { 1531 parseChildBlock(); 1532 continue; 1533 } 1534 } 1535 if (FormatTok->is(tok::l_brace)) { 1536 // Could be a method inside of a braced list `{a() { return 1; }}`. 1537 if (tryToParseBracedList()) 1538 continue; 1539 parseChildBlock(); 1540 } 1541 } 1542 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1543 nextToken(); 1544 return !HasError; 1545 } 1546 switch (FormatTok->Tok.getKind()) { 1547 case tok::caret: 1548 nextToken(); 1549 if (FormatTok->is(tok::l_brace)) { 1550 parseChildBlock(); 1551 } 1552 break; 1553 case tok::l_square: 1554 tryToParseLambda(); 1555 break; 1556 case tok::l_paren: 1557 parseParens(); 1558 // JavaScript can just have free standing methods and getters/setters in 1559 // object literals. Detect them by a "{" following ")". 1560 if (Style.Language == FormatStyle::LK_JavaScript) { 1561 if (FormatTok->is(tok::l_brace)) 1562 parseChildBlock(); 1563 break; 1564 } 1565 break; 1566 case tok::l_brace: 1567 // Assume there are no blocks inside a braced init list apart 1568 // from the ones we explicitly parse out (like lambdas). 1569 FormatTok->BlockKind = BK_BracedInit; 1570 nextToken(); 1571 parseBracedList(); 1572 break; 1573 case tok::less: 1574 if (Style.Language == FormatStyle::LK_Proto) { 1575 nextToken(); 1576 parseBracedList(/*ContinueOnSemicolons=*/false, 1577 /*ClosingBraceKind=*/tok::greater); 1578 } else { 1579 nextToken(); 1580 } 1581 break; 1582 case tok::semi: 1583 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1584 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1585 // used for error recovery if we have otherwise determined that this is 1586 // a braced list. 1587 if (Style.Language == FormatStyle::LK_JavaScript) { 1588 nextToken(); 1589 break; 1590 } 1591 HasError = true; 1592 if (!ContinueOnSemicolons) 1593 return !HasError; 1594 nextToken(); 1595 break; 1596 case tok::comma: 1597 nextToken(); 1598 break; 1599 default: 1600 nextToken(); 1601 break; 1602 } 1603 } while (!eof()); 1604 return false; 1605 } 1606 1607 void UnwrappedLineParser::parseParens() { 1608 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1609 nextToken(); 1610 do { 1611 switch (FormatTok->Tok.getKind()) { 1612 case tok::l_paren: 1613 parseParens(); 1614 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1615 parseChildBlock(); 1616 break; 1617 case tok::r_paren: 1618 nextToken(); 1619 return; 1620 case tok::r_brace: 1621 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1622 return; 1623 case tok::l_square: 1624 tryToParseLambda(); 1625 break; 1626 case tok::l_brace: 1627 if (!tryToParseBracedList()) 1628 parseChildBlock(); 1629 break; 1630 case tok::at: 1631 nextToken(); 1632 if (FormatTok->Tok.is(tok::l_brace)) { 1633 nextToken(); 1634 parseBracedList(); 1635 } 1636 break; 1637 case tok::kw_class: 1638 if (Style.Language == FormatStyle::LK_JavaScript) 1639 parseRecord(/*ParseAsExpr=*/true); 1640 else 1641 nextToken(); 1642 break; 1643 case tok::identifier: 1644 if (Style.Language == FormatStyle::LK_JavaScript && 1645 (FormatTok->is(Keywords.kw_function) || 1646 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1647 tryToParseJSFunction(); 1648 else 1649 nextToken(); 1650 break; 1651 default: 1652 nextToken(); 1653 break; 1654 } 1655 } while (!eof()); 1656 } 1657 1658 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 1659 if (!LambdaIntroducer) { 1660 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1661 if (tryToParseLambda()) 1662 return; 1663 } 1664 do { 1665 switch (FormatTok->Tok.getKind()) { 1666 case tok::l_paren: 1667 parseParens(); 1668 break; 1669 case tok::r_square: 1670 nextToken(); 1671 return; 1672 case tok::r_brace: 1673 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1674 return; 1675 case tok::l_square: 1676 parseSquare(); 1677 break; 1678 case tok::l_brace: { 1679 if (!tryToParseBracedList()) 1680 parseChildBlock(); 1681 break; 1682 } 1683 case tok::at: 1684 nextToken(); 1685 if (FormatTok->Tok.is(tok::l_brace)) { 1686 nextToken(); 1687 parseBracedList(); 1688 } 1689 break; 1690 default: 1691 nextToken(); 1692 break; 1693 } 1694 } while (!eof()); 1695 } 1696 1697 void UnwrappedLineParser::parseIfThenElse() { 1698 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1699 nextToken(); 1700 if (FormatTok->Tok.is(tok::kw_constexpr)) 1701 nextToken(); 1702 if (FormatTok->Tok.is(tok::l_paren)) 1703 parseParens(); 1704 bool NeedsUnwrappedLine = false; 1705 if (FormatTok->Tok.is(tok::l_brace)) { 1706 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1707 parseBlock(/*MustBeDeclaration=*/false); 1708 if (Style.BraceWrapping.BeforeElse) 1709 addUnwrappedLine(); 1710 else 1711 NeedsUnwrappedLine = true; 1712 } else { 1713 addUnwrappedLine(); 1714 ++Line->Level; 1715 parseStructuralElement(); 1716 --Line->Level; 1717 } 1718 if (FormatTok->Tok.is(tok::kw_else)) { 1719 nextToken(); 1720 if (FormatTok->Tok.is(tok::l_brace)) { 1721 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1722 parseBlock(/*MustBeDeclaration=*/false); 1723 addUnwrappedLine(); 1724 } else if (FormatTok->Tok.is(tok::kw_if)) { 1725 parseIfThenElse(); 1726 } else { 1727 addUnwrappedLine(); 1728 ++Line->Level; 1729 parseStructuralElement(); 1730 if (FormatTok->is(tok::eof)) 1731 addUnwrappedLine(); 1732 --Line->Level; 1733 } 1734 } else if (NeedsUnwrappedLine) { 1735 addUnwrappedLine(); 1736 } 1737 } 1738 1739 void UnwrappedLineParser::parseTryCatch() { 1740 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1741 nextToken(); 1742 bool NeedsUnwrappedLine = false; 1743 if (FormatTok->is(tok::colon)) { 1744 // We are in a function try block, what comes is an initializer list. 1745 nextToken(); 1746 while (FormatTok->is(tok::identifier)) { 1747 nextToken(); 1748 if (FormatTok->is(tok::l_paren)) 1749 parseParens(); 1750 if (FormatTok->is(tok::comma)) 1751 nextToken(); 1752 } 1753 } 1754 // Parse try with resource. 1755 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1756 parseParens(); 1757 } 1758 if (FormatTok->is(tok::l_brace)) { 1759 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1760 parseBlock(/*MustBeDeclaration=*/false); 1761 if (Style.BraceWrapping.BeforeCatch) { 1762 addUnwrappedLine(); 1763 } else { 1764 NeedsUnwrappedLine = true; 1765 } 1766 } else if (!FormatTok->is(tok::kw_catch)) { 1767 // The C++ standard requires a compound-statement after a try. 1768 // If there's none, we try to assume there's a structuralElement 1769 // and try to continue. 1770 addUnwrappedLine(); 1771 ++Line->Level; 1772 parseStructuralElement(); 1773 --Line->Level; 1774 } 1775 while (1) { 1776 if (FormatTok->is(tok::at)) 1777 nextToken(); 1778 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1779 tok::kw___finally) || 1780 ((Style.Language == FormatStyle::LK_Java || 1781 Style.Language == FormatStyle::LK_JavaScript) && 1782 FormatTok->is(Keywords.kw_finally)) || 1783 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1784 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1785 break; 1786 nextToken(); 1787 while (FormatTok->isNot(tok::l_brace)) { 1788 if (FormatTok->is(tok::l_paren)) { 1789 parseParens(); 1790 continue; 1791 } 1792 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1793 return; 1794 nextToken(); 1795 } 1796 NeedsUnwrappedLine = false; 1797 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1798 parseBlock(/*MustBeDeclaration=*/false); 1799 if (Style.BraceWrapping.BeforeCatch) 1800 addUnwrappedLine(); 1801 else 1802 NeedsUnwrappedLine = true; 1803 } 1804 if (NeedsUnwrappedLine) 1805 addUnwrappedLine(); 1806 } 1807 1808 void UnwrappedLineParser::parseNamespace() { 1809 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1810 1811 const FormatToken &InitialToken = *FormatTok; 1812 nextToken(); 1813 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1814 nextToken(); 1815 if (FormatTok->Tok.is(tok::l_brace)) { 1816 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1817 addUnwrappedLine(); 1818 1819 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1820 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1821 DeclarationScopeStack.size() > 1); 1822 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1823 // Munch the semicolon after a namespace. This is more common than one would 1824 // think. Puttin the semicolon into its own line is very ugly. 1825 if (FormatTok->Tok.is(tok::semi)) 1826 nextToken(); 1827 addUnwrappedLine(); 1828 } 1829 // FIXME: Add error handling. 1830 } 1831 1832 void UnwrappedLineParser::parseNew() { 1833 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1834 nextToken(); 1835 if (Style.Language != FormatStyle::LK_Java) 1836 return; 1837 1838 // In Java, we can parse everything up to the parens, which aren't optional. 1839 do { 1840 // There should not be a ;, { or } before the new's open paren. 1841 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1842 return; 1843 1844 // Consume the parens. 1845 if (FormatTok->is(tok::l_paren)) { 1846 parseParens(); 1847 1848 // If there is a class body of an anonymous class, consume that as child. 1849 if (FormatTok->is(tok::l_brace)) 1850 parseChildBlock(); 1851 return; 1852 } 1853 nextToken(); 1854 } while (!eof()); 1855 } 1856 1857 void UnwrappedLineParser::parseForOrWhileLoop() { 1858 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1859 "'for', 'while' or foreach macro expected"); 1860 nextToken(); 1861 // JS' for await ( ... 1862 if (Style.Language == FormatStyle::LK_JavaScript && 1863 FormatTok->is(Keywords.kw_await)) 1864 nextToken(); 1865 if (FormatTok->Tok.is(tok::l_paren)) 1866 parseParens(); 1867 if (FormatTok->Tok.is(tok::l_brace)) { 1868 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1869 parseBlock(/*MustBeDeclaration=*/false); 1870 addUnwrappedLine(); 1871 } else { 1872 addUnwrappedLine(); 1873 ++Line->Level; 1874 parseStructuralElement(); 1875 --Line->Level; 1876 } 1877 } 1878 1879 void UnwrappedLineParser::parseDoWhile() { 1880 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1881 nextToken(); 1882 if (FormatTok->Tok.is(tok::l_brace)) { 1883 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1884 parseBlock(/*MustBeDeclaration=*/false); 1885 if (Style.BraceWrapping.IndentBraces) 1886 addUnwrappedLine(); 1887 } else { 1888 addUnwrappedLine(); 1889 ++Line->Level; 1890 parseStructuralElement(); 1891 --Line->Level; 1892 } 1893 1894 // FIXME: Add error handling. 1895 if (!FormatTok->Tok.is(tok::kw_while)) { 1896 addUnwrappedLine(); 1897 return; 1898 } 1899 1900 nextToken(); 1901 parseStructuralElement(); 1902 } 1903 1904 void UnwrappedLineParser::parseLabel() { 1905 nextToken(); 1906 unsigned OldLineLevel = Line->Level; 1907 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1908 --Line->Level; 1909 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1910 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1911 parseBlock(/*MustBeDeclaration=*/false); 1912 if (FormatTok->Tok.is(tok::kw_break)) { 1913 if (Style.BraceWrapping.AfterControlStatement) 1914 addUnwrappedLine(); 1915 parseStructuralElement(); 1916 } 1917 addUnwrappedLine(); 1918 } else { 1919 if (FormatTok->is(tok::semi)) 1920 nextToken(); 1921 addUnwrappedLine(); 1922 } 1923 Line->Level = OldLineLevel; 1924 if (FormatTok->isNot(tok::l_brace)) { 1925 parseStructuralElement(); 1926 addUnwrappedLine(); 1927 } 1928 } 1929 1930 void UnwrappedLineParser::parseCaseLabel() { 1931 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1932 // FIXME: fix handling of complex expressions here. 1933 do { 1934 nextToken(); 1935 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1936 parseLabel(); 1937 } 1938 1939 void UnwrappedLineParser::parseSwitch() { 1940 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1941 nextToken(); 1942 if (FormatTok->Tok.is(tok::l_paren)) 1943 parseParens(); 1944 if (FormatTok->Tok.is(tok::l_brace)) { 1945 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1946 parseBlock(/*MustBeDeclaration=*/false); 1947 addUnwrappedLine(); 1948 } else { 1949 addUnwrappedLine(); 1950 ++Line->Level; 1951 parseStructuralElement(); 1952 --Line->Level; 1953 } 1954 } 1955 1956 void UnwrappedLineParser::parseAccessSpecifier() { 1957 nextToken(); 1958 // Understand Qt's slots. 1959 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 1960 nextToken(); 1961 // Otherwise, we don't know what it is, and we'd better keep the next token. 1962 if (FormatTok->Tok.is(tok::colon)) 1963 nextToken(); 1964 addUnwrappedLine(); 1965 } 1966 1967 bool UnwrappedLineParser::parseEnum() { 1968 // Won't be 'enum' for NS_ENUMs. 1969 if (FormatTok->Tok.is(tok::kw_enum)) 1970 nextToken(); 1971 1972 // In TypeScript, "enum" can also be used as property name, e.g. in interface 1973 // declarations. An "enum" keyword followed by a colon would be a syntax 1974 // error and thus assume it is just an identifier. 1975 if (Style.Language == FormatStyle::LK_JavaScript && 1976 FormatTok->isOneOf(tok::colon, tok::question)) 1977 return false; 1978 1979 // Eat up enum class ... 1980 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 1981 nextToken(); 1982 1983 while (FormatTok->Tok.getIdentifierInfo() || 1984 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 1985 tok::greater, tok::comma, tok::question)) { 1986 nextToken(); 1987 // We can have macros or attributes in between 'enum' and the enum name. 1988 if (FormatTok->is(tok::l_paren)) 1989 parseParens(); 1990 if (FormatTok->is(tok::identifier)) { 1991 nextToken(); 1992 // If there are two identifiers in a row, this is likely an elaborate 1993 // return type. In Java, this can be "implements", etc. 1994 if (Style.isCpp() && FormatTok->is(tok::identifier)) 1995 return false; 1996 } 1997 } 1998 1999 // Just a declaration or something is wrong. 2000 if (FormatTok->isNot(tok::l_brace)) 2001 return true; 2002 FormatTok->BlockKind = BK_Block; 2003 2004 if (Style.Language == FormatStyle::LK_Java) { 2005 // Java enums are different. 2006 parseJavaEnumBody(); 2007 return true; 2008 } 2009 if (Style.Language == FormatStyle::LK_Proto) { 2010 parseBlock(/*MustBeDeclaration=*/true); 2011 return true; 2012 } 2013 2014 // Parse enum body. 2015 nextToken(); 2016 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 2017 if (HasError) { 2018 if (FormatTok->is(tok::semi)) 2019 nextToken(); 2020 addUnwrappedLine(); 2021 } 2022 return true; 2023 2024 // There is no addUnwrappedLine() here so that we fall through to parsing a 2025 // structural element afterwards. Thus, in "enum A {} n, m;", 2026 // "} n, m;" will end up in one unwrapped line. 2027 } 2028 2029 void UnwrappedLineParser::parseJavaEnumBody() { 2030 // Determine whether the enum is simple, i.e. does not have a semicolon or 2031 // constants with class bodies. Simple enums can be formatted like braced 2032 // lists, contracted to a single line, etc. 2033 unsigned StoredPosition = Tokens->getPosition(); 2034 bool IsSimple = true; 2035 FormatToken *Tok = Tokens->getNextToken(); 2036 while (Tok) { 2037 if (Tok->is(tok::r_brace)) 2038 break; 2039 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2040 IsSimple = false; 2041 break; 2042 } 2043 // FIXME: This will also mark enums with braces in the arguments to enum 2044 // constants as "not simple". This is probably fine in practice, though. 2045 Tok = Tokens->getNextToken(); 2046 } 2047 FormatTok = Tokens->setPosition(StoredPosition); 2048 2049 if (IsSimple) { 2050 nextToken(); 2051 parseBracedList(); 2052 addUnwrappedLine(); 2053 return; 2054 } 2055 2056 // Parse the body of a more complex enum. 2057 // First add a line for everything up to the "{". 2058 nextToken(); 2059 addUnwrappedLine(); 2060 ++Line->Level; 2061 2062 // Parse the enum constants. 2063 while (FormatTok) { 2064 if (FormatTok->is(tok::l_brace)) { 2065 // Parse the constant's class body. 2066 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2067 /*MunchSemi=*/false); 2068 } else if (FormatTok->is(tok::l_paren)) { 2069 parseParens(); 2070 } else if (FormatTok->is(tok::comma)) { 2071 nextToken(); 2072 addUnwrappedLine(); 2073 } else if (FormatTok->is(tok::semi)) { 2074 nextToken(); 2075 addUnwrappedLine(); 2076 break; 2077 } else if (FormatTok->is(tok::r_brace)) { 2078 addUnwrappedLine(); 2079 break; 2080 } else { 2081 nextToken(); 2082 } 2083 } 2084 2085 // Parse the class body after the enum's ";" if any. 2086 parseLevel(/*HasOpeningBrace=*/true); 2087 nextToken(); 2088 --Line->Level; 2089 addUnwrappedLine(); 2090 } 2091 2092 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 2093 const FormatToken &InitialToken = *FormatTok; 2094 nextToken(); 2095 2096 // The actual identifier can be a nested name specifier, and in macros 2097 // it is often token-pasted. 2098 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 2099 tok::kw___attribute, tok::kw___declspec, 2100 tok::kw_alignas) || 2101 ((Style.Language == FormatStyle::LK_Java || 2102 Style.Language == FormatStyle::LK_JavaScript) && 2103 FormatTok->isOneOf(tok::period, tok::comma))) { 2104 if (Style.Language == FormatStyle::LK_JavaScript && 2105 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 2106 // JavaScript/TypeScript supports inline object types in 2107 // extends/implements positions: 2108 // class Foo implements {bar: number} { } 2109 nextToken(); 2110 if (FormatTok->is(tok::l_brace)) { 2111 tryToParseBracedList(); 2112 continue; 2113 } 2114 } 2115 bool IsNonMacroIdentifier = 2116 FormatTok->is(tok::identifier) && 2117 FormatTok->TokenText != FormatTok->TokenText.upper(); 2118 nextToken(); 2119 // We can have macros or attributes in between 'class' and the class name. 2120 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 2121 parseParens(); 2122 } 2123 2124 // Note that parsing away template declarations here leads to incorrectly 2125 // accepting function declarations as record declarations. 2126 // In general, we cannot solve this problem. Consider: 2127 // class A<int> B() {} 2128 // which can be a function definition or a class definition when B() is a 2129 // macro. If we find enough real-world cases where this is a problem, we 2130 // can parse for the 'template' keyword in the beginning of the statement, 2131 // and thus rule out the record production in case there is no template 2132 // (this would still leave us with an ambiguity between template function 2133 // and class declarations). 2134 if (FormatTok->isOneOf(tok::colon, tok::less)) { 2135 while (!eof()) { 2136 if (FormatTok->is(tok::l_brace)) { 2137 calculateBraceTypes(/*ExpectClassBody=*/true); 2138 if (!tryToParseBracedList()) 2139 break; 2140 } 2141 if (FormatTok->Tok.is(tok::semi)) 2142 return; 2143 nextToken(); 2144 } 2145 } 2146 if (FormatTok->Tok.is(tok::l_brace)) { 2147 if (ParseAsExpr) { 2148 parseChildBlock(); 2149 } else { 2150 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2151 addUnwrappedLine(); 2152 2153 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2154 /*MunchSemi=*/false); 2155 } 2156 } 2157 // There is no addUnwrappedLine() here so that we fall through to parsing a 2158 // structural element afterwards. Thus, in "class A {} n, m;", 2159 // "} n, m;" will end up in one unwrapped line. 2160 } 2161 2162 void UnwrappedLineParser::parseObjCMethod() { 2163 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && 2164 "'(' or identifier expected."); 2165 do { 2166 if (FormatTok->Tok.is(tok::semi)) { 2167 nextToken(); 2168 addUnwrappedLine(); 2169 return; 2170 } else if (FormatTok->Tok.is(tok::l_brace)) { 2171 if (Style.BraceWrapping.AfterFunction) 2172 addUnwrappedLine(); 2173 parseBlock(/*MustBeDeclaration=*/false); 2174 addUnwrappedLine(); 2175 return; 2176 } else { 2177 nextToken(); 2178 } 2179 } while (!eof()); 2180 } 2181 2182 void UnwrappedLineParser::parseObjCProtocolList() { 2183 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 2184 do { 2185 nextToken(); 2186 // Early exit in case someone forgot a close angle. 2187 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2188 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2189 return; 2190 } while (!eof() && FormatTok->Tok.isNot(tok::greater)); 2191 nextToken(); // Skip '>'. 2192 } 2193 2194 void UnwrappedLineParser::parseObjCUntilAtEnd() { 2195 do { 2196 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2197 nextToken(); 2198 addUnwrappedLine(); 2199 break; 2200 } 2201 if (FormatTok->is(tok::l_brace)) { 2202 parseBlock(/*MustBeDeclaration=*/false); 2203 // In ObjC interfaces, nothing should be following the "}". 2204 addUnwrappedLine(); 2205 } else if (FormatTok->is(tok::r_brace)) { 2206 // Ignore stray "}". parseStructuralElement doesn't consume them. 2207 nextToken(); 2208 addUnwrappedLine(); 2209 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 2210 nextToken(); 2211 parseObjCMethod(); 2212 } else { 2213 parseStructuralElement(); 2214 } 2215 } while (!eof()); 2216 } 2217 2218 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2219 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 2220 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 2221 nextToken(); 2222 nextToken(); // interface name 2223 2224 // @interface can be followed by a lightweight generic 2225 // specialization list, then either a base class or a category. 2226 if (FormatTok->Tok.is(tok::less)) { 2227 // Unlike protocol lists, generic parameterizations support 2228 // nested angles: 2229 // 2230 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 2231 // NSObject <NSCopying, NSSecureCoding> 2232 // 2233 // so we need to count how many open angles we have left. 2234 unsigned NumOpenAngles = 1; 2235 do { 2236 nextToken(); 2237 // Early exit in case someone forgot a close angle. 2238 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2239 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2240 break; 2241 if (FormatTok->Tok.is(tok::less)) 2242 ++NumOpenAngles; 2243 else if (FormatTok->Tok.is(tok::greater)) { 2244 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 2245 --NumOpenAngles; 2246 } 2247 } while (!eof() && NumOpenAngles != 0); 2248 nextToken(); // Skip '>'. 2249 } 2250 if (FormatTok->Tok.is(tok::colon)) { 2251 nextToken(); 2252 nextToken(); // base class name 2253 } else if (FormatTok->Tok.is(tok::l_paren)) 2254 // Skip category, if present. 2255 parseParens(); 2256 2257 if (FormatTok->Tok.is(tok::less)) 2258 parseObjCProtocolList(); 2259 2260 if (FormatTok->Tok.is(tok::l_brace)) { 2261 if (Style.BraceWrapping.AfterObjCDeclaration) 2262 addUnwrappedLine(); 2263 parseBlock(/*MustBeDeclaration=*/true); 2264 } 2265 2266 // With instance variables, this puts '}' on its own line. Without instance 2267 // variables, this ends the @interface line. 2268 addUnwrappedLine(); 2269 2270 parseObjCUntilAtEnd(); 2271 } 2272 2273 // Returns true for the declaration/definition form of @protocol, 2274 // false for the expression form. 2275 bool UnwrappedLineParser::parseObjCProtocol() { 2276 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 2277 nextToken(); 2278 2279 if (FormatTok->is(tok::l_paren)) 2280 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 2281 return false; 2282 2283 // The definition/declaration form, 2284 // @protocol Foo 2285 // - (int)someMethod; 2286 // @end 2287 2288 nextToken(); // protocol name 2289 2290 if (FormatTok->Tok.is(tok::less)) 2291 parseObjCProtocolList(); 2292 2293 // Check for protocol declaration. 2294 if (FormatTok->Tok.is(tok::semi)) { 2295 nextToken(); 2296 addUnwrappedLine(); 2297 return true; 2298 } 2299 2300 addUnwrappedLine(); 2301 parseObjCUntilAtEnd(); 2302 return true; 2303 } 2304 2305 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2306 bool IsImport = FormatTok->is(Keywords.kw_import); 2307 assert(IsImport || FormatTok->is(tok::kw_export)); 2308 nextToken(); 2309 2310 // Consume the "default" in "export default class/function". 2311 if (FormatTok->is(tok::kw_default)) 2312 nextToken(); 2313 2314 // Consume "async function", "function" and "default function", so that these 2315 // get parsed as free-standing JS functions, i.e. do not require a trailing 2316 // semicolon. 2317 if (FormatTok->is(Keywords.kw_async)) 2318 nextToken(); 2319 if (FormatTok->is(Keywords.kw_function)) { 2320 nextToken(); 2321 return; 2322 } 2323 2324 // For imports, `export *`, `export {...}`, consume the rest of the line up 2325 // to the terminating `;`. For everything else, just return and continue 2326 // parsing the structural element, i.e. the declaration or expression for 2327 // `export default`. 2328 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2329 !FormatTok->isStringLiteral()) 2330 return; 2331 2332 while (!eof()) { 2333 if (FormatTok->is(tok::semi)) 2334 return; 2335 if (Line->Tokens.empty()) { 2336 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2337 // import statement should terminate. 2338 return; 2339 } 2340 if (FormatTok->is(tok::l_brace)) { 2341 FormatTok->BlockKind = BK_Block; 2342 nextToken(); 2343 parseBracedList(); 2344 } else { 2345 nextToken(); 2346 } 2347 } 2348 } 2349 2350 void UnwrappedLineParser::parseStatementMacro() 2351 { 2352 nextToken(); 2353 if (FormatTok->is(tok::l_paren)) 2354 parseParens(); 2355 if (FormatTok->is(tok::semi)) 2356 nextToken(); 2357 addUnwrappedLine(); 2358 } 2359 2360 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2361 StringRef Prefix = "") { 2362 llvm::dbgs() << Prefix << "Line(" << Line.Level 2363 << ", FSC=" << Line.FirstStartColumn << ")" 2364 << (Line.InPPDirective ? " MACRO" : "") << ": "; 2365 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2366 E = Line.Tokens.end(); 2367 I != E; ++I) { 2368 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2369 << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn 2370 << "] "; 2371 } 2372 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2373 E = Line.Tokens.end(); 2374 I != E; ++I) { 2375 const UnwrappedLineNode &Node = *I; 2376 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2377 I = Node.Children.begin(), 2378 E = Node.Children.end(); 2379 I != E; ++I) { 2380 printDebugInfo(*I, "\nChild: "); 2381 } 2382 } 2383 llvm::dbgs() << "\n"; 2384 } 2385 2386 void UnwrappedLineParser::addUnwrappedLine() { 2387 if (Line->Tokens.empty()) 2388 return; 2389 LLVM_DEBUG({ 2390 if (CurrentLines == &Lines) 2391 printDebugInfo(*Line); 2392 }); 2393 CurrentLines->push_back(std::move(*Line)); 2394 Line->Tokens.clear(); 2395 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2396 Line->FirstStartColumn = 0; 2397 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2398 CurrentLines->append( 2399 std::make_move_iterator(PreprocessorDirectives.begin()), 2400 std::make_move_iterator(PreprocessorDirectives.end())); 2401 PreprocessorDirectives.clear(); 2402 } 2403 // Disconnect the current token from the last token on the previous line. 2404 FormatTok->Previous = nullptr; 2405 } 2406 2407 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2408 2409 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2410 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2411 FormatTok.NewlinesBefore > 0; 2412 } 2413 2414 // Checks if \p FormatTok is a line comment that continues the line comment 2415 // section on \p Line. 2416 static bool continuesLineCommentSection(const FormatToken &FormatTok, 2417 const UnwrappedLine &Line, 2418 llvm::Regex &CommentPragmasRegex) { 2419 if (Line.Tokens.empty()) 2420 return false; 2421 2422 StringRef IndentContent = FormatTok.TokenText; 2423 if (FormatTok.TokenText.startswith("//") || 2424 FormatTok.TokenText.startswith("/*")) 2425 IndentContent = FormatTok.TokenText.substr(2); 2426 if (CommentPragmasRegex.match(IndentContent)) 2427 return false; 2428 2429 // If Line starts with a line comment, then FormatTok continues the comment 2430 // section if its original column is greater or equal to the original start 2431 // column of the line. 2432 // 2433 // Define the min column token of a line as follows: if a line ends in '{' or 2434 // contains a '{' followed by a line comment, then the min column token is 2435 // that '{'. Otherwise, the min column token of the line is the first token of 2436 // the line. 2437 // 2438 // If Line starts with a token other than a line comment, then FormatTok 2439 // continues the comment section if its original column is greater than the 2440 // original start column of the min column token of the line. 2441 // 2442 // For example, the second line comment continues the first in these cases: 2443 // 2444 // // first line 2445 // // second line 2446 // 2447 // and: 2448 // 2449 // // first line 2450 // // second line 2451 // 2452 // and: 2453 // 2454 // int i; // first line 2455 // // second line 2456 // 2457 // and: 2458 // 2459 // do { // first line 2460 // // second line 2461 // int i; 2462 // } while (true); 2463 // 2464 // and: 2465 // 2466 // enum { 2467 // a, // first line 2468 // // second line 2469 // b 2470 // }; 2471 // 2472 // The second line comment doesn't continue the first in these cases: 2473 // 2474 // // first line 2475 // // second line 2476 // 2477 // and: 2478 // 2479 // int i; // first line 2480 // // second line 2481 // 2482 // and: 2483 // 2484 // do { // first line 2485 // // second line 2486 // int i; 2487 // } while (true); 2488 // 2489 // and: 2490 // 2491 // enum { 2492 // a, // first line 2493 // // second line 2494 // }; 2495 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2496 2497 // Scan for '{//'. If found, use the column of '{' as a min column for line 2498 // comment section continuation. 2499 const FormatToken *PreviousToken = nullptr; 2500 for (const UnwrappedLineNode &Node : Line.Tokens) { 2501 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2502 isLineComment(*Node.Tok)) { 2503 MinColumnToken = PreviousToken; 2504 break; 2505 } 2506 PreviousToken = Node.Tok; 2507 2508 // Grab the last newline preceding a token in this unwrapped line. 2509 if (Node.Tok->NewlinesBefore > 0) { 2510 MinColumnToken = Node.Tok; 2511 } 2512 } 2513 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2514 MinColumnToken = PreviousToken; 2515 } 2516 2517 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2518 MinColumnToken); 2519 } 2520 2521 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2522 bool JustComments = Line->Tokens.empty(); 2523 for (SmallVectorImpl<FormatToken *>::const_iterator 2524 I = CommentsBeforeNextToken.begin(), 2525 E = CommentsBeforeNextToken.end(); 2526 I != E; ++I) { 2527 // Line comments that belong to the same line comment section are put on the 2528 // same line since later we might want to reflow content between them. 2529 // Additional fine-grained breaking of line comment sections is controlled 2530 // by the class BreakableLineCommentSection in case it is desirable to keep 2531 // several line comment sections in the same unwrapped line. 2532 // 2533 // FIXME: Consider putting separate line comment sections as children to the 2534 // unwrapped line instead. 2535 (*I)->ContinuesLineCommentSection = 2536 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2537 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2538 addUnwrappedLine(); 2539 pushToken(*I); 2540 } 2541 if (NewlineBeforeNext && JustComments) 2542 addUnwrappedLine(); 2543 CommentsBeforeNextToken.clear(); 2544 } 2545 2546 void UnwrappedLineParser::nextToken(int LevelDifference) { 2547 if (eof()) 2548 return; 2549 flushComments(isOnNewLine(*FormatTok)); 2550 pushToken(FormatTok); 2551 FormatToken *Previous = FormatTok; 2552 if (Style.Language != FormatStyle::LK_JavaScript) 2553 readToken(LevelDifference); 2554 else 2555 readTokenWithJavaScriptASI(); 2556 FormatTok->Previous = Previous; 2557 } 2558 2559 void UnwrappedLineParser::distributeComments( 2560 const SmallVectorImpl<FormatToken *> &Comments, 2561 const FormatToken *NextTok) { 2562 // Whether or not a line comment token continues a line is controlled by 2563 // the method continuesLineCommentSection, with the following caveat: 2564 // 2565 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2566 // that each comment line from the trail is aligned with the next token, if 2567 // the next token exists. If a trail exists, the beginning of the maximal 2568 // trail is marked as a start of a new comment section. 2569 // 2570 // For example in this code: 2571 // 2572 // int a; // line about a 2573 // // line 1 about b 2574 // // line 2 about b 2575 // int b; 2576 // 2577 // the two lines about b form a maximal trail, so there are two sections, the 2578 // first one consisting of the single comment "// line about a" and the 2579 // second one consisting of the next two comments. 2580 if (Comments.empty()) 2581 return; 2582 bool ShouldPushCommentsInCurrentLine = true; 2583 bool HasTrailAlignedWithNextToken = false; 2584 unsigned StartOfTrailAlignedWithNextToken = 0; 2585 if (NextTok) { 2586 // We are skipping the first element intentionally. 2587 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2588 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2589 HasTrailAlignedWithNextToken = true; 2590 StartOfTrailAlignedWithNextToken = i; 2591 } 2592 } 2593 } 2594 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2595 FormatToken *FormatTok = Comments[i]; 2596 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 2597 FormatTok->ContinuesLineCommentSection = false; 2598 } else { 2599 FormatTok->ContinuesLineCommentSection = 2600 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2601 } 2602 if (!FormatTok->ContinuesLineCommentSection && 2603 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2604 ShouldPushCommentsInCurrentLine = false; 2605 } 2606 if (ShouldPushCommentsInCurrentLine) { 2607 pushToken(FormatTok); 2608 } else { 2609 CommentsBeforeNextToken.push_back(FormatTok); 2610 } 2611 } 2612 } 2613 2614 void UnwrappedLineParser::readToken(int LevelDifference) { 2615 SmallVector<FormatToken *, 1> Comments; 2616 do { 2617 FormatTok = Tokens->getNextToken(); 2618 assert(FormatTok); 2619 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2620 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2621 distributeComments(Comments, FormatTok); 2622 Comments.clear(); 2623 // If there is an unfinished unwrapped line, we flush the preprocessor 2624 // directives only after that unwrapped line was finished later. 2625 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2626 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2627 assert((LevelDifference >= 0 || 2628 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 2629 "LevelDifference makes Line->Level negative"); 2630 Line->Level += LevelDifference; 2631 // Comments stored before the preprocessor directive need to be output 2632 // before the preprocessor directive, at the same level as the 2633 // preprocessor directive, as we consider them to apply to the directive. 2634 flushComments(isOnNewLine(*FormatTok)); 2635 parsePPDirective(); 2636 } 2637 while (FormatTok->Type == TT_ConflictStart || 2638 FormatTok->Type == TT_ConflictEnd || 2639 FormatTok->Type == TT_ConflictAlternative) { 2640 if (FormatTok->Type == TT_ConflictStart) { 2641 conditionalCompilationStart(/*Unreachable=*/false); 2642 } else if (FormatTok->Type == TT_ConflictAlternative) { 2643 conditionalCompilationAlternative(); 2644 } else if (FormatTok->Type == TT_ConflictEnd) { 2645 conditionalCompilationEnd(); 2646 } 2647 FormatTok = Tokens->getNextToken(); 2648 FormatTok->MustBreakBefore = true; 2649 } 2650 2651 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 2652 !Line->InPPDirective) { 2653 continue; 2654 } 2655 2656 if (!FormatTok->Tok.is(tok::comment)) { 2657 distributeComments(Comments, FormatTok); 2658 Comments.clear(); 2659 return; 2660 } 2661 2662 Comments.push_back(FormatTok); 2663 } while (!eof()); 2664 2665 distributeComments(Comments, nullptr); 2666 Comments.clear(); 2667 } 2668 2669 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2670 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2671 if (MustBreakBeforeNextToken) { 2672 Line->Tokens.back().Tok->MustBreakBefore = true; 2673 MustBreakBeforeNextToken = false; 2674 } 2675 } 2676 2677 } // end namespace format 2678 } // end namespace clang 2679