1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/Support/Debug.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 #include <algorithm> 21 22 #define DEBUG_TYPE "format-parser" 23 24 namespace clang { 25 namespace format { 26 27 class FormatTokenSource { 28 public: 29 virtual ~FormatTokenSource() {} 30 virtual FormatToken *getNextToken() = 0; 31 32 virtual unsigned getPosition() = 0; 33 virtual FormatToken *setPosition(unsigned Position) = 0; 34 }; 35 36 namespace { 37 38 class ScopedDeclarationState { 39 public: 40 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 41 bool MustBeDeclaration) 42 : Line(Line), Stack(Stack) { 43 Line.MustBeDeclaration = MustBeDeclaration; 44 Stack.push_back(MustBeDeclaration); 45 } 46 ~ScopedDeclarationState() { 47 Stack.pop_back(); 48 if (!Stack.empty()) 49 Line.MustBeDeclaration = Stack.back(); 50 else 51 Line.MustBeDeclaration = true; 52 } 53 54 private: 55 UnwrappedLine &Line; 56 std::vector<bool> &Stack; 57 }; 58 59 static bool isLineComment(const FormatToken &FormatTok) { 60 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 61 } 62 63 // Checks if \p FormatTok is a line comment that continues the line comment 64 // \p Previous. 
// The original column of \p MinColumnToken is used to determine
// whether \p FormatTok is indented enough to the right to continue \p Previous.
// Returns false when either \p Previous or \p MinColumnToken is null.
static bool continuesLineComment(const FormatToken &FormatTok,
                                 const FormatToken *Previous,
                                 const FormatToken *MinColumnToken) {
  if (!Previous || !MinColumnToken)
    return false;
  // A continuation may start in the same column as a line comment, but must
  // start strictly to the right of any other kind of token.
  unsigned MinContinueColumn =
      MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
         isLineComment(*Previous) &&
         FormatTok.OriginalColumn >= MinContinueColumn;
}

// Token source that is installed in place of \p TokenSource for the lifetime
// of the object (used by parsePPDirective() below). It forwards to the
// previous source and hands out a fake eof token as soon as the underlying
// token starts a new, unescaped line (see eof()).
//
// The constructor saves the line's level, resets it to 0 and marks the line
// as being inside a preprocessor directive. The destructor restores the
// previous token source and level, clears InPPDirective, and stores the last
// token fetched from the underlying source into \p ResetToken.
class ScopedMacroState : public FormatTokenSource {
public:
  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
                   FormatToken *&ResetToken)
      : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
        Token(nullptr), PreviousToken(nullptr) {
    FakeEOF.Tok.startToken();
    FakeEOF.Tok.setKind(tok::eof);
    TokenSource = this;
    Line.Level = 0;
    Line.InPPDirective = true;
  }

  ~ScopedMacroState() override {
    TokenSource = PreviousTokenSource;
    ResetToken = Token;
    Line.InPPDirective = false;
    Line.Level = PreviousLineLevel;
  }

  FormatToken *getNextToken() override {
    // The \c UnwrappedLineParser guards against this by never calling
    // \c getNextToken() after it has encountered the first eof token.
    assert(!eof());
    PreviousToken = Token;
    Token = PreviousTokenSource->getNextToken();
    // The real token is kept in Token (and later handed back through
    // ResetToken); the caller only sees the fake eof.
    if (eof())
      return &FakeEOF;
    return Token;
  }

  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }

  FormatToken *setPosition(unsigned Position) override {
    PreviousToken = nullptr;
    Token = PreviousTokenSource->setPosition(Position);
    return Token;
  }

private:
  // True once the current token begins a new unescaped line, unless it is a
  // line comment continuing the previous token's line comment.
  bool eof() {
    return Token && Token->HasUnescapedNewline &&
           !continuesLineComment(*Token, PreviousToken,
                                 /*MinColumnToken=*/PreviousToken);
  }

  FormatToken FakeEOF;
  UnwrappedLine &Line;
  FormatTokenSource *&TokenSource;
  FormatToken *&ResetToken;
  unsigned PreviousLineLevel;
  FormatTokenSource *PreviousTokenSource;

  FormatToken *Token;
  FormatToken *PreviousToken;
};

} // end anonymous namespace

// Temporarily swaps the parser's current line for a fresh UnwrappedLine that
// inherits Level and InPPDirective from the old one. While the object lives,
// new lines are collected either in Parser.PreprocessorDirectives (when
// \p SwitchToPreprocessorLines is set) or, if the old line has tokens, in the
// Children of its last token. The destructor emits any pending tokens as a
// line and restores the saved line and line container.
class ScopedLineState {
public:
  ScopedLineState(UnwrappedLineParser &Parser,
                  bool SwitchToPreprocessorLines = false)
      : Parser(Parser), OriginalLines(Parser.CurrentLines) {
    if (SwitchToPreprocessorLines)
      Parser.CurrentLines = &Parser.PreprocessorDirectives;
    else if (!Parser.Line->Tokens.empty())
      Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
    PreBlockLine = std::move(Parser.Line);
    Parser.Line = llvm::make_unique<UnwrappedLine>();
    Parser.Line->Level = PreBlockLine->Level;
    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
  }

  ~ScopedLineState() {
    if (!Parser.Line->Tokens.empty()) {
      Parser.addUnwrappedLine();
    }
    assert(Parser.Line->Tokens.empty());
    Parser.Line = std::move(PreBlockLine);
    // After collecting preprocessor lines, force a break before whatever token
    // comes next.
    if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
      Parser.MustBreakBeforeNextToken = true;
    Parser.CurrentLines = OriginalLines;
  }

private:
  UnwrappedLineParser &Parser;

  std::unique_ptr<UnwrappedLine> PreBlockLine;
  SmallVectorImpl<UnwrappedLine> *OriginalLines;
};

// Applies the brace-wrapping options for a compound statement: on
// construction it ends the current line if BraceWrapping.AfterControlStatement
// is set and increments \p LineLevel if BraceWrapping.IndentBraces is set; on
// destruction it restores \p LineLevel to its value at construction.
class CompoundStatementIndenter {
public:
  CompoundStatementIndenter(UnwrappedLineParser *Parser,
                            const FormatStyle &Style, unsigned &LineLevel)
      : LineLevel(LineLevel), OldLineLevel(LineLevel) {
    if (Style.BraceWrapping.AfterControlStatement)
      Parser->addUnwrappedLine();
    if (Style.BraceWrapping.IndentBraces)
      ++LineLevel;
  }
  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }

private:
  unsigned &LineLevel;
  unsigned OldLineLevel;
};

namespace {

// FormatTokenSource over an array of tokens, addressed by index. Position
// starts at -1 so that the first getNextToken() returns element 0. This class
// performs no explicit range check of its own before indexing.
class IndexedTokenSource : public FormatTokenSource {
public:
  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
      : Tokens(Tokens), Position(-1) {}

  FormatToken *getNextToken() override {
    ++Position;
    return Tokens[Position];
  }

  unsigned getPosition() override {
    // Only meaningful after at least one token has been read.
    assert(Position >= 0);
    return Position;
  }

  FormatToken *setPosition(unsigned P) override {
    Position = P;
    return Tokens[Position];
  }

  // Rewinds to the state before the first token.
  void reset() { Position = -1; }

private:
  ArrayRef<FormatToken *> Tokens;
  int Position;
};

} // end anonymous namespace

// Include-guard detection starts out rejected when preprocessor directives
// are not indented (PPDIS_None), since it is only used to adjust that
// indentation (see parse()).
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         unsigned FirstStartColumn,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}

// Restores the per-pass parser state so that parse() can run another pass
// over the token stream. PPLevelBranchIndex / PPLevelBranchCount are
// deliberately left untouched; they carry information between passes.
void UnwrappedLineParser::reset() {
  PPBranchLevel = -1;
  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
                     ? IG_Rejected
                     : IG_Inited;
  IncludeGuardToken = nullptr;
  Line.reset(new UnwrappedLine);
  CommentsBeforeNextToken.clear();
  FormatTok = nullptr;
  MustBreakBeforeNextToken = false;
  PreprocessorDirectives.clear();
  CurrentLines = &Lines;
  DeclarationScopeStack.clear();
  PPStack.clear();
  Line->FirstStartColumn = FirstStartColumn;
}

// Parses the whole token stream and reports the resulting unwrapped lines to
// Callback. The stream is parsed repeatedly: after each pass the branch
// selection in PPLevelBranchIndex is advanced, and the loop ends once no
// further branch selection remains.
void UnwrappedLineParser::parse() {
  IndexedTokenSource TokenSource(AllTokens);
  Line->FirstStartColumn = FirstStartColumn;
  do {
    LLVM_DEBUG(llvm::dbgs() << "----\n");
    reset();
    Tokens = &TokenSource;
    TokenSource.reset();

    readToken();
    parseFile();

    // If we found an include guard then all preprocessor directives (other than
    // the guard) are over-indented by one.
    if (IncludeGuard == IG_Found)
      for (auto &Line : Lines)
        if (Line.InPPDirective && Line.Level > 0)
          --Line.Level;

    // Create line with eof token.
    pushToken(FormatTok);
    addUnwrappedLine();

    for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
                                                  E = Lines.end();
         I != E; ++I) {
      Callback.consumeUnwrappedLine(*I);
    }
    Callback.finishRun();
    Lines.clear();
    // Drop trailing levels whose selected branch is already the last one seen
    // at that level, then advance the innermost remaining level by one.
    while (!PPLevelBranchIndex.empty() &&
           PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
      PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
      PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
    }
    if (!PPLevelBranchIndex.empty()) {
      ++PPLevelBranchIndex.back();
      assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
      assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
    }
  } while (!PPLevelBranchIndex.empty());
}

// Parses a top-level sequence of elements. Besides being called from parse(),
// this is also used for the body of a #define (see parsePPDefine()).
void UnwrappedLineParser::parseFile() {
  // The top-level context in a file always has declarations, except for pre-
  // processor directives and JavaScript files.
  bool MustBeDeclaration =
      !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (Style.Language == FormatStyle::LK_TextProto)
    parseBracedList();
  else
    parseLevel(/*HasOpeningBrace=*/false);
  // Make sure to format the remaining tokens.
  //
  // LK_TextProto is special since its top-level is parsed as the body of a
  // braced list, which does not necessarily have natural line separators such
  // as a semicolon. Comments after the last entry that have been determined to
  // not belong to that line, as in:
  //   key: value
  //   // endfile comment
  // do not have a chance to be put on a line of their own until this point.
  // Here we add this newline before end-of-file comments.
  if (Style.Language == FormatStyle::LK_TextProto &&
      !CommentsBeforeNextToken.empty())
    addUnwrappedLine();
  flushComments(true);
  addUnwrappedLine();
}

// Parses structural elements until eof. When \p HasOpeningBrace is true the
// function also returns at the first closing brace (or TT_MacroBlockEnd
// token), leaving that token unconsumed for the caller.
void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
  bool SwitchLabelEncountered = false;
  do {
    // Macro block begin/end tokens are dispatched as if they were braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->Type == TT_MacroBlockBegin) {
      kind = tok::l_brace;
    } else if (FormatTok->Type == TT_MacroBlockEnd) {
      kind = tok::r_brace;
    }

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      // FIXME: Add parameter whether this can happen - if this happens, we must
      // be in a non-declaration context.
      if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
        continue;
      parseBlock(/*MustBeDeclaration=*/false);
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace)
        return;
      // Unmatched closing brace: consume it on a line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Peek past any comments to see what follows 'default', then rewind.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
      } while (Next && Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next && Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // The level is raised at most once per parseLevel() call, on the first
      // label encountered.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    default:
      parseStructuralElement();
      break;
    }
  } while (!eof());
}

// Looks ahead from the current '{' and assigns a BlockKind (BK_Block or
// BK_BracedInit) to the braces it encounters, based on the tokens around each
// closing brace. The token source is rewound to its starting position before
// returning, so no tokens are consumed from the caller's point of view.
// \p ExpectClassBody makes a ';' after the outermost closing brace not count
// as evidence for a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->Tok.is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    // NOTE(review): ReadTokens is only incremented within this function and
    // never read.
    unsigned ReadTokens = 0;
    do {
      NextTok = Tokens->getNextToken();
      ++ReadTokens;
    } while (NextTok->is(tok::comment));

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less))
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->BlockKind = BK_BracedInit;
        else if (PrevTok->is(tok::r_paren))
          // `) { }` can only occur in function or method declarations in JS.
          Tok->BlockKind = BK_Block;
      } else {
        Tok->BlockKind = BK_Unknown;
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      if (LBraceStack.back()->BlockKind == BK_Unknown) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list. Note that
          // regardless how we mark inner braces here, we will overwrite the
          // BlockKind later if we parse a braced list (where all blocks
          // inside are by default braced lists), or when we explicitly detect
          // blocks (for example while parsing lambdas).
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              (Style.Language == FormatStyle::LK_JavaScript &&
               NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                Keywords.kw_as)) ||
              (Style.isCpp() && NextTok->is(tok::l_paren)) ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis) ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
              (NextTok->is(tok::semi) &&
               (!ExpectClassBody || LBraceStack.size() != 1)) ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);
          if (NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ++ReadTokens;
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Both the closing brace and its matching opening brace get the
        // decided kind.
        if (ProbablyBracedList) {
          Tok->BlockKind = BK_BracedInit;
          LBraceStack.back()->BlockKind = BK_BracedInit;
        } else {
          Tok->BlockKind = BK_Block;
          LBraceStack.back()->BlockKind = BK_Block;
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these tokens inside a still-undecided brace marks it as a block.
      if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
        LBraceStack.back()->BlockKind = BK_Block;
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
    if (LBraceStack[i]->BlockKind == BK_Unknown)
      LBraceStack[i]->BlockKind = BK_Block;
  }

  FormatTok = Tokens->setPosition(StoredPosition);
}

// Mixes std::hash<T>(v) into \p seed in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  std::hash<T> hasher;
  seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

// Returns a hash over the Kind and Line of every entry currently on PPStack.
// parseBlock() compares the value at the opening and closing brace.
size_t UnwrappedLineParser::computePPHash() const {
  size_t h = 0;
  for (const auto &i : PPStack) {
    hash_combine(h, size_t(i.Kind));
    hash_combine(h, i.Line);
  }
  return h;
}

// Parses a block starting at the current '{' (or TT_MacroBlockBegin token).
//
// \p MustBeDeclaration is the declaration context used for the block body.
// \p AddLevel raises the line level by one for the body.
// \p MunchSemi additionally consumes a ';' directly after the closing token.
//
// The line level is restored to its initial value on every path that finds a
// token after the body; on eof the function returns without restoring it.
void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
                                     bool MunchSemi) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->BlockKind = BK_Block;

  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Remember the index of the line that holds the opening token so the
  // closing line can refer back to it.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevel)
    ++Line->Level;
  parseLevel(/*HasOpeningBrace=*/true);

  if (eof())
    return;

  // The body did not end at the expected closing token: give up on this block.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->BlockKind = BK_Block;
    return;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (MunchSemi && FormatTok->Tok.is(tok::semi))
    nextToken();
  Line->Level = InitialLevel;

  // Only link opening and closing lines when the preprocessor stack is the
  // same at both ends of the block.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }
}

// Returns true if \p Line has at least four tokens and starts with
// `goog.scope(`.
static bool isGoogScope(const UnwrappedLine &Line) {
  // FIXME: Closure-library specific stuff should not be hard-coded but be
  // configurable.
  if (Line.Tokens.size() < 4)
    return false;
  auto I = Line.Tokens.begin();
  if (I->Tok->TokenText != "goog")
    return false;
  ++I;
  if (I->Tok->isNot(tok::period))
    return false;
  ++I;
  if (I->Tok->TokenText != "scope")
    return false;
  ++I;
  return I->Tok->is(tok::l_paren);
}

// Returns true if \p Line has at least three tokens and starts with
// `(function(`.
static bool isIIFE(const UnwrappedLine &Line,
                   const AdditionalKeywords &Keywords) {
  // Look for the start of an immediately invoked anonymous function.
  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
  // This is commonly done in JavaScript to create a new, anonymous scope.
  // Example: (function() { ... })()
  if (Line.Tokens.size() < 3)
    return false;
  auto I = Line.Tokens.begin();
  if (I->Tok->isNot(tok::l_paren))
    return false;
  ++I;
  if (I->Tok->isNot(Keywords.kw_function))
    return false;
  ++I;
  return I->Tok->is(tok::l_paren);
}

// Maps the keyword \p InitialToken (namespace, class, union or struct) to the
// matching BraceWrapping option of \p Style; returns false for any other
// token.
static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
                                   const FormatToken &InitialToken) {
  if (InitialToken.is(tok::kw_namespace))
    return Style.BraceWrapping.AfterNamespace;
  if (InitialToken.is(tok::kw_class))
    return Style.BraceWrapping.AfterClass;
  if (InitialToken.is(tok::kw_union))
    return Style.BraceWrapping.AfterUnion;
  if (InitialToken.is(tok::kw_struct))
    return Style.BraceWrapping.AfterStruct;
  return false;
}

// Parses a block whose lines are collected via ScopedLineState (i.e. as
// children of the current line's last token when that line is non-empty)
// rather than as sibling lines. The body is indented by one level unless, in
// JavaScript, the current line is a goog.scope call or an IIFE.
void UnwrappedLineParser::parseChildBlock() {
  FormatTok->BlockKind = BK_Block;
  nextToken();
  {
    bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
                       (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
    ScopedLineState LineState(*this);
    ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                            /*MustBeDeclaration=*/false);
    Line->Level += SkipIndent ? 0 : 1;
    parseLevel(/*HasOpeningBrace=*/true);
    flushComments(isOnNewLine(*FormatTok));
    Line->Level -= SkipIndent ? 0 : 1;
  }
  nextToken();
}

// Parses one preprocessor directive starting at '#'. A ScopedMacroState limits
// token reading to the directive for the duration of the call; the directive
// keyword selects the specific handler, with parsePPUnknown() as the fallback.
void UnwrappedLineParser::parsePPDirective() {
  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
  nextToken();

  if (!FormatTok->Tok.getIdentifierInfo()) {
    parsePPUnknown();
    return;
  }

  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
  case tok::pp_define:
    parsePPDefine();
    return;
  case tok::pp_if:
    parsePPIf(/*IfDef=*/false);
    break;
  case tok::pp_ifdef:
  case tok::pp_ifndef:
    parsePPIf(/*IfDef=*/true);
    break;
  case tok::pp_else:
    parsePPElse();
    break;
  case tok::pp_elif:
    parsePPElIf();
    break;
  case tok::pp_endif:
    parsePPEndIf();
    break;
  default:
    parsePPUnknown();
    break;
  }
}

// Pushes an entry for a new conditional branch onto PPStack, tagged with the
// current line count. The branch is recorded as PP_Unreachable when
// \p Unreachable is set or when the enclosing entry is already unreachable;
// otherwise as PP_Conditional.
void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
  size_t Line = CurrentLines->size();
  if (CurrentLines == &PreprocessorDirectives)
    Line += Lines.size();

  if (Unreachable ||
      (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
    PPStack.push_back({PP_Unreachable, Line});
  else
    PPStack.push_back({PP_Conditional, Line});
}

// Enters a new conditional (#if/#ifdef/#ifndef) nesting level. The first
// branch is treated as unreachable when this pass has selected a later branch
// for the level (PPLevelBranchIndex > 0).
void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
  ++PPBranchLevel;
  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
  // First time this nesting level is seen: start tracking it.
  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
    PPLevelBranchIndex.push_back(0);
    PPLevelBranchCount.push_back(0);
  }
  PPChainBranchIndex.push(0);
  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
  conditionalCompilationCondition(Unreachable || Skip);
}

// Switches to the next branch (#else/#elif) of the current conditional. The
// new branch is unreachable unless its index in the chain equals the branch
// selected for this level in the current pass.
void UnwrappedLineParser::conditionalCompilationAlternative() {
  if (!PPStack.empty())
    PPStack.pop_back();
  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  if (!PPChainBranchIndex.empty())
    ++PPChainBranchIndex.top();
  conditionalCompilationCondition(
      PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
}

// Leaves the current conditional (#endif). Records the largest number of
// branches seen for this level in PPLevelBranchCount, then pops the per-chain
// and per-branch state. All pops are guarded so that stray #endif's are
// tolerated.
void UnwrappedLineParser::conditionalCompilationEnd() {
  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
    if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
      PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
    }
  }
  // Guard against #endif's without #if.
  if (PPBranchLevel > -1)
    --PPBranchLevel;
  if (!PPChainBranchIndex.empty())
    PPChainBranchIndex.pop();
  if (!PPStack.empty())
    PPStack.pop_back();
}

// Handles #if (\p IfDef false) and #ifdef/#ifndef (\p IfDef true). The branch
// is treated as unreachable for `#if 0`, `#if false` and `#ifdef SWIG`. Also
// records a candidate include guard when an #ifndef is preceded only by
// comment lines.
void UnwrappedLineParser::parsePPIf(bool IfDef) {
  bool IfNDef = FormatTok->is(tok::pp_ifndef);
  nextToken();
  bool Unreachable = false;
  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
    Unreachable = true;
  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
    Unreachable = true;
  conditionalCompilationStart(Unreachable);
  FormatToken *IfCondition = FormatTok;
  // If there's a #ifndef on the first line, and the only lines before it are
  // comments, it could be an include guard.
  bool MaybeIncludeGuard = IfNDef;
  if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
    for (auto &Line : Lines) {
      if (!Line.Tokens.front().Tok->is(tok::comment)) {
        MaybeIncludeGuard = false;
        IncludeGuard = IG_Rejected;
        break;
      }
    }
  // The directive line itself is emitted at the enclosing branch level.
  --PPBranchLevel;
  parsePPUnknown();
  ++PPBranchLevel;
  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
    IncludeGuard = IG_IfNdefed;
    IncludeGuardToken = IfCondition;
  }
}

// Handles #else (and, via parsePPElIf(), #elif).
void UnwrappedLineParser::parsePPElse() {
  // If a potential include guard has an #else, it's not an include guard.
  if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
    IncludeGuard = IG_Rejected;
  conditionalCompilationAlternative();
  // NOTE(review): the decrement is guarded but the increment below is not, so
  // an #else without a matching #if leaves PPBranchLevel one higher than
  // before - confirm this is intended.
  if (PPBranchLevel > -1)
    --PPBranchLevel;
  parsePPUnknown();
  ++PPBranchLevel;
}

// #elif is handled exactly like #else.
void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }

// Handles #endif and completes include-guard detection.
void UnwrappedLineParser::parsePPEndIf() {
  conditionalCompilationEnd();
  parsePPUnknown();
  // If the #endif of a potential include guard is the last thing in the file,
  // then we found an include guard.
  unsigned TokenPosition = Tokens->getPosition();
  FormatToken *PeekNext = AllTokens[TokenPosition];
  if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
      PeekNext->is(tok::eof) &&
      Style.IndentPPDirectives != FormatStyle::PPDIS_None)
    IncludeGuard = IG_Found;
}

// Handles #define. The directive line (name plus an optional parameter list)
// is emitted first; the macro body is then parsed with parseFile() at one
// additional level. A #define naming the token remembered from a preceding
// #ifndef advances include-guard detection.
void UnwrappedLineParser::parsePPDefine() {
  nextToken();

  if (FormatTok->Tok.getKind() != tok::identifier) {
    IncludeGuard = IG_Rejected;
    IncludeGuardToken = nullptr;
    parsePPUnknown();
    return;
  }

  if (IncludeGuard == IG_IfNdefed &&
      IncludeGuardToken->TokenText == FormatTok->TokenText) {
    IncludeGuard = IG_Defined;
    IncludeGuardToken = nullptr;
    // Reject the guard if any line seen so far starts with something other
    // than a comment or '#'.
    for (auto &Line : Lines) {
      if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
        IncludeGuard = IG_Rejected;
        break;
      }
    }
  }

  nextToken();
  // A '(' with no whitespace before it starts the macro's parameter list.
  if (FormatTok->Tok.getKind() == tok::l_paren &&
      FormatTok->WhitespaceRange.getBegin() ==
          FormatTok->WhitespaceRange.getEnd()) {
    parseParens();
  }
  if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
    Line->Level += PPBranchLevel + 1;
  addUnwrappedLine();
  ++Line->Level;

  // Errors during a preprocessor directive can only affect the layout of the
  // preprocessor directive, and thus we ignore them. An alternative approach
  // would be to use the same approach we use on the file level (no
  // re-indentation if there was a structural error) within the macro
  // definition.
  parseFile();
}

// Consumes tokens up to the end of the directive and emits them as one line,
// indented by the current branch level when IndentPPDirectives is
// PPDIS_AfterHash.
void UnwrappedLineParser::parsePPUnknown() {
  do {
    nextToken();
  } while (!eof());
  if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
    Line->Level += PPBranchLevel + 1;
  addUnwrappedLine();
}

// Here we blacklist certain tokens that are not usually the first token in an
// unwrapped line. This is used in an attempt to distinguish macro calls
// without trailing semicolons from other constructs split to several lines.
// Returns true when \p Tok is none of the listed tokens.
static bool tokenCanStartNewLine(const clang::Token &Tok) {
  // Semicolon can be a null-statement, l_square can be a start of a macro or
  // a C++11 attribute, but this doesn't seem to be common.
  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
         Tok.isNot(tok::l_square) &&
         // Tokens that can only be used as binary operators and a part of
         // overloaded operator names.
         Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
         Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
         Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
         Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
         Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
         Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
         Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
         Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
         Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
         Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
         Tok.isNot(tok::lesslessequal) &&
         // Colon is used in labels, base class lists, initializer lists,
         // range-based for loops, ternary operator, but should never be the
         // first token in an unwrapped line.
         Tok.isNot(tok::colon) &&
         // 'noexcept' is a trailing annotation.
         Tok.isNot(tok::kw_noexcept);
}

// Returns true if \p FormatTok is an identifier token that either carries no
// identifier info or is not one of the JavaScript keywords listed below.
static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
                          const FormatToken *FormatTok) {
  // FIXME: This returns true for C/C++ keywords like 'struct'.
  return FormatTok->is(tok::identifier) &&
         (FormatTok->Tok.getIdentifierInfo() == nullptr ||
          !FormatTok->isOneOf(
              Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
              Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
              Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
              Keywords.kw_let, Keywords.kw_var, tok::kw_const,
              Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
              Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
              Keywords.kw_from));
}

// Returns true if \p FormatTok is a literal, `true`/`false`, or an identifier
// accepted by mustBeJSIdent().
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
                                 const FormatToken *FormatTok) {
  return FormatTok->Tok.isLiteral() ||
         FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
         mustBeJSIdent(Keywords, FormatTok);
}

// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
// when encountered after a value (see mustBeJSIdentOrValue).
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
                           const FormatToken *FormatTok) {
  return FormatTok->isOneOf(
      tok::kw_return, Keywords.kw_yield,
      // conditionals
      tok::kw_if, tok::kw_else,
      // loops
      tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
      // switch/case
      tok::kw_switch, tok::kw_case,
      // exceptions
      tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
      // declaration
      tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
      Keywords.kw_async, Keywords.kw_function,
      // import/export
      Keywords.kw_import, tok::kw_export);
}

// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// terminate the line in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // ASI is only considered across a line break. When comments precede the
  // next token, the first comment's line break is the one that counts.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
    // If the line contains an '@' sign, the previous token might be an
    // annotation, which can precede another identifier/value.
    bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
                              [](UnwrappedLineNode &LineNode) {
                                return LineNode.Tok->is(tok::at);
                              }) != Line->Tokens.end();
    if (HasAt)
      return;
  }
  // A value followed by '!' on the next line: end the line before the '!'.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // A value on the next line directly after a value, ']', ')', '++' or '--',
  // unless either side belongs to a template string expression.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus)))
    return addUnwrappedLine();
  // A declaration or statement keyword on the next line after a value or ')'.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next))
    return addUnwrappedLine();
}

// NOTE(review): only the beginning of this function is visible in this
// section of the file; the comments below describe the visible cases only.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include directive, optionally followed by a string literal,
  // forms a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Every token up to the closing brace is marked Finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java and JavaScript the keyword is simply consumed; otherwise it is
    // parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript)
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // extern "<string>" { ... }: the block is parsed as a declaration context;
    // its contents only get an extra level when AfterExternBlock is set.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: `import [public] "<string>" [;]` forms a line of its own.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // C++: signals/slots style keywords followed by ':' form a line of their
    // own.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    // In all other cases, parse the declaration.
1119 break; 1120 default: 1121 break; 1122 } 1123 do { 1124 const FormatToken *Previous = FormatTok->Previous; 1125 switch (FormatTok->Tok.getKind()) { 1126 case tok::at: 1127 nextToken(); 1128 if (FormatTok->Tok.is(tok::l_brace)) { 1129 nextToken(); 1130 parseBracedList(); 1131 break; 1132 } else if (Style.Language == FormatStyle::LK_Java && 1133 FormatTok->is(Keywords.kw_interface)) { 1134 nextToken(); 1135 break; 1136 } 1137 switch (FormatTok->Tok.getObjCKeywordID()) { 1138 case tok::objc_public: 1139 case tok::objc_protected: 1140 case tok::objc_package: 1141 case tok::objc_private: 1142 return parseAccessSpecifier(); 1143 case tok::objc_interface: 1144 case tok::objc_implementation: 1145 return parseObjCInterfaceOrImplementation(); 1146 case tok::objc_protocol: 1147 if (parseObjCProtocol()) 1148 return; 1149 break; 1150 case tok::objc_end: 1151 return; // Handled by the caller. 1152 case tok::objc_optional: 1153 case tok::objc_required: 1154 nextToken(); 1155 addUnwrappedLine(); 1156 return; 1157 case tok::objc_autoreleasepool: 1158 nextToken(); 1159 if (FormatTok->Tok.is(tok::l_brace)) { 1160 if (Style.BraceWrapping.AfterControlStatement) 1161 addUnwrappedLine(); 1162 parseBlock(/*MustBeDeclaration=*/false); 1163 } 1164 addUnwrappedLine(); 1165 return; 1166 case tok::objc_synchronized: 1167 nextToken(); 1168 if (FormatTok->Tok.is(tok::l_paren)) 1169 // Skip synchronization object 1170 parseParens(); 1171 if (FormatTok->Tok.is(tok::l_brace)) { 1172 if (Style.BraceWrapping.AfterControlStatement) 1173 addUnwrappedLine(); 1174 parseBlock(/*MustBeDeclaration=*/false); 1175 } 1176 addUnwrappedLine(); 1177 return; 1178 case tok::objc_try: 1179 // This branch isn't strictly necessary (the kw_try case below would 1180 // do this too after the tok::at is parsed above). But be explicit. 1181 parseTryCatch(); 1182 return; 1183 default: 1184 break; 1185 } 1186 break; 1187 case tok::kw_enum: 1188 // Ignore if this is part of "template <enum ...". 
1189 if (Previous && Previous->is(tok::less)) { 1190 nextToken(); 1191 break; 1192 } 1193 1194 // parseEnum falls through and does not yet add an unwrapped line as an 1195 // enum definition can start a structural element. 1196 if (!parseEnum()) 1197 break; 1198 // This only applies for C++. 1199 if (!Style.isCpp()) { 1200 addUnwrappedLine(); 1201 return; 1202 } 1203 break; 1204 case tok::kw_typedef: 1205 nextToken(); 1206 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1207 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 1208 parseEnum(); 1209 break; 1210 case tok::kw_struct: 1211 case tok::kw_union: 1212 case tok::kw_class: 1213 // parseRecord falls through and does not yet add an unwrapped line as a 1214 // record declaration or definition can start a structural element. 1215 parseRecord(); 1216 // This does not apply for Java and JavaScript. 1217 if (Style.Language == FormatStyle::LK_Java || 1218 Style.Language == FormatStyle::LK_JavaScript) { 1219 if (FormatTok->is(tok::semi)) 1220 nextToken(); 1221 addUnwrappedLine(); 1222 return; 1223 } 1224 break; 1225 case tok::period: 1226 nextToken(); 1227 // In Java, classes have an implicit static member "class". 1228 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1229 FormatTok->is(tok::kw_class)) 1230 nextToken(); 1231 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 1232 FormatTok->Tok.getIdentifierInfo()) 1233 // JavaScript only has pseudo keywords, all keywords are allowed to 1234 // appear in "IdentifierName" positions. 
See http://es5.github.io/#x7.6 1235 nextToken(); 1236 break; 1237 case tok::semi: 1238 nextToken(); 1239 addUnwrappedLine(); 1240 return; 1241 case tok::r_brace: 1242 addUnwrappedLine(); 1243 return; 1244 case tok::l_paren: 1245 parseParens(); 1246 break; 1247 case tok::kw_operator: 1248 nextToken(); 1249 if (FormatTok->isBinaryOperator()) 1250 nextToken(); 1251 break; 1252 case tok::caret: 1253 nextToken(); 1254 if (FormatTok->Tok.isAnyIdentifier() || 1255 FormatTok->isSimpleTypeSpecifier()) 1256 nextToken(); 1257 if (FormatTok->is(tok::l_paren)) 1258 parseParens(); 1259 if (FormatTok->is(tok::l_brace)) 1260 parseChildBlock(); 1261 break; 1262 case tok::l_brace: 1263 if (!tryToParseBracedList()) { 1264 // A block outside of parentheses must be the last part of a 1265 // structural element. 1266 // FIXME: Figure out cases where this is not true, and add projections 1267 // for them (the one we know is missing are lambdas). 1268 if (Style.BraceWrapping.AfterFunction) 1269 addUnwrappedLine(); 1270 FormatTok->Type = TT_FunctionLBrace; 1271 parseBlock(/*MustBeDeclaration=*/false); 1272 addUnwrappedLine(); 1273 return; 1274 } 1275 // Otherwise this was a braced init list, and the structural 1276 // element continues. 1277 break; 1278 case tok::kw_try: 1279 // We arrive here when parsing function-try blocks. 1280 if (Style.BraceWrapping.AfterFunction) 1281 addUnwrappedLine(); 1282 parseTryCatch(); 1283 return; 1284 case tok::identifier: { 1285 if (FormatTok->is(TT_MacroBlockEnd)) { 1286 addUnwrappedLine(); 1287 return; 1288 } 1289 1290 // Function declarations (as opposed to function expressions) are parsed 1291 // on their own unwrapped line by continuing this loop. Function 1292 // expressions (functions that are not on their own line) must not create 1293 // a new unwrapped line, so they are special cased below. 
1294 size_t TokenCount = Line->Tokens.size(); 1295 if (Style.Language == FormatStyle::LK_JavaScript && 1296 FormatTok->is(Keywords.kw_function) && 1297 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1298 Keywords.kw_async)))) { 1299 tryToParseJSFunction(); 1300 break; 1301 } 1302 if ((Style.Language == FormatStyle::LK_JavaScript || 1303 Style.Language == FormatStyle::LK_Java) && 1304 FormatTok->is(Keywords.kw_interface)) { 1305 if (Style.Language == FormatStyle::LK_JavaScript) { 1306 // In JavaScript/TypeScript, "interface" can be used as a standalone 1307 // identifier, e.g. in `var interface = 1;`. If "interface" is 1308 // followed by another identifier, it is very like to be an actual 1309 // interface declaration. 1310 unsigned StoredPosition = Tokens->getPosition(); 1311 FormatToken *Next = Tokens->getNextToken(); 1312 FormatTok = Tokens->setPosition(StoredPosition); 1313 if (Next && !mustBeJSIdent(Keywords, Next)) { 1314 nextToken(); 1315 break; 1316 } 1317 } 1318 parseRecord(); 1319 addUnwrappedLine(); 1320 return; 1321 } 1322 1323 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1324 parseStatementMacro(); 1325 return; 1326 } 1327 1328 // See if the following token should start a new unwrapped line. 1329 StringRef Text = FormatTok->TokenText; 1330 nextToken(); 1331 if (Line->Tokens.size() == 1 && 1332 // JS doesn't have macros, and within classes colons indicate fields, 1333 // not labels. 1334 Style.Language != FormatStyle::LK_JavaScript) { 1335 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1336 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1337 parseLabel(); 1338 return; 1339 } 1340 // Recognize function-like macro usages without trailing semicolon as 1341 // well as free-standing macros like Q_OBJECT. 1342 bool FunctionLike = FormatTok->is(tok::l_paren); 1343 if (FunctionLike) 1344 parseParens(); 1345 1346 bool FollowedByNewline = 1347 CommentsBeforeNextToken.empty() 1348 ? 
FormatTok->NewlinesBefore > 0 1349 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1350 1351 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1352 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 1353 addUnwrappedLine(); 1354 return; 1355 } 1356 } 1357 break; 1358 } 1359 case tok::equal: 1360 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 1361 // TT_JsFatArrow. The always start an expression or a child block if 1362 // followed by a curly. 1363 if (FormatTok->is(TT_JsFatArrow)) { 1364 nextToken(); 1365 if (FormatTok->is(tok::l_brace)) 1366 parseChildBlock(); 1367 break; 1368 } 1369 1370 nextToken(); 1371 if (FormatTok->Tok.is(tok::l_brace)) { 1372 nextToken(); 1373 parseBracedList(); 1374 } else if (Style.Language == FormatStyle::LK_Proto && 1375 FormatTok->Tok.is(tok::less)) { 1376 nextToken(); 1377 parseBracedList(/*ContinueOnSemicolons=*/false, 1378 /*ClosingBraceKind=*/tok::greater); 1379 } 1380 break; 1381 case tok::l_square: 1382 parseSquare(); 1383 break; 1384 case tok::kw_new: 1385 parseNew(); 1386 break; 1387 default: 1388 nextToken(); 1389 break; 1390 } 1391 } while (!eof()); 1392 } 1393 1394 bool UnwrappedLineParser::tryToParseLambda() { 1395 if (!Style.isCpp()) { 1396 nextToken(); 1397 return false; 1398 } 1399 assert(FormatTok->is(tok::l_square)); 1400 FormatToken &LSquare = *FormatTok; 1401 if (!tryToParseLambdaIntroducer()) 1402 return false; 1403 1404 while (FormatTok->isNot(tok::l_brace)) { 1405 if (FormatTok->isSimpleTypeSpecifier()) { 1406 nextToken(); 1407 continue; 1408 } 1409 switch (FormatTok->Tok.getKind()) { 1410 case tok::l_brace: 1411 break; 1412 case tok::l_paren: 1413 parseParens(); 1414 break; 1415 case tok::amp: 1416 case tok::star: 1417 case tok::kw_const: 1418 case tok::comma: 1419 case tok::less: 1420 case tok::greater: 1421 case tok::identifier: 1422 case tok::numeric_constant: 1423 case tok::coloncolon: 1424 case tok::kw_mutable: 1425 case tok::kw_noexcept: 1426 nextToken(); 
1427 break; 1428 case tok::arrow: 1429 // This might or might not actually be a lambda arrow (this could be an 1430 // ObjC method invocation followed by a dereferencing arrow). We might 1431 // reset this back to TT_Unknown in TokenAnnotator. 1432 FormatTok->Type = TT_LambdaArrow; 1433 nextToken(); 1434 break; 1435 default: 1436 return true; 1437 } 1438 } 1439 LSquare.Type = TT_LambdaLSquare; 1440 parseChildBlock(); 1441 return true; 1442 } 1443 1444 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1445 const FormatToken *Previous = FormatTok->Previous; 1446 if (Previous && 1447 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1448 tok::kw_delete, tok::l_square) || 1449 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1450 Previous->isSimpleTypeSpecifier())) { 1451 nextToken(); 1452 return false; 1453 } 1454 nextToken(); 1455 if (FormatTok->is(tok::l_square)) { 1456 return false; 1457 } 1458 parseSquare(/*LambdaIntroducer=*/true); 1459 return true; 1460 } 1461 1462 void UnwrappedLineParser::tryToParseJSFunction() { 1463 assert(FormatTok->is(Keywords.kw_function) || 1464 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1465 if (FormatTok->is(Keywords.kw_async)) 1466 nextToken(); 1467 // Consume "function". 1468 nextToken(); 1469 1470 // Consume * (generator function). Treat it like C++'s overloaded operators. 1471 if (FormatTok->is(tok::star)) { 1472 FormatTok->Type = TT_OverloadedOperator; 1473 nextToken(); 1474 } 1475 1476 // Consume function name. 1477 if (FormatTok->is(tok::identifier)) 1478 nextToken(); 1479 1480 if (FormatTok->isNot(tok::l_paren)) 1481 return; 1482 1483 // Parse formal parameter list. 1484 parseParens(); 1485 1486 if (FormatTok->is(tok::colon)) { 1487 // Parse a type definition. 1488 nextToken(); 1489 1490 // Eat the type declaration. For braced inline object types, balance braces, 1491 // otherwise just parse until finding an l_brace for the function body. 
1492 if (FormatTok->is(tok::l_brace)) 1493 tryToParseBracedList(); 1494 else 1495 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1496 nextToken(); 1497 } 1498 1499 if (FormatTok->is(tok::semi)) 1500 return; 1501 1502 parseChildBlock(); 1503 } 1504 1505 bool UnwrappedLineParser::tryToParseBracedList() { 1506 if (FormatTok->BlockKind == BK_Unknown) 1507 calculateBraceTypes(); 1508 assert(FormatTok->BlockKind != BK_Unknown); 1509 if (FormatTok->BlockKind == BK_Block) 1510 return false; 1511 nextToken(); 1512 parseBracedList(); 1513 return true; 1514 } 1515 1516 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1517 tok::TokenKind ClosingBraceKind) { 1518 bool HasError = false; 1519 1520 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1521 // replace this by using parseAssigmentExpression() inside. 1522 do { 1523 if (Style.Language == FormatStyle::LK_JavaScript) { 1524 if (FormatTok->is(Keywords.kw_function) || 1525 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1526 tryToParseJSFunction(); 1527 continue; 1528 } 1529 if (FormatTok->is(TT_JsFatArrow)) { 1530 nextToken(); 1531 // Fat arrows can be followed by simple expressions or by child blocks 1532 // in curly braces. 1533 if (FormatTok->is(tok::l_brace)) { 1534 parseChildBlock(); 1535 continue; 1536 } 1537 } 1538 if (FormatTok->is(tok::l_brace)) { 1539 // Could be a method inside of a braced list `{a() { return 1; }}`. 
1540 if (tryToParseBracedList()) 1541 continue; 1542 parseChildBlock(); 1543 } 1544 } 1545 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1546 nextToken(); 1547 return !HasError; 1548 } 1549 switch (FormatTok->Tok.getKind()) { 1550 case tok::caret: 1551 nextToken(); 1552 if (FormatTok->is(tok::l_brace)) { 1553 parseChildBlock(); 1554 } 1555 break; 1556 case tok::l_square: 1557 tryToParseLambda(); 1558 break; 1559 case tok::l_paren: 1560 parseParens(); 1561 // JavaScript can just have free standing methods and getters/setters in 1562 // object literals. Detect them by a "{" following ")". 1563 if (Style.Language == FormatStyle::LK_JavaScript) { 1564 if (FormatTok->is(tok::l_brace)) 1565 parseChildBlock(); 1566 break; 1567 } 1568 break; 1569 case tok::l_brace: 1570 // Assume there are no blocks inside a braced init list apart 1571 // from the ones we explicitly parse out (like lambdas). 1572 FormatTok->BlockKind = BK_BracedInit; 1573 nextToken(); 1574 parseBracedList(); 1575 break; 1576 case tok::less: 1577 if (Style.Language == FormatStyle::LK_Proto) { 1578 nextToken(); 1579 parseBracedList(/*ContinueOnSemicolons=*/false, 1580 /*ClosingBraceKind=*/tok::greater); 1581 } else { 1582 nextToken(); 1583 } 1584 break; 1585 case tok::semi: 1586 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1587 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1588 // used for error recovery if we have otherwise determined that this is 1589 // a braced list. 
1590 if (Style.Language == FormatStyle::LK_JavaScript) { 1591 nextToken(); 1592 break; 1593 } 1594 HasError = true; 1595 if (!ContinueOnSemicolons) 1596 return !HasError; 1597 nextToken(); 1598 break; 1599 case tok::comma: 1600 nextToken(); 1601 break; 1602 default: 1603 nextToken(); 1604 break; 1605 } 1606 } while (!eof()); 1607 return false; 1608 } 1609 1610 void UnwrappedLineParser::parseParens() { 1611 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1612 nextToken(); 1613 do { 1614 switch (FormatTok->Tok.getKind()) { 1615 case tok::l_paren: 1616 parseParens(); 1617 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1618 parseChildBlock(); 1619 break; 1620 case tok::r_paren: 1621 nextToken(); 1622 return; 1623 case tok::r_brace: 1624 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1625 return; 1626 case tok::l_square: 1627 tryToParseLambda(); 1628 break; 1629 case tok::l_brace: 1630 if (!tryToParseBracedList()) 1631 parseChildBlock(); 1632 break; 1633 case tok::at: 1634 nextToken(); 1635 if (FormatTok->Tok.is(tok::l_brace)) { 1636 nextToken(); 1637 parseBracedList(); 1638 } 1639 break; 1640 case tok::kw_class: 1641 if (Style.Language == FormatStyle::LK_JavaScript) 1642 parseRecord(/*ParseAsExpr=*/true); 1643 else 1644 nextToken(); 1645 break; 1646 case tok::identifier: 1647 if (Style.Language == FormatStyle::LK_JavaScript && 1648 (FormatTok->is(Keywords.kw_function) || 1649 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1650 tryToParseJSFunction(); 1651 else 1652 nextToken(); 1653 break; 1654 default: 1655 nextToken(); 1656 break; 1657 } 1658 } while (!eof()); 1659 } 1660 1661 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 1662 if (!LambdaIntroducer) { 1663 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1664 if (tryToParseLambda()) 1665 return; 1666 } 1667 do { 1668 switch (FormatTok->Tok.getKind()) { 1669 case tok::l_paren: 1670 parseParens(); 1671 
break; 1672 case tok::r_square: 1673 nextToken(); 1674 return; 1675 case tok::r_brace: 1676 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1677 return; 1678 case tok::l_square: 1679 parseSquare(); 1680 break; 1681 case tok::l_brace: { 1682 if (!tryToParseBracedList()) 1683 parseChildBlock(); 1684 break; 1685 } 1686 case tok::at: 1687 nextToken(); 1688 if (FormatTok->Tok.is(tok::l_brace)) { 1689 nextToken(); 1690 parseBracedList(); 1691 } 1692 break; 1693 default: 1694 nextToken(); 1695 break; 1696 } 1697 } while (!eof()); 1698 } 1699 1700 void UnwrappedLineParser::parseIfThenElse() { 1701 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1702 nextToken(); 1703 if (FormatTok->Tok.is(tok::kw_constexpr)) 1704 nextToken(); 1705 if (FormatTok->Tok.is(tok::l_paren)) 1706 parseParens(); 1707 bool NeedsUnwrappedLine = false; 1708 if (FormatTok->Tok.is(tok::l_brace)) { 1709 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1710 parseBlock(/*MustBeDeclaration=*/false); 1711 if (Style.BraceWrapping.BeforeElse) 1712 addUnwrappedLine(); 1713 else 1714 NeedsUnwrappedLine = true; 1715 } else { 1716 addUnwrappedLine(); 1717 ++Line->Level; 1718 parseStructuralElement(); 1719 --Line->Level; 1720 } 1721 if (FormatTok->Tok.is(tok::kw_else)) { 1722 nextToken(); 1723 if (FormatTok->Tok.is(tok::l_brace)) { 1724 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1725 parseBlock(/*MustBeDeclaration=*/false); 1726 addUnwrappedLine(); 1727 } else if (FormatTok->Tok.is(tok::kw_if)) { 1728 parseIfThenElse(); 1729 } else { 1730 addUnwrappedLine(); 1731 ++Line->Level; 1732 parseStructuralElement(); 1733 if (FormatTok->is(tok::eof)) 1734 addUnwrappedLine(); 1735 --Line->Level; 1736 } 1737 } else if (NeedsUnwrappedLine) { 1738 addUnwrappedLine(); 1739 } 1740 } 1741 1742 void UnwrappedLineParser::parseTryCatch() { 1743 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1744 nextToken(); 1745 bool NeedsUnwrappedLine = 
false; 1746 if (FormatTok->is(tok::colon)) { 1747 // We are in a function try block, what comes is an initializer list. 1748 nextToken(); 1749 while (FormatTok->is(tok::identifier)) { 1750 nextToken(); 1751 if (FormatTok->is(tok::l_paren)) 1752 parseParens(); 1753 if (FormatTok->is(tok::comma)) 1754 nextToken(); 1755 } 1756 } 1757 // Parse try with resource. 1758 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1759 parseParens(); 1760 } 1761 if (FormatTok->is(tok::l_brace)) { 1762 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1763 parseBlock(/*MustBeDeclaration=*/false); 1764 if (Style.BraceWrapping.BeforeCatch) { 1765 addUnwrappedLine(); 1766 } else { 1767 NeedsUnwrappedLine = true; 1768 } 1769 } else if (!FormatTok->is(tok::kw_catch)) { 1770 // The C++ standard requires a compound-statement after a try. 1771 // If there's none, we try to assume there's a structuralElement 1772 // and try to continue. 1773 addUnwrappedLine(); 1774 ++Line->Level; 1775 parseStructuralElement(); 1776 --Line->Level; 1777 } 1778 while (1) { 1779 if (FormatTok->is(tok::at)) 1780 nextToken(); 1781 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1782 tok::kw___finally) || 1783 ((Style.Language == FormatStyle::LK_Java || 1784 Style.Language == FormatStyle::LK_JavaScript) && 1785 FormatTok->is(Keywords.kw_finally)) || 1786 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1787 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1788 break; 1789 nextToken(); 1790 while (FormatTok->isNot(tok::l_brace)) { 1791 if (FormatTok->is(tok::l_paren)) { 1792 parseParens(); 1793 continue; 1794 } 1795 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1796 return; 1797 nextToken(); 1798 } 1799 NeedsUnwrappedLine = false; 1800 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1801 parseBlock(/*MustBeDeclaration=*/false); 1802 if (Style.BraceWrapping.BeforeCatch) 1803 addUnwrappedLine(); 1804 else 1805 NeedsUnwrappedLine = 
true; 1806 } 1807 if (NeedsUnwrappedLine) 1808 addUnwrappedLine(); 1809 } 1810 1811 void UnwrappedLineParser::parseNamespace() { 1812 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1813 1814 const FormatToken &InitialToken = *FormatTok; 1815 nextToken(); 1816 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1817 nextToken(); 1818 if (FormatTok->Tok.is(tok::l_brace)) { 1819 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1820 addUnwrappedLine(); 1821 1822 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1823 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1824 DeclarationScopeStack.size() > 1); 1825 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1826 // Munch the semicolon after a namespace. This is more common than one would 1827 // think. Puttin the semicolon into its own line is very ugly. 1828 if (FormatTok->Tok.is(tok::semi)) 1829 nextToken(); 1830 addUnwrappedLine(); 1831 } 1832 // FIXME: Add error handling. 1833 } 1834 1835 void UnwrappedLineParser::parseNew() { 1836 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1837 nextToken(); 1838 if (Style.Language != FormatStyle::LK_Java) 1839 return; 1840 1841 // In Java, we can parse everything up to the parens, which aren't optional. 1842 do { 1843 // There should not be a ;, { or } before the new's open paren. 1844 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1845 return; 1846 1847 // Consume the parens. 1848 if (FormatTok->is(tok::l_paren)) { 1849 parseParens(); 1850 1851 // If there is a class body of an anonymous class, consume that as child. 1852 if (FormatTok->is(tok::l_brace)) 1853 parseChildBlock(); 1854 return; 1855 } 1856 nextToken(); 1857 } while (!eof()); 1858 } 1859 1860 void UnwrappedLineParser::parseForOrWhileLoop() { 1861 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1862 "'for', 'while' or foreach macro expected"); 1863 nextToken(); 1864 // JS' for await ( ... 
1865 if (Style.Language == FormatStyle::LK_JavaScript && 1866 FormatTok->is(Keywords.kw_await)) 1867 nextToken(); 1868 if (FormatTok->Tok.is(tok::l_paren)) 1869 parseParens(); 1870 if (FormatTok->Tok.is(tok::l_brace)) { 1871 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1872 parseBlock(/*MustBeDeclaration=*/false); 1873 addUnwrappedLine(); 1874 } else { 1875 addUnwrappedLine(); 1876 ++Line->Level; 1877 parseStructuralElement(); 1878 --Line->Level; 1879 } 1880 } 1881 1882 void UnwrappedLineParser::parseDoWhile() { 1883 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1884 nextToken(); 1885 if (FormatTok->Tok.is(tok::l_brace)) { 1886 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1887 parseBlock(/*MustBeDeclaration=*/false); 1888 if (Style.BraceWrapping.IndentBraces) 1889 addUnwrappedLine(); 1890 } else { 1891 addUnwrappedLine(); 1892 ++Line->Level; 1893 parseStructuralElement(); 1894 --Line->Level; 1895 } 1896 1897 // FIXME: Add error handling. 1898 if (!FormatTok->Tok.is(tok::kw_while)) { 1899 addUnwrappedLine(); 1900 return; 1901 } 1902 1903 nextToken(); 1904 parseStructuralElement(); 1905 } 1906 1907 void UnwrappedLineParser::parseLabel() { 1908 nextToken(); 1909 unsigned OldLineLevel = Line->Level; 1910 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1911 --Line->Level; 1912 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1913 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1914 parseBlock(/*MustBeDeclaration=*/false); 1915 if (FormatTok->Tok.is(tok::kw_break)) { 1916 if (Style.BraceWrapping.AfterControlStatement) 1917 addUnwrappedLine(); 1918 parseStructuralElement(); 1919 } 1920 addUnwrappedLine(); 1921 } else { 1922 if (FormatTok->is(tok::semi)) 1923 nextToken(); 1924 addUnwrappedLine(); 1925 } 1926 Line->Level = OldLineLevel; 1927 if (FormatTok->isNot(tok::l_brace)) { 1928 parseStructuralElement(); 1929 addUnwrappedLine(); 1930 } 1931 } 1932 1933 void 
UnwrappedLineParser::parseCaseLabel() { 1934 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1935 // FIXME: fix handling of complex expressions here. 1936 do { 1937 nextToken(); 1938 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1939 parseLabel(); 1940 } 1941 1942 void UnwrappedLineParser::parseSwitch() { 1943 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1944 nextToken(); 1945 if (FormatTok->Tok.is(tok::l_paren)) 1946 parseParens(); 1947 if (FormatTok->Tok.is(tok::l_brace)) { 1948 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1949 parseBlock(/*MustBeDeclaration=*/false); 1950 addUnwrappedLine(); 1951 } else { 1952 addUnwrappedLine(); 1953 ++Line->Level; 1954 parseStructuralElement(); 1955 --Line->Level; 1956 } 1957 } 1958 1959 void UnwrappedLineParser::parseAccessSpecifier() { 1960 nextToken(); 1961 // Understand Qt's slots. 1962 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 1963 nextToken(); 1964 // Otherwise, we don't know what it is, and we'd better keep the next token. 1965 if (FormatTok->Tok.is(tok::colon)) 1966 nextToken(); 1967 addUnwrappedLine(); 1968 } 1969 1970 bool UnwrappedLineParser::parseEnum() { 1971 // Won't be 'enum' for NS_ENUMs. 1972 if (FormatTok->Tok.is(tok::kw_enum)) 1973 nextToken(); 1974 1975 // In TypeScript, "enum" can also be used as property name, e.g. in interface 1976 // declarations. An "enum" keyword followed by a colon would be a syntax 1977 // error and thus assume it is just an identifier. 1978 if (Style.Language == FormatStyle::LK_JavaScript && 1979 FormatTok->isOneOf(tok::colon, tok::question)) 1980 return false; 1981 1982 // Eat up enum class ... 
1983 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 1984 nextToken(); 1985 1986 while (FormatTok->Tok.getIdentifierInfo() || 1987 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 1988 tok::greater, tok::comma, tok::question)) { 1989 nextToken(); 1990 // We can have macros or attributes in between 'enum' and the enum name. 1991 if (FormatTok->is(tok::l_paren)) 1992 parseParens(); 1993 if (FormatTok->is(tok::identifier)) { 1994 nextToken(); 1995 // If there are two identifiers in a row, this is likely an elaborate 1996 // return type. In Java, this can be "implements", etc. 1997 if (Style.isCpp() && FormatTok->is(tok::identifier)) 1998 return false; 1999 } 2000 } 2001 2002 // Just a declaration or something is wrong. 2003 if (FormatTok->isNot(tok::l_brace)) 2004 return true; 2005 FormatTok->BlockKind = BK_Block; 2006 2007 if (Style.Language == FormatStyle::LK_Java) { 2008 // Java enums are different. 2009 parseJavaEnumBody(); 2010 return true; 2011 } 2012 if (Style.Language == FormatStyle::LK_Proto) { 2013 parseBlock(/*MustBeDeclaration=*/true); 2014 return true; 2015 } 2016 2017 // Parse enum body. 2018 nextToken(); 2019 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 2020 if (HasError) { 2021 if (FormatTok->is(tok::semi)) 2022 nextToken(); 2023 addUnwrappedLine(); 2024 } 2025 return true; 2026 2027 // There is no addUnwrappedLine() here so that we fall through to parsing a 2028 // structural element afterwards. Thus, in "enum A {} n, m;", 2029 // "} n, m;" will end up in one unwrapped line. 2030 } 2031 2032 void UnwrappedLineParser::parseJavaEnumBody() { 2033 // Determine whether the enum is simple, i.e. does not have a semicolon or 2034 // constants with class bodies. Simple enums can be formatted like braced 2035 // lists, contracted to a single line, etc. 
2036 unsigned StoredPosition = Tokens->getPosition(); 2037 bool IsSimple = true; 2038 FormatToken *Tok = Tokens->getNextToken(); 2039 while (Tok) { 2040 if (Tok->is(tok::r_brace)) 2041 break; 2042 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2043 IsSimple = false; 2044 break; 2045 } 2046 // FIXME: This will also mark enums with braces in the arguments to enum 2047 // constants as "not simple". This is probably fine in practice, though. 2048 Tok = Tokens->getNextToken(); 2049 } 2050 FormatTok = Tokens->setPosition(StoredPosition); 2051 2052 if (IsSimple) { 2053 nextToken(); 2054 parseBracedList(); 2055 addUnwrappedLine(); 2056 return; 2057 } 2058 2059 // Parse the body of a more complex enum. 2060 // First add a line for everything up to the "{". 2061 nextToken(); 2062 addUnwrappedLine(); 2063 ++Line->Level; 2064 2065 // Parse the enum constants. 2066 while (FormatTok) { 2067 if (FormatTok->is(tok::l_brace)) { 2068 // Parse the constant's class body. 2069 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2070 /*MunchSemi=*/false); 2071 } else if (FormatTok->is(tok::l_paren)) { 2072 parseParens(); 2073 } else if (FormatTok->is(tok::comma)) { 2074 nextToken(); 2075 addUnwrappedLine(); 2076 } else if (FormatTok->is(tok::semi)) { 2077 nextToken(); 2078 addUnwrappedLine(); 2079 break; 2080 } else if (FormatTok->is(tok::r_brace)) { 2081 addUnwrappedLine(); 2082 break; 2083 } else { 2084 nextToken(); 2085 } 2086 } 2087 2088 // Parse the class body after the enum's ";" if any. 2089 parseLevel(/*HasOpeningBrace=*/true); 2090 nextToken(); 2091 --Line->Level; 2092 addUnwrappedLine(); 2093 } 2094 2095 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 2096 const FormatToken &InitialToken = *FormatTok; 2097 nextToken(); 2098 2099 // The actual identifier can be a nested name specifier, and in macros 2100 // it is often token-pasted. 
2101 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 2102 tok::kw___attribute, tok::kw___declspec, 2103 tok::kw_alignas) || 2104 ((Style.Language == FormatStyle::LK_Java || 2105 Style.Language == FormatStyle::LK_JavaScript) && 2106 FormatTok->isOneOf(tok::period, tok::comma))) { 2107 if (Style.Language == FormatStyle::LK_JavaScript && 2108 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 2109 // JavaScript/TypeScript supports inline object types in 2110 // extends/implements positions: 2111 // class Foo implements {bar: number} { } 2112 nextToken(); 2113 if (FormatTok->is(tok::l_brace)) { 2114 tryToParseBracedList(); 2115 continue; 2116 } 2117 } 2118 bool IsNonMacroIdentifier = 2119 FormatTok->is(tok::identifier) && 2120 FormatTok->TokenText != FormatTok->TokenText.upper(); 2121 nextToken(); 2122 // We can have macros or attributes in between 'class' and the class name. 2123 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 2124 parseParens(); 2125 } 2126 2127 // Note that parsing away template declarations here leads to incorrectly 2128 // accepting function declarations as record declarations. 2129 // In general, we cannot solve this problem. Consider: 2130 // class A<int> B() {} 2131 // which can be a function definition or a class definition when B() is a 2132 // macro. If we find enough real-world cases where this is a problem, we 2133 // can parse for the 'template' keyword in the beginning of the statement, 2134 // and thus rule out the record production in case there is no template 2135 // (this would still leave us with an ambiguity between template function 2136 // and class declarations). 
2137 if (FormatTok->isOneOf(tok::colon, tok::less)) { 2138 while (!eof()) { 2139 if (FormatTok->is(tok::l_brace)) { 2140 calculateBraceTypes(/*ExpectClassBody=*/true); 2141 if (!tryToParseBracedList()) 2142 break; 2143 } 2144 if (FormatTok->Tok.is(tok::semi)) 2145 return; 2146 nextToken(); 2147 } 2148 } 2149 if (FormatTok->Tok.is(tok::l_brace)) { 2150 if (ParseAsExpr) { 2151 parseChildBlock(); 2152 } else { 2153 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2154 addUnwrappedLine(); 2155 2156 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2157 /*MunchSemi=*/false); 2158 } 2159 } 2160 // There is no addUnwrappedLine() here so that we fall through to parsing a 2161 // structural element afterwards. Thus, in "class A {} n, m;", 2162 // "} n, m;" will end up in one unwrapped line. 2163 } 2164 2165 void UnwrappedLineParser::parseObjCMethod() { 2166 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && 2167 "'(' or identifier expected."); 2168 do { 2169 if (FormatTok->Tok.is(tok::semi)) { 2170 nextToken(); 2171 addUnwrappedLine(); 2172 return; 2173 } else if (FormatTok->Tok.is(tok::l_brace)) { 2174 if (Style.BraceWrapping.AfterFunction) 2175 addUnwrappedLine(); 2176 parseBlock(/*MustBeDeclaration=*/false); 2177 addUnwrappedLine(); 2178 return; 2179 } else { 2180 nextToken(); 2181 } 2182 } while (!eof()); 2183 } 2184 2185 void UnwrappedLineParser::parseObjCProtocolList() { 2186 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 2187 do { 2188 nextToken(); 2189 // Early exit in case someone forgot a close angle. 2190 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2191 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2192 return; 2193 } while (!eof() && FormatTok->Tok.isNot(tok::greater)); 2194 nextToken(); // Skip '>'. 
2195 } 2196 2197 void UnwrappedLineParser::parseObjCUntilAtEnd() { 2198 do { 2199 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2200 nextToken(); 2201 addUnwrappedLine(); 2202 break; 2203 } 2204 if (FormatTok->is(tok::l_brace)) { 2205 parseBlock(/*MustBeDeclaration=*/false); 2206 // In ObjC interfaces, nothing should be following the "}". 2207 addUnwrappedLine(); 2208 } else if (FormatTok->is(tok::r_brace)) { 2209 // Ignore stray "}". parseStructuralElement doesn't consume them. 2210 nextToken(); 2211 addUnwrappedLine(); 2212 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 2213 nextToken(); 2214 parseObjCMethod(); 2215 } else { 2216 parseStructuralElement(); 2217 } 2218 } while (!eof()); 2219 } 2220 2221 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2222 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 2223 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 2224 nextToken(); 2225 nextToken(); // interface name 2226 2227 // @interface can be followed by a lightweight generic 2228 // specialization list, then either a base class or a category. 2229 if (FormatTok->Tok.is(tok::less)) { 2230 // Unlike protocol lists, generic parameterizations support 2231 // nested angles: 2232 // 2233 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 2234 // NSObject <NSCopying, NSSecureCoding> 2235 // 2236 // so we need to count how many open angles we have left. 2237 unsigned NumOpenAngles = 1; 2238 do { 2239 nextToken(); 2240 // Early exit in case someone forgot a close angle. 2241 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2242 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2243 break; 2244 if (FormatTok->Tok.is(tok::less)) 2245 ++NumOpenAngles; 2246 else if (FormatTok->Tok.is(tok::greater)) { 2247 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 2248 --NumOpenAngles; 2249 } 2250 } while (!eof() && NumOpenAngles != 0); 2251 nextToken(); // Skip '>'. 
2252 } 2253 if (FormatTok->Tok.is(tok::colon)) { 2254 nextToken(); 2255 nextToken(); // base class name 2256 } else if (FormatTok->Tok.is(tok::l_paren)) 2257 // Skip category, if present. 2258 parseParens(); 2259 2260 if (FormatTok->Tok.is(tok::less)) 2261 parseObjCProtocolList(); 2262 2263 if (FormatTok->Tok.is(tok::l_brace)) { 2264 if (Style.BraceWrapping.AfterObjCDeclaration) 2265 addUnwrappedLine(); 2266 parseBlock(/*MustBeDeclaration=*/true); 2267 } 2268 2269 // With instance variables, this puts '}' on its own line. Without instance 2270 // variables, this ends the @interface line. 2271 addUnwrappedLine(); 2272 2273 parseObjCUntilAtEnd(); 2274 } 2275 2276 // Returns true for the declaration/definition form of @protocol, 2277 // false for the expression form. 2278 bool UnwrappedLineParser::parseObjCProtocol() { 2279 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 2280 nextToken(); 2281 2282 if (FormatTok->is(tok::l_paren)) 2283 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 2284 return false; 2285 2286 // The definition/declaration form, 2287 // @protocol Foo 2288 // - (int)someMethod; 2289 // @end 2290 2291 nextToken(); // protocol name 2292 2293 if (FormatTok->Tok.is(tok::less)) 2294 parseObjCProtocolList(); 2295 2296 // Check for protocol declaration. 2297 if (FormatTok->Tok.is(tok::semi)) { 2298 nextToken(); 2299 addUnwrappedLine(); 2300 return true; 2301 } 2302 2303 addUnwrappedLine(); 2304 parseObjCUntilAtEnd(); 2305 return true; 2306 } 2307 2308 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2309 bool IsImport = FormatTok->is(Keywords.kw_import); 2310 assert(IsImport || FormatTok->is(tok::kw_export)); 2311 nextToken(); 2312 2313 // Consume the "default" in "export default class/function". 2314 if (FormatTok->is(tok::kw_default)) 2315 nextToken(); 2316 2317 // Consume "async function", "function" and "default function", so that these 2318 // get parsed as free-standing JS functions, i.e. 
do not require a trailing 2319 // semicolon. 2320 if (FormatTok->is(Keywords.kw_async)) 2321 nextToken(); 2322 if (FormatTok->is(Keywords.kw_function)) { 2323 nextToken(); 2324 return; 2325 } 2326 2327 // For imports, `export *`, `export {...}`, consume the rest of the line up 2328 // to the terminating `;`. For everything else, just return and continue 2329 // parsing the structural element, i.e. the declaration or expression for 2330 // `export default`. 2331 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2332 !FormatTok->isStringLiteral()) 2333 return; 2334 2335 while (!eof()) { 2336 if (FormatTok->is(tok::semi)) 2337 return; 2338 if (Line->Tokens.empty()) { 2339 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2340 // import statement should terminate. 2341 return; 2342 } 2343 if (FormatTok->is(tok::l_brace)) { 2344 FormatTok->BlockKind = BK_Block; 2345 nextToken(); 2346 parseBracedList(); 2347 } else { 2348 nextToken(); 2349 } 2350 } 2351 } 2352 2353 void UnwrappedLineParser::parseStatementMacro() 2354 { 2355 nextToken(); 2356 if (FormatTok->is(tok::l_paren)) 2357 parseParens(); 2358 if (FormatTok->is(tok::semi)) 2359 nextToken(); 2360 addUnwrappedLine(); 2361 } 2362 2363 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2364 StringRef Prefix = "") { 2365 llvm::dbgs() << Prefix << "Line(" << Line.Level 2366 << ", FSC=" << Line.FirstStartColumn << ")" 2367 << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 2368 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2369 E = Line.Tokens.end(); 2370 I != E; ++I) { 2371 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2372 << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn 2373 << "] "; 2374 } 2375 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2376 E = Line.Tokens.end(); 2377 I != E; ++I) { 2378 const UnwrappedLineNode &Node = *I; 2379 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2380 I = Node.Children.begin(), 2381 E = Node.Children.end(); 2382 I != E; ++I) { 2383 printDebugInfo(*I, "\nChild: "); 2384 } 2385 } 2386 llvm::dbgs() << "\n"; 2387 } 2388 2389 void UnwrappedLineParser::addUnwrappedLine() { 2390 if (Line->Tokens.empty()) 2391 return; 2392 LLVM_DEBUG({ 2393 if (CurrentLines == &Lines) 2394 printDebugInfo(*Line); 2395 }); 2396 CurrentLines->push_back(std::move(*Line)); 2397 Line->Tokens.clear(); 2398 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2399 Line->FirstStartColumn = 0; 2400 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2401 CurrentLines->append( 2402 std::make_move_iterator(PreprocessorDirectives.begin()), 2403 std::make_move_iterator(PreprocessorDirectives.end())); 2404 PreprocessorDirectives.clear(); 2405 } 2406 // Disconnect the current token from the last token on the previous line. 2407 FormatTok->Previous = nullptr; 2408 } 2409 2410 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2411 2412 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2413 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2414 FormatTok.NewlinesBefore > 0; 2415 } 2416 2417 // Checks if \p FormatTok is a line comment that continues the line comment 2418 // section on \p Line. 
2419 static bool continuesLineCommentSection(const FormatToken &FormatTok, 2420 const UnwrappedLine &Line, 2421 llvm::Regex &CommentPragmasRegex) { 2422 if (Line.Tokens.empty()) 2423 return false; 2424 2425 StringRef IndentContent = FormatTok.TokenText; 2426 if (FormatTok.TokenText.startswith("//") || 2427 FormatTok.TokenText.startswith("/*")) 2428 IndentContent = FormatTok.TokenText.substr(2); 2429 if (CommentPragmasRegex.match(IndentContent)) 2430 return false; 2431 2432 // If Line starts with a line comment, then FormatTok continues the comment 2433 // section if its original column is greater or equal to the original start 2434 // column of the line. 2435 // 2436 // Define the min column token of a line as follows: if a line ends in '{' or 2437 // contains a '{' followed by a line comment, then the min column token is 2438 // that '{'. Otherwise, the min column token of the line is the first token of 2439 // the line. 2440 // 2441 // If Line starts with a token other than a line comment, then FormatTok 2442 // continues the comment section if its original column is greater than the 2443 // original start column of the min column token of the line. 
2444 // 2445 // For example, the second line comment continues the first in these cases: 2446 // 2447 // // first line 2448 // // second line 2449 // 2450 // and: 2451 // 2452 // // first line 2453 // // second line 2454 // 2455 // and: 2456 // 2457 // int i; // first line 2458 // // second line 2459 // 2460 // and: 2461 // 2462 // do { // first line 2463 // // second line 2464 // int i; 2465 // } while (true); 2466 // 2467 // and: 2468 // 2469 // enum { 2470 // a, // first line 2471 // // second line 2472 // b 2473 // }; 2474 // 2475 // The second line comment doesn't continue the first in these cases: 2476 // 2477 // // first line 2478 // // second line 2479 // 2480 // and: 2481 // 2482 // int i; // first line 2483 // // second line 2484 // 2485 // and: 2486 // 2487 // do { // first line 2488 // // second line 2489 // int i; 2490 // } while (true); 2491 // 2492 // and: 2493 // 2494 // enum { 2495 // a, // first line 2496 // // second line 2497 // }; 2498 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2499 2500 // Scan for '{//'. If found, use the column of '{' as a min column for line 2501 // comment section continuation. 2502 const FormatToken *PreviousToken = nullptr; 2503 for (const UnwrappedLineNode &Node : Line.Tokens) { 2504 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2505 isLineComment(*Node.Tok)) { 2506 MinColumnToken = PreviousToken; 2507 break; 2508 } 2509 PreviousToken = Node.Tok; 2510 2511 // Grab the last newline preceding a token in this unwrapped line. 
2512 if (Node.Tok->NewlinesBefore > 0) { 2513 MinColumnToken = Node.Tok; 2514 } 2515 } 2516 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2517 MinColumnToken = PreviousToken; 2518 } 2519 2520 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2521 MinColumnToken); 2522 } 2523 2524 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2525 bool JustComments = Line->Tokens.empty(); 2526 for (SmallVectorImpl<FormatToken *>::const_iterator 2527 I = CommentsBeforeNextToken.begin(), 2528 E = CommentsBeforeNextToken.end(); 2529 I != E; ++I) { 2530 // Line comments that belong to the same line comment section are put on the 2531 // same line since later we might want to reflow content between them. 2532 // Additional fine-grained breaking of line comment sections is controlled 2533 // by the class BreakableLineCommentSection in case it is desirable to keep 2534 // several line comment sections in the same unwrapped line. 2535 // 2536 // FIXME: Consider putting separate line comment sections as children to the 2537 // unwrapped line instead. 
2538 (*I)->ContinuesLineCommentSection = 2539 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2540 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2541 addUnwrappedLine(); 2542 pushToken(*I); 2543 } 2544 if (NewlineBeforeNext && JustComments) 2545 addUnwrappedLine(); 2546 CommentsBeforeNextToken.clear(); 2547 } 2548 2549 void UnwrappedLineParser::nextToken(int LevelDifference) { 2550 if (eof()) 2551 return; 2552 flushComments(isOnNewLine(*FormatTok)); 2553 pushToken(FormatTok); 2554 FormatToken *Previous = FormatTok; 2555 if (Style.Language != FormatStyle::LK_JavaScript) 2556 readToken(LevelDifference); 2557 else 2558 readTokenWithJavaScriptASI(); 2559 FormatTok->Previous = Previous; 2560 } 2561 2562 void UnwrappedLineParser::distributeComments( 2563 const SmallVectorImpl<FormatToken *> &Comments, 2564 const FormatToken *NextTok) { 2565 // Whether or not a line comment token continues a line is controlled by 2566 // the method continuesLineCommentSection, with the following caveat: 2567 // 2568 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2569 // that each comment line from the trail is aligned with the next token, if 2570 // the next token exists. If a trail exists, the beginning of the maximal 2571 // trail is marked as a start of a new comment section. 2572 // 2573 // For example in this code: 2574 // 2575 // int a; // line about a 2576 // // line 1 about b 2577 // // line 2 about b 2578 // int b; 2579 // 2580 // the two lines about b form a maximal trail, so there are two sections, the 2581 // first one consisting of the single comment "// line about a" and the 2582 // second one consisting of the next two comments. 2583 if (Comments.empty()) 2584 return; 2585 bool ShouldPushCommentsInCurrentLine = true; 2586 bool HasTrailAlignedWithNextToken = false; 2587 unsigned StartOfTrailAlignedWithNextToken = 0; 2588 if (NextTok) { 2589 // We are skipping the first element intentionally. 
2590 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2591 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2592 HasTrailAlignedWithNextToken = true; 2593 StartOfTrailAlignedWithNextToken = i; 2594 } 2595 } 2596 } 2597 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2598 FormatToken *FormatTok = Comments[i]; 2599 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 2600 FormatTok->ContinuesLineCommentSection = false; 2601 } else { 2602 FormatTok->ContinuesLineCommentSection = 2603 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2604 } 2605 if (!FormatTok->ContinuesLineCommentSection && 2606 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2607 ShouldPushCommentsInCurrentLine = false; 2608 } 2609 if (ShouldPushCommentsInCurrentLine) { 2610 pushToken(FormatTok); 2611 } else { 2612 CommentsBeforeNextToken.push_back(FormatTok); 2613 } 2614 } 2615 } 2616 2617 void UnwrappedLineParser::readToken(int LevelDifference) { 2618 SmallVector<FormatToken *, 1> Comments; 2619 do { 2620 FormatTok = Tokens->getNextToken(); 2621 assert(FormatTok); 2622 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2623 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2624 distributeComments(Comments, FormatTok); 2625 Comments.clear(); 2626 // If there is an unfinished unwrapped line, we flush the preprocessor 2627 // directives only after that unwrapped line was finished later. 
2628 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2629 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2630 assert((LevelDifference >= 0 || 2631 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 2632 "LevelDifference makes Line->Level negative"); 2633 Line->Level += LevelDifference; 2634 // Comments stored before the preprocessor directive need to be output 2635 // before the preprocessor directive, at the same level as the 2636 // preprocessor directive, as we consider them to apply to the directive. 2637 flushComments(isOnNewLine(*FormatTok)); 2638 parsePPDirective(); 2639 } 2640 while (FormatTok->Type == TT_ConflictStart || 2641 FormatTok->Type == TT_ConflictEnd || 2642 FormatTok->Type == TT_ConflictAlternative) { 2643 if (FormatTok->Type == TT_ConflictStart) { 2644 conditionalCompilationStart(/*Unreachable=*/false); 2645 } else if (FormatTok->Type == TT_ConflictAlternative) { 2646 conditionalCompilationAlternative(); 2647 } else if (FormatTok->Type == TT_ConflictEnd) { 2648 conditionalCompilationEnd(); 2649 } 2650 FormatTok = Tokens->getNextToken(); 2651 FormatTok->MustBreakBefore = true; 2652 } 2653 2654 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 2655 !Line->InPPDirective) { 2656 continue; 2657 } 2658 2659 if (!FormatTok->Tok.is(tok::comment)) { 2660 distributeComments(Comments, FormatTok); 2661 Comments.clear(); 2662 return; 2663 } 2664 2665 Comments.push_back(FormatTok); 2666 } while (!eof()); 2667 2668 distributeComments(Comments, nullptr); 2669 Comments.clear(); 2670 } 2671 2672 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2673 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2674 if (MustBreakBeforeNextToken) { 2675 Line->Tokens.back().Tok->MustBreakBefore = true; 2676 MustBreakBeforeNextToken = false; 2677 } 2678 } 2679 2680 } // end namespace format 2681 } // end namespace clang 2682