1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/Support/Debug.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 #include <algorithm> 21 22 #define DEBUG_TYPE "format-parser" 23 24 namespace clang { 25 namespace format { 26 27 class FormatTokenSource { 28 public: 29 virtual ~FormatTokenSource() {} 30 virtual FormatToken *getNextToken() = 0; 31 32 virtual unsigned getPosition() = 0; 33 virtual FormatToken *setPosition(unsigned Position) = 0; 34 }; 35 36 namespace { 37 38 class ScopedDeclarationState { 39 public: 40 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 41 bool MustBeDeclaration) 42 : Line(Line), Stack(Stack) { 43 Line.MustBeDeclaration = MustBeDeclaration; 44 Stack.push_back(MustBeDeclaration); 45 } 46 ~ScopedDeclarationState() { 47 Stack.pop_back(); 48 if (!Stack.empty()) 49 Line.MustBeDeclaration = Stack.back(); 50 else 51 Line.MustBeDeclaration = true; 52 } 53 54 private: 55 UnwrappedLine &Line; 56 std::vector<bool> &Stack; 57 }; 58 59 static bool isLineComment(const FormatToken &FormatTok) { 60 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 61 } 62 63 // Checks if \p FormatTok is a line comment that continues the line comment 64 // \p Previous. The original column of \p MinColumnToken is used to determine 65 // whether \p FormatTok is indented enough to the right to continue \p Previous. 66 static bool continuesLineComment(const FormatToken &FormatTok, 67 const FormatToken *Previous, 68 const FormatToken *MinColumnToken) { 69 if (!Previous || !MinColumnToken) 70 return false; 71 unsigned MinContinueColumn = 72 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 73 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 74 isLineComment(*Previous) && 75 FormatTok.OriginalColumn >= MinContinueColumn; 76 } 77 78 class ScopedMacroState : public FormatTokenSource { 79 public: 80 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 81 FormatToken *&ResetToken) 82 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 83 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 84 Token(nullptr), PreviousToken(nullptr) { 85 FakeEOF.Tok.startToken(); 86 FakeEOF.Tok.setKind(tok::eof); 87 TokenSource = this; 88 Line.Level = 0; 89 Line.InPPDirective = true; 90 } 91 92 ~ScopedMacroState() override { 93 TokenSource = PreviousTokenSource; 94 ResetToken = Token; 95 Line.InPPDirective = false; 96 Line.Level = PreviousLineLevel; 97 } 98 99 FormatToken *getNextToken() override { 100 // The \c UnwrappedLineParser guards against this by never calling 101 // \c getNextToken() after it has encountered the first eof token. 102 assert(!eof()); 103 PreviousToken = Token; 104 Token = PreviousTokenSource->getNextToken(); 105 if (eof()) 106 return &FakeEOF; 107 return Token; 108 } 109 110 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 111 112 FormatToken *setPosition(unsigned Position) override { 113 PreviousToken = nullptr; 114 Token = PreviousTokenSource->setPosition(Position); 115 return Token; 116 } 117 118 private: 119 bool eof() { 120 return Token && Token->HasUnescapedNewline && 121 !continuesLineComment(*Token, PreviousToken, 122 /*MinColumnToken=*/PreviousToken); 123 } 124 125 FormatToken FakeEOF; 126 UnwrappedLine &Line; 127 FormatTokenSource *&TokenSource; 128 FormatToken *&ResetToken; 129 unsigned PreviousLineLevel; 130 FormatTokenSource *PreviousTokenSource; 131 132 FormatToken *Token; 133 FormatToken *PreviousToken; 134 }; 135 136 } // end anonymous namespace 137 138 class ScopedLineState { 139 public: 140 ScopedLineState(UnwrappedLineParser &Parser, 141 bool SwitchToPreprocessorLines = false) 142 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 143 if (SwitchToPreprocessorLines) 144 Parser.CurrentLines = &Parser.PreprocessorDirectives; 145 else if (!Parser.Line->Tokens.empty()) 146 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 147 PreBlockLine = std::move(Parser.Line); 148 Parser.Line = llvm::make_unique<UnwrappedLine>(); 149 Parser.Line->Level = PreBlockLine->Level; 150 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 151 } 152 153 ~ScopedLineState() { 154 if (!Parser.Line->Tokens.empty()) { 155 Parser.addUnwrappedLine(); 156 } 157 assert(Parser.Line->Tokens.empty()); 158 Parser.Line = std::move(PreBlockLine); 159 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 160 Parser.MustBreakBeforeNextToken = true; 161 Parser.CurrentLines = OriginalLines; 162 } 163 164 private: 165 UnwrappedLineParser &Parser; 166 167 std::unique_ptr<UnwrappedLine> PreBlockLine; 168 SmallVectorImpl<UnwrappedLine> *OriginalLines; 169 }; 170 171 class CompoundStatementIndenter { 172 public: 173 CompoundStatementIndenter(UnwrappedLineParser *Parser, 174 const FormatStyle &Style, unsigned &LineLevel) 175 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 176 if (Style.BraceWrapping.AfterControlStatement) 177 Parser->addUnwrappedLine(); 178 if (Style.BraceWrapping.IndentBraces) 179 ++LineLevel; 180 } 181 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 182 183 private: 184 unsigned &LineLevel; 185 unsigned OldLineLevel; 186 }; 187 188 namespace { 189 190 class IndexedTokenSource : public FormatTokenSource { 191 public: 192 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 193 : Tokens(Tokens), Position(-1) {} 194 195 FormatToken *getNextToken() override { 196 ++Position; 197 return Tokens[Position]; 198 } 199 200 unsigned getPosition() override { 201 assert(Position >= 0); 202 return Position; 203 } 204 205 FormatToken *setPosition(unsigned P) override { 206 Position = P; 207 return Tokens[Position]; 208 } 209 210 void reset() { Position = -1; } 211 212 private: 213 ArrayRef<FormatToken *> Tokens; 214 int Position; 215 }; 216 217 } // end anonymous namespace 218 219 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 220 const AdditionalKeywords &Keywords, 221 unsigned FirstStartColumn, 222 ArrayRef<FormatToken *> Tokens, 223 UnwrappedLineConsumer &Callback) 224 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 225 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 226 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 227 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 228 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 229 ? IG_Rejected 230 : IG_Inited), 231 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 232 233 void UnwrappedLineParser::reset() { 234 PPBranchLevel = -1; 235 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 236 ? IG_Rejected 237 : IG_Inited; 238 IncludeGuardToken = nullptr; 239 Line.reset(new UnwrappedLine); 240 CommentsBeforeNextToken.clear(); 241 FormatTok = nullptr; 242 MustBreakBeforeNextToken = false; 243 PreprocessorDirectives.clear(); 244 CurrentLines = &Lines; 245 DeclarationScopeStack.clear(); 246 PPStack.clear(); 247 Line->FirstStartColumn = FirstStartColumn; 248 } 249 250 void UnwrappedLineParser::parse() { 251 IndexedTokenSource TokenSource(AllTokens); 252 Line->FirstStartColumn = FirstStartColumn; 253 do { 254 LLVM_DEBUG(llvm::dbgs() << "----\n"); 255 reset(); 256 Tokens = &TokenSource; 257 TokenSource.reset(); 258 259 readToken(); 260 parseFile(); 261 262 // If we found an include guard then all preprocessor directives (other than 263 // the guard) are over-indented by one. 264 if (IncludeGuard == IG_Found) 265 for (auto &Line : Lines) 266 if (Line.InPPDirective && Line.Level > 0) 267 --Line.Level; 268 269 // Create line with eof token. 270 pushToken(FormatTok); 271 addUnwrappedLine(); 272 273 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 274 E = Lines.end(); 275 I != E; ++I) { 276 Callback.consumeUnwrappedLine(*I); 277 } 278 Callback.finishRun(); 279 Lines.clear(); 280 while (!PPLevelBranchIndex.empty() && 281 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 282 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 283 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 284 } 285 if (!PPLevelBranchIndex.empty()) { 286 ++PPLevelBranchIndex.back(); 287 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 288 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 289 } 290 } while (!PPLevelBranchIndex.empty()); 291 } 292 293 void UnwrappedLineParser::parseFile() { 294 // The top-level context in a file always has declarations, except for pre- 295 // processor directives and JavaScript files. 296 bool MustBeDeclaration = 297 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 298 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 299 MustBeDeclaration); 300 if (Style.Language == FormatStyle::LK_TextProto) 301 parseBracedList(); 302 else 303 parseLevel(/*HasOpeningBrace=*/false); 304 // Make sure to format the remaining tokens. 305 // 306 // LK_TextProto is special since its top-level is parsed as the body of a 307 // braced list, which does not necessarily have natural line separators such 308 // as a semicolon. Comments after the last entry that have been determined to 309 // not belong to that line, as in: 310 // key: value 311 // // endfile comment 312 // do not have a chance to be put on a line of their own until this point. 313 // Here we add this newline before end-of-file comments. 314 if (Style.Language == FormatStyle::LK_TextProto && 315 !CommentsBeforeNextToken.empty()) 316 addUnwrappedLine(); 317 flushComments(true); 318 addUnwrappedLine(); 319 } 320 321 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 322 bool SwitchLabelEncountered = false; 323 do { 324 tok::TokenKind kind = FormatTok->Tok.getKind(); 325 if (FormatTok->Type == TT_MacroBlockBegin) { 326 kind = tok::l_brace; 327 } else if (FormatTok->Type == TT_MacroBlockEnd) { 328 kind = tok::r_brace; 329 } 330 331 switch (kind) { 332 case tok::comment: 333 nextToken(); 334 addUnwrappedLine(); 335 break; 336 case tok::l_brace: 337 // FIXME: Add parameter whether this can happen - if this happens, we must 338 // be in a non-declaration context. 339 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 340 continue; 341 parseBlock(/*MustBeDeclaration=*/false); 342 addUnwrappedLine(); 343 break; 344 case tok::r_brace: 345 if (HasOpeningBrace) 346 return; 347 nextToken(); 348 addUnwrappedLine(); 349 break; 350 case tok::kw_default: { 351 unsigned StoredPosition = Tokens->getPosition(); 352 FormatToken *Next; 353 do { 354 Next = Tokens->getNextToken(); 355 } while (Next && Next->is(tok::comment)); 356 FormatTok = Tokens->setPosition(StoredPosition); 357 if (Next && Next->isNot(tok::colon)) { 358 // default not followed by ':' is not a case label; treat it like 359 // an identifier. 360 parseStructuralElement(); 361 break; 362 } 363 // Else, if it is 'default:', fall through to the case handling. 364 LLVM_FALLTHROUGH; 365 } 366 case tok::kw_case: 367 if (Style.Language == FormatStyle::LK_JavaScript && 368 Line->MustBeDeclaration) { 369 // A 'case: string' style field declaration. 370 parseStructuralElement(); 371 break; 372 } 373 if (!SwitchLabelEncountered && 374 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 375 ++Line->Level; 376 SwitchLabelEncountered = true; 377 parseStructuralElement(); 378 break; 379 default: 380 parseStructuralElement(); 381 break; 382 } 383 } while (!eof()); 384 } 385 386 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 387 // We'll parse forward through the tokens until we hit 388 // a closing brace or eof - note that getNextToken() will 389 // parse macros, so this will magically work inside macro 390 // definitions, too. 391 unsigned StoredPosition = Tokens->getPosition(); 392 FormatToken *Tok = FormatTok; 393 const FormatToken *PrevTok = Tok->Previous; 394 // Keep a stack of positions of lbrace tokens. We will 395 // update information about whether an lbrace starts a 396 // braced init list or a different block during the loop. 397 SmallVector<FormatToken *, 8> LBraceStack; 398 assert(Tok->Tok.is(tok::l_brace)); 399 do { 400 // Get next non-comment token. 401 FormatToken *NextTok; 402 unsigned ReadTokens = 0; 403 do { 404 NextTok = Tokens->getNextToken(); 405 ++ReadTokens; 406 } while (NextTok->is(tok::comment)); 407 408 switch (Tok->Tok.getKind()) { 409 case tok::l_brace: 410 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 411 if (PrevTok->isOneOf(tok::colon, tok::less)) 412 // A ':' indicates this code is in a type, or a braced list 413 // following a label in an object literal ({a: {b: 1}}). 414 // A '<' could be an object used in a comparison, but that is nonsense 415 // code (can never return true), so more likely it is a generic type 416 // argument (`X<{a: string; b: number}>`). 417 // The code below could be confused by semicolons between the 418 // individual members in a type member list, which would normally 419 // trigger BK_Block. In both cases, this must be parsed as an inline 420 // braced init. 421 Tok->BlockKind = BK_BracedInit; 422 else if (PrevTok->is(tok::r_paren)) 423 // `) { }` can only occur in function or method declarations in JS. 424 Tok->BlockKind = BK_Block; 425 } else { 426 Tok->BlockKind = BK_Unknown; 427 } 428 LBraceStack.push_back(Tok); 429 break; 430 case tok::r_brace: 431 if (LBraceStack.empty()) 432 break; 433 if (LBraceStack.back()->BlockKind == BK_Unknown) { 434 bool ProbablyBracedList = false; 435 if (Style.Language == FormatStyle::LK_Proto) { 436 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 437 } else { 438 // Using OriginalColumn to distinguish between ObjC methods and 439 // binary operators is a bit hacky. 440 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 441 NextTok->OriginalColumn == 0; 442 443 // If there is a comma, semicolon or right paren after the closing 444 // brace, we assume this is a braced initializer list. Note that 445 // regardless how we mark inner braces here, we will overwrite the 446 // BlockKind later if we parse a braced list (where all blocks 447 // inside are by default braced lists), or when we explicitly detect 448 // blocks (for example while parsing lambdas). 449 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 450 // braced list in JS. 451 ProbablyBracedList = 452 (Style.Language == FormatStyle::LK_JavaScript && 453 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 454 Keywords.kw_as)) || 455 (Style.isCpp() && NextTok->is(tok::l_paren)) || 456 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 457 tok::r_paren, tok::r_square, tok::l_brace, 458 tok::ellipsis) || 459 (NextTok->is(tok::identifier) && 460 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 461 (NextTok->is(tok::semi) && 462 (!ExpectClassBody || LBraceStack.size() != 1)) || 463 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 464 if (NextTok->is(tok::l_square)) { 465 // We can have an array subscript after a braced init 466 // list, but C++11 attributes are expected after blocks. 467 NextTok = Tokens->getNextToken(); 468 ++ReadTokens; 469 ProbablyBracedList = NextTok->isNot(tok::l_square); 470 } 471 } 472 if (ProbablyBracedList) { 473 Tok->BlockKind = BK_BracedInit; 474 LBraceStack.back()->BlockKind = BK_BracedInit; 475 } else { 476 Tok->BlockKind = BK_Block; 477 LBraceStack.back()->BlockKind = BK_Block; 478 } 479 } 480 LBraceStack.pop_back(); 481 break; 482 case tok::identifier: 483 if (!Tok->is(TT_StatementMacro)) 484 break; 485 LLVM_FALLTHROUGH; 486 case tok::at: 487 case tok::semi: 488 case tok::kw_if: 489 case tok::kw_while: 490 case tok::kw_for: 491 case tok::kw_switch: 492 case tok::kw_try: 493 case tok::kw___try: 494 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 495 LBraceStack.back()->BlockKind = BK_Block; 496 break; 497 default: 498 break; 499 } 500 PrevTok = Tok; 501 Tok = NextTok; 502 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 503 504 // Assume other blocks for all unclosed opening braces. 505 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 506 if (LBraceStack[i]->BlockKind == BK_Unknown) 507 LBraceStack[i]->BlockKind = BK_Block; 508 } 509 510 FormatTok = Tokens->setPosition(StoredPosition); 511 } 512 513 template <class T> 514 static inline void hash_combine(std::size_t &seed, const T &v) { 515 std::hash<T> hasher; 516 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 517 } 518 519 size_t UnwrappedLineParser::computePPHash() const { 520 size_t h = 0; 521 for (const auto &i : PPStack) { 522 hash_combine(h, size_t(i.Kind)); 523 hash_combine(h, i.Line); 524 } 525 return h; 526 } 527 528 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 529 bool MunchSemi) { 530 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 531 "'{' or macro block token expected"); 532 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 533 FormatTok->BlockKind = BK_Block; 534 535 size_t PPStartHash = computePPHash(); 536 537 unsigned InitialLevel = Line->Level; 538 nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); 539 540 if (MacroBlock && FormatTok->is(tok::l_paren)) 541 parseParens(); 542 543 size_t NbPreprocessorDirectives = 544 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 545 addUnwrappedLine(); 546 size_t OpeningLineIndex = 547 CurrentLines->empty() 548 ? (UnwrappedLine::kInvalidIndex) 549 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 550 551 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 552 MustBeDeclaration); 553 if (AddLevel) 554 ++Line->Level; 555 parseLevel(/*HasOpeningBrace=*/true); 556 557 if (eof()) 558 return; 559 560 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 561 : !FormatTok->is(tok::r_brace)) { 562 Line->Level = InitialLevel; 563 FormatTok->BlockKind = BK_Block; 564 return; 565 } 566 567 size_t PPEndHash = computePPHash(); 568 569 // Munch the closing brace. 570 nextToken(/*LevelDifference=*/AddLevel ? -1 : 0); 571 572 if (MacroBlock && FormatTok->is(tok::l_paren)) 573 parseParens(); 574 575 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 576 nextToken(); 577 Line->Level = InitialLevel; 578 579 if (PPStartHash == PPEndHash) { 580 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 581 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 582 // Update the opening line to add the forward reference as well 583 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 584 CurrentLines->size() - 1; 585 } 586 } 587 } 588 589 static bool isGoogScope(const UnwrappedLine &Line) { 590 // FIXME: Closure-library specific stuff should not be hard-coded but be 591 // configurable. 592 if (Line.Tokens.size() < 4) 593 return false; 594 auto I = Line.Tokens.begin(); 595 if (I->Tok->TokenText != "goog") 596 return false; 597 ++I; 598 if (I->Tok->isNot(tok::period)) 599 return false; 600 ++I; 601 if (I->Tok->TokenText != "scope") 602 return false; 603 ++I; 604 return I->Tok->is(tok::l_paren); 605 } 606 607 static bool isIIFE(const UnwrappedLine &Line, 608 const AdditionalKeywords &Keywords) { 609 // Look for the start of an immediately invoked anonymous function. 610 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 611 // This is commonly done in JavaScript to create a new, anonymous scope. 612 // Example: (function() { ... })() 613 if (Line.Tokens.size() < 3) 614 return false; 615 auto I = Line.Tokens.begin(); 616 if (I->Tok->isNot(tok::l_paren)) 617 return false; 618 ++I; 619 if (I->Tok->isNot(Keywords.kw_function)) 620 return false; 621 ++I; 622 return I->Tok->is(tok::l_paren); 623 } 624 625 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 626 const FormatToken &InitialToken) { 627 if (InitialToken.is(tok::kw_namespace)) 628 return Style.BraceWrapping.AfterNamespace; 629 if (InitialToken.is(tok::kw_class)) 630 return Style.BraceWrapping.AfterClass; 631 if (InitialToken.is(tok::kw_union)) 632 return Style.BraceWrapping.AfterUnion; 633 if (InitialToken.is(tok::kw_struct)) 634 return Style.BraceWrapping.AfterStruct; 635 return false; 636 } 637 638 void UnwrappedLineParser::parseChildBlock() { 639 FormatTok->BlockKind = BK_Block; 640 nextToken(); 641 { 642 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && 643 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 644 ScopedLineState LineState(*this); 645 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 646 /*MustBeDeclaration=*/false); 647 Line->Level += SkipIndent ? 0 : 1; 648 parseLevel(/*HasOpeningBrace=*/true); 649 flushComments(isOnNewLine(*FormatTok)); 650 Line->Level -= SkipIndent ? 0 : 1; 651 } 652 nextToken(); 653 } 654 655 void UnwrappedLineParser::parsePPDirective() { 656 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 657 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 658 nextToken(); 659 660 if (!FormatTok->Tok.getIdentifierInfo()) { 661 parsePPUnknown(); 662 return; 663 } 664 665 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 666 case tok::pp_define: 667 parsePPDefine(); 668 return; 669 case tok::pp_if: 670 parsePPIf(/*IfDef=*/false); 671 break; 672 case tok::pp_ifdef: 673 case tok::pp_ifndef: 674 parsePPIf(/*IfDef=*/true); 675 break; 676 case tok::pp_else: 677 parsePPElse(); 678 break; 679 case tok::pp_elif: 680 parsePPElIf(); 681 break; 682 case tok::pp_endif: 683 parsePPEndIf(); 684 break; 685 default: 686 parsePPUnknown(); 687 break; 688 } 689 } 690 691 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 692 size_t Line = CurrentLines->size(); 693 if (CurrentLines == &PreprocessorDirectives) 694 Line += Lines.size(); 695 696 if (Unreachable || 697 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 698 PPStack.push_back({PP_Unreachable, Line}); 699 else 700 PPStack.push_back({PP_Conditional, Line}); 701 } 702 703 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 704 ++PPBranchLevel; 705 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 706 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 707 PPLevelBranchIndex.push_back(0); 708 PPLevelBranchCount.push_back(0); 709 } 710 PPChainBranchIndex.push(0); 711 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 712 conditionalCompilationCondition(Unreachable || Skip); 713 } 714 715 void UnwrappedLineParser::conditionalCompilationAlternative() { 716 if (!PPStack.empty()) 717 PPStack.pop_back(); 718 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 719 if (!PPChainBranchIndex.empty()) 720 ++PPChainBranchIndex.top(); 721 conditionalCompilationCondition( 722 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 723 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 724 } 725 726 void UnwrappedLineParser::conditionalCompilationEnd() { 727 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 728 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 729 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 730 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 731 } 732 } 733 // Guard against #endif's without #if. 734 if (PPBranchLevel > -1) 735 --PPBranchLevel; 736 if (!PPChainBranchIndex.empty()) 737 PPChainBranchIndex.pop(); 738 if (!PPStack.empty()) 739 PPStack.pop_back(); 740 } 741 742 void UnwrappedLineParser::parsePPIf(bool IfDef) { 743 bool IfNDef = FormatTok->is(tok::pp_ifndef); 744 nextToken(); 745 bool Unreachable = false; 746 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 747 Unreachable = true; 748 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 749 Unreachable = true; 750 conditionalCompilationStart(Unreachable); 751 FormatToken *IfCondition = FormatTok; 752 // If there's a #ifndef on the first line, and the only lines before it are 753 // comments, it could be an include guard. 754 bool MaybeIncludeGuard = IfNDef; 755 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 756 for (auto &Line : Lines) { 757 if (!Line.Tokens.front().Tok->is(tok::comment)) { 758 MaybeIncludeGuard = false; 759 IncludeGuard = IG_Rejected; 760 break; 761 } 762 } 763 --PPBranchLevel; 764 parsePPUnknown(); 765 ++PPBranchLevel; 766 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 767 IncludeGuard = IG_IfNdefed; 768 IncludeGuardToken = IfCondition; 769 } 770 } 771 772 void UnwrappedLineParser::parsePPElse() { 773 // If a potential include guard has an #else, it's not an include guard. 774 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 775 IncludeGuard = IG_Rejected; 776 conditionalCompilationAlternative(); 777 if (PPBranchLevel > -1) 778 --PPBranchLevel; 779 parsePPUnknown(); 780 ++PPBranchLevel; 781 } 782 783 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 784 785 void UnwrappedLineParser::parsePPEndIf() { 786 conditionalCompilationEnd(); 787 parsePPUnknown(); 788 // If the #endif of a potential include guard is the last thing in the file, 789 // then we found an include guard. 790 unsigned TokenPosition = Tokens->getPosition(); 791 FormatToken *PeekNext = AllTokens[TokenPosition]; 792 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && 793 PeekNext->is(tok::eof) && 794 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 795 IncludeGuard = IG_Found; 796 } 797 798 void UnwrappedLineParser::parsePPDefine() { 799 nextToken(); 800 801 if (FormatTok->Tok.getKind() != tok::identifier) { 802 IncludeGuard = IG_Rejected; 803 IncludeGuardToken = nullptr; 804 parsePPUnknown(); 805 return; 806 } 807 808 if (IncludeGuard == IG_IfNdefed && 809 IncludeGuardToken->TokenText == FormatTok->TokenText) { 810 IncludeGuard = IG_Defined; 811 IncludeGuardToken = nullptr; 812 for (auto &Line : Lines) { 813 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 814 IncludeGuard = IG_Rejected; 815 break; 816 } 817 } 818 } 819 820 nextToken(); 821 if (FormatTok->Tok.getKind() == tok::l_paren && 822 FormatTok->WhitespaceRange.getBegin() == 823 FormatTok->WhitespaceRange.getEnd()) { 824 parseParens(); 825 } 826 if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) 827 Line->Level += PPBranchLevel + 1; 828 addUnwrappedLine(); 829 ++Line->Level; 830 831 // Errors during a preprocessor directive can only affect the layout of the 832 // preprocessor directive, and thus we ignore them. An alternative approach 833 // would be to use the same approach we use on the file level (no 834 // re-indentation if there was a structural error) within the macro 835 // definition. 836 parseFile(); 837 } 838 839 void UnwrappedLineParser::parsePPUnknown() { 840 do { 841 nextToken(); 842 } while (!eof()); 843 if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) 844 Line->Level += PPBranchLevel + 1; 845 addUnwrappedLine(); 846 } 847 848 // Here we blacklist certain tokens that are not usually the first token in an 849 // unwrapped line. This is used in attempt to distinguish macro calls without 850 // trailing semicolons from other constructs split to several lines. 851 static bool tokenCanStartNewLine(const clang::Token &Tok) { 852 // Semicolon can be a null-statement, l_square can be a start of a macro or 853 // a C++11 attribute, but this doesn't seem to be common. 854 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 855 Tok.isNot(tok::l_square) && 856 // Tokens that can only be used as binary operators and a part of 857 // overloaded operator names. 858 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 859 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 860 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 861 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 862 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 863 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 864 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 865 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 866 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 867 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 868 Tok.isNot(tok::lesslessequal) && 869 // Colon is used in labels, base class lists, initializer lists, 870 // range-based for loops, ternary operator, but should never be the 871 // first token in an unwrapped line. 872 Tok.isNot(tok::colon) && 873 // 'noexcept' is a trailing annotation. 874 Tok.isNot(tok::kw_noexcept); 875 } 876 877 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 878 const FormatToken *FormatTok) { 879 // FIXME: This returns true for C/C++ keywords like 'struct'. 880 return FormatTok->is(tok::identifier) && 881 (FormatTok->Tok.getIdentifierInfo() == nullptr || 882 !FormatTok->isOneOf( 883 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 884 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 885 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 886 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 887 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 888 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, 889 Keywords.kw_from)); 890 } 891 892 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 893 const FormatToken *FormatTok) { 894 return FormatTok->Tok.isLiteral() || 895 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 896 mustBeJSIdent(Keywords, FormatTok); 897 } 898 899 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 900 // when encountered after a value (see mustBeJSIdentOrValue). 901 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 902 const FormatToken *FormatTok) { 903 return FormatTok->isOneOf( 904 tok::kw_return, Keywords.kw_yield, 905 // conditionals 906 tok::kw_if, tok::kw_else, 907 // loops 908 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 909 // switch/case 910 tok::kw_switch, tok::kw_case, 911 // exceptions 912 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 913 // declaration 914 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 915 Keywords.kw_async, Keywords.kw_function, 916 // import/export 917 Keywords.kw_import, tok::kw_export); 918 } 919 920 // readTokenWithJavaScriptASI reads the next token and terminates the current 921 // line if JavaScript Automatic Semicolon Insertion must 922 // happen between the current token and the next token. 923 // 924 // This method is conservative - it cannot cover all edge cases of JavaScript, 925 // but only aims to correctly handle certain well known cases. It *must not* 926 // return true in speculative cases. 927 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 928 FormatToken *Previous = FormatTok; 929 readToken(); 930 FormatToken *Next = FormatTok; 931 932 bool IsOnSameLine = 933 CommentsBeforeNextToken.empty() 934 ? Next->NewlinesBefore == 0 935 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 936 if (IsOnSameLine) 937 return; 938 939 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 940 bool PreviousStartsTemplateExpr = 941 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 942 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 943 // If the line contains an '@' sign, the previous token might be an 944 // annotation, which can precede another identifier/value. 945 bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(), 946 [](UnwrappedLineNode &LineNode) { 947 return LineNode.Tok->is(tok::at); 948 }) != Line->Tokens.end(); 949 if (HasAt) 950 return; 951 } 952 if (Next->is(tok::exclaim) && PreviousMustBeValue) 953 return addUnwrappedLine(); 954 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 955 bool NextEndsTemplateExpr = 956 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 957 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 958 (PreviousMustBeValue || 959 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 960 tok::minusminus))) 961 return addUnwrappedLine(); 962 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 963 isJSDeclOrStmt(Keywords, Next)) 964 return addUnwrappedLine(); 965 } 966 967 void UnwrappedLineParser::parseStructuralElement() { 968 assert(!FormatTok->is(tok::l_brace)); 969 if (Style.Language == FormatStyle::LK_TableGen && 970 FormatTok->is(tok::pp_include)) { 971 nextToken(); 972 if (FormatTok->is(tok::string_literal)) 973 nextToken(); 974 addUnwrappedLine(); 975 return; 976 } 977 switch (FormatTok->Tok.getKind()) { 978 case tok::kw_asm: 979 nextToken(); 980 if (FormatTok->is(tok::l_brace)) { 981 FormatTok->Type = TT_InlineASMBrace; 982 nextToken(); 983 while (FormatTok && FormatTok->isNot(tok::eof)) { 984 if (FormatTok->is(tok::r_brace)) { 985 FormatTok->Type = TT_InlineASMBrace; 986 nextToken(); 987 addUnwrappedLine(); 988 break; 989 } 990 FormatTok->Finalized = true; 991 nextToken(); 992 } 993 } 994 break; 995 case tok::kw_namespace: 996 parseNamespace(); 997 return; 998 case tok::kw_public: 999 case tok::kw_protected: 1000 case tok::kw_private: 1001 if (Style.Language == FormatStyle::LK_Java || 1002 Style.Language == FormatStyle::LK_JavaScript) 1003 nextToken(); 1004 else 1005 parseAccessSpecifier(); 1006 return; 1007 case tok::kw_if: 1008 parseIfThenElse(); 1009 return; 1010 case tok::kw_for: 1011 case tok::kw_while: 1012 parseForOrWhileLoop(); 1013 return; 1014 case tok::kw_do: 1015 parseDoWhile(); 1016 return; 1017 case tok::kw_switch: 1018 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1019 // 'switch: string' field declaration. 1020 break; 1021 parseSwitch(); 1022 return; 1023 case tok::kw_default: 1024 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1025 // 'default: string' field declaration. 1026 break; 1027 nextToken(); 1028 if (FormatTok->is(tok::colon)) { 1029 parseLabel(); 1030 return; 1031 } 1032 // e.g. "default void f() {}" in a Java interface. 1033 break; 1034 case tok::kw_case: 1035 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1036 // 'case: string' field declaration. 1037 break; 1038 parseCaseLabel(); 1039 return; 1040 case tok::kw_try: 1041 case tok::kw___try: 1042 parseTryCatch(); 1043 return; 1044 case tok::kw_extern: 1045 nextToken(); 1046 if (FormatTok->Tok.is(tok::string_literal)) { 1047 nextToken(); 1048 if (FormatTok->Tok.is(tok::l_brace)) { 1049 if (Style.BraceWrapping.AfterExternBlock) { 1050 addUnwrappedLine(); 1051 parseBlock(/*MustBeDeclaration=*/true); 1052 } else { 1053 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 1054 } 1055 addUnwrappedLine(); 1056 return; 1057 } 1058 } 1059 break; 1060 case tok::kw_export: 1061 if (Style.Language == FormatStyle::LK_JavaScript) { 1062 parseJavaScriptEs6ImportExport(); 1063 return; 1064 } 1065 if (!Style.isCpp()) 1066 break; 1067 // Handle C++ "(inline|export) namespace". 1068 LLVM_FALLTHROUGH; 1069 case tok::kw_inline: 1070 nextToken(); 1071 if (FormatTok->Tok.is(tok::kw_namespace)) { 1072 parseNamespace(); 1073 return; 1074 } 1075 break; 1076 case tok::identifier: 1077 if (FormatTok->is(TT_ForEachMacro)) { 1078 parseForOrWhileLoop(); 1079 return; 1080 } 1081 if (FormatTok->is(TT_MacroBlockBegin)) { 1082 parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, 1083 /*MunchSemi=*/false); 1084 return; 1085 } 1086 if (FormatTok->is(Keywords.kw_import)) { 1087 if (Style.Language == FormatStyle::LK_JavaScript) { 1088 parseJavaScriptEs6ImportExport(); 1089 return; 1090 } 1091 if (Style.Language == FormatStyle::LK_Proto) { 1092 nextToken(); 1093 if (FormatTok->is(tok::kw_public)) 1094 nextToken(); 1095 if (!FormatTok->is(tok::string_literal)) 1096 return; 1097 nextToken(); 1098 if (FormatTok->is(tok::semi)) 1099 nextToken(); 1100 addUnwrappedLine(); 1101 return; 1102 } 1103 } 1104 if (Style.isCpp() && 1105 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1106 Keywords.kw_slots, Keywords.kw_qslots)) { 1107 nextToken(); 1108 if (FormatTok->is(tok::colon)) { 1109 nextToken(); 1110 addUnwrappedLine(); 1111 return; 1112 } 1113 } 1114 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1115 parseStatementMacro(); 1116 return; 1117 } 1118 // In all other cases, parse the declaration. 1119 break; 1120 default: 1121 break; 1122 } 1123 do { 1124 const FormatToken *Previous = FormatTok->Previous; 1125 switch (FormatTok->Tok.getKind()) { 1126 case tok::at: 1127 nextToken(); 1128 if (FormatTok->Tok.is(tok::l_brace)) { 1129 nextToken(); 1130 parseBracedList(); 1131 break; 1132 } else if (Style.Language == FormatStyle::LK_Java && 1133 FormatTok->is(Keywords.kw_interface)) { 1134 nextToken(); 1135 break; 1136 } 1137 switch (FormatTok->Tok.getObjCKeywordID()) { 1138 case tok::objc_public: 1139 case tok::objc_protected: 1140 case tok::objc_package: 1141 case tok::objc_private: 1142 return parseAccessSpecifier(); 1143 case tok::objc_interface: 1144 case tok::objc_implementation: 1145 return parseObjCInterfaceOrImplementation(); 1146 case tok::objc_protocol: 1147 if (parseObjCProtocol()) 1148 return; 1149 break; 1150 case tok::objc_end: 1151 return; // Handled by the caller. 1152 case tok::objc_optional: 1153 case tok::objc_required: 1154 nextToken(); 1155 addUnwrappedLine(); 1156 return; 1157 case tok::objc_autoreleasepool: 1158 nextToken(); 1159 if (FormatTok->Tok.is(tok::l_brace)) { 1160 if (Style.BraceWrapping.AfterControlStatement) 1161 addUnwrappedLine(); 1162 parseBlock(/*MustBeDeclaration=*/false); 1163 } 1164 addUnwrappedLine(); 1165 return; 1166 case tok::objc_synchronized: 1167 nextToken(); 1168 if (FormatTok->Tok.is(tok::l_paren)) 1169 // Skip synchronization object 1170 parseParens(); 1171 if (FormatTok->Tok.is(tok::l_brace)) { 1172 if (Style.BraceWrapping.AfterControlStatement) 1173 addUnwrappedLine(); 1174 parseBlock(/*MustBeDeclaration=*/false); 1175 } 1176 addUnwrappedLine(); 1177 return; 1178 case tok::objc_try: 1179 // This branch isn't strictly necessary (the kw_try case below would 1180 // do this too after the tok::at is parsed above). But be explicit. 1181 parseTryCatch(); 1182 return; 1183 default: 1184 break; 1185 } 1186 break; 1187 case tok::kw_enum: 1188 // Ignore if this is part of "template <enum ...". 1189 if (Previous && Previous->is(tok::less)) { 1190 nextToken(); 1191 break; 1192 } 1193 1194 // parseEnum falls through and does not yet add an unwrapped line as an 1195 // enum definition can start a structural element. 1196 if (!parseEnum()) 1197 break; 1198 // This only applies for C++. 1199 if (!Style.isCpp()) { 1200 addUnwrappedLine(); 1201 return; 1202 } 1203 break; 1204 case tok::kw_typedef: 1205 nextToken(); 1206 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1207 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 1208 parseEnum(); 1209 break; 1210 case tok::kw_struct: 1211 case tok::kw_union: 1212 case tok::kw_class: 1213 // parseRecord falls through and does not yet add an unwrapped line as a 1214 // record declaration or definition can start a structural element. 1215 parseRecord(); 1216 // This does not apply for Java and JavaScript. 1217 if (Style.Language == FormatStyle::LK_Java || 1218 Style.Language == FormatStyle::LK_JavaScript) { 1219 if (FormatTok->is(tok::semi)) 1220 nextToken(); 1221 addUnwrappedLine(); 1222 return; 1223 } 1224 break; 1225 case tok::period: 1226 nextToken(); 1227 // In Java, classes have an implicit static member "class". 1228 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1229 FormatTok->is(tok::kw_class)) 1230 nextToken(); 1231 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 1232 FormatTok->Tok.getIdentifierInfo()) 1233 // JavaScript only has pseudo keywords, all keywords are allowed to 1234 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1235 nextToken(); 1236 break; 1237 case tok::semi: 1238 nextToken(); 1239 addUnwrappedLine(); 1240 return; 1241 case tok::r_brace: 1242 addUnwrappedLine(); 1243 return; 1244 case tok::l_paren: 1245 parseParens(); 1246 break; 1247 case tok::kw_operator: 1248 nextToken(); 1249 if (FormatTok->isBinaryOperator()) 1250 nextToken(); 1251 break; 1252 case tok::caret: 1253 nextToken(); 1254 if (FormatTok->Tok.isAnyIdentifier() || 1255 FormatTok->isSimpleTypeSpecifier()) 1256 nextToken(); 1257 if (FormatTok->is(tok::l_paren)) 1258 parseParens(); 1259 if (FormatTok->is(tok::l_brace)) 1260 parseChildBlock(); 1261 break; 1262 case tok::l_brace: 1263 if (!tryToParseBracedList()) { 1264 // A block outside of parentheses must be the last part of a 1265 // structural element. 1266 // FIXME: Figure out cases where this is not true, and add projections 1267 // for them (the one we know is missing are lambdas). 1268 if (Style.BraceWrapping.AfterFunction) 1269 addUnwrappedLine(); 1270 FormatTok->Type = TT_FunctionLBrace; 1271 parseBlock(/*MustBeDeclaration=*/false); 1272 addUnwrappedLine(); 1273 return; 1274 } 1275 // Otherwise this was a braced init list, and the structural 1276 // element continues. 1277 break; 1278 case tok::kw_try: 1279 // We arrive here when parsing function-try blocks. 1280 if (Style.BraceWrapping.AfterFunction) 1281 addUnwrappedLine(); 1282 parseTryCatch(); 1283 return; 1284 case tok::identifier: { 1285 if (FormatTok->is(TT_MacroBlockEnd)) { 1286 addUnwrappedLine(); 1287 return; 1288 } 1289 1290 // Function declarations (as opposed to function expressions) are parsed 1291 // on their own unwrapped line by continuing this loop. Function 1292 // expressions (functions that are not on their own line) must not create 1293 // a new unwrapped line, so they are special cased below. 1294 size_t TokenCount = Line->Tokens.size(); 1295 if (Style.Language == FormatStyle::LK_JavaScript && 1296 FormatTok->is(Keywords.kw_function) && 1297 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1298 Keywords.kw_async)))) { 1299 tryToParseJSFunction(); 1300 break; 1301 } 1302 if ((Style.Language == FormatStyle::LK_JavaScript || 1303 Style.Language == FormatStyle::LK_Java) && 1304 FormatTok->is(Keywords.kw_interface)) { 1305 if (Style.Language == FormatStyle::LK_JavaScript) { 1306 // In JavaScript/TypeScript, "interface" can be used as a standalone 1307 // identifier, e.g. in `var interface = 1;`. If "interface" is 1308 // followed by another identifier, it is very like to be an actual 1309 // interface declaration. 1310 unsigned StoredPosition = Tokens->getPosition(); 1311 FormatToken *Next = Tokens->getNextToken(); 1312 FormatTok = Tokens->setPosition(StoredPosition); 1313 if (Next && !mustBeJSIdent(Keywords, Next)) { 1314 nextToken(); 1315 break; 1316 } 1317 } 1318 parseRecord(); 1319 addUnwrappedLine(); 1320 return; 1321 } 1322 1323 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1324 parseStatementMacro(); 1325 return; 1326 } 1327 1328 // See if the following token should start a new unwrapped line. 1329 StringRef Text = FormatTok->TokenText; 1330 nextToken(); 1331 if (Line->Tokens.size() == 1 && 1332 // JS doesn't have macros, and within classes colons indicate fields, 1333 // not labels. 1334 Style.Language != FormatStyle::LK_JavaScript) { 1335 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1336 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1337 parseLabel(); 1338 return; 1339 } 1340 // Recognize function-like macro usages without trailing semicolon as 1341 // well as free-standing macros like Q_OBJECT. 1342 bool FunctionLike = FormatTok->is(tok::l_paren); 1343 if (FunctionLike) 1344 parseParens(); 1345 1346 bool FollowedByNewline = 1347 CommentsBeforeNextToken.empty() 1348 ? FormatTok->NewlinesBefore > 0 1349 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1350 1351 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1352 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 1353 addUnwrappedLine(); 1354 return; 1355 } 1356 } 1357 break; 1358 } 1359 case tok::equal: 1360 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 1361 // TT_JsFatArrow. The always start an expression or a child block if 1362 // followed by a curly. 1363 if (FormatTok->is(TT_JsFatArrow)) { 1364 nextToken(); 1365 if (FormatTok->is(tok::l_brace)) 1366 parseChildBlock(); 1367 break; 1368 } 1369 1370 nextToken(); 1371 if (FormatTok->Tok.is(tok::l_brace)) { 1372 nextToken(); 1373 parseBracedList(); 1374 } else if (Style.Language == FormatStyle::LK_Proto && 1375 FormatTok->Tok.is(tok::less)) { 1376 nextToken(); 1377 parseBracedList(/*ContinueOnSemicolons=*/false, 1378 /*ClosingBraceKind=*/tok::greater); 1379 } 1380 break; 1381 case tok::l_square: 1382 parseSquare(); 1383 break; 1384 case tok::kw_new: 1385 parseNew(); 1386 break; 1387 default: 1388 nextToken(); 1389 break; 1390 } 1391 } while (!eof()); 1392 } 1393 1394 bool UnwrappedLineParser::tryToParseLambda() { 1395 if (!Style.isCpp()) { 1396 nextToken(); 1397 return false; 1398 } 1399 assert(FormatTok->is(tok::l_square)); 1400 FormatToken &LSquare = *FormatTok; 1401 if (!tryToParseLambdaIntroducer()) 1402 return false; 1403 1404 while (FormatTok->isNot(tok::l_brace)) { 1405 if (FormatTok->isSimpleTypeSpecifier()) { 1406 nextToken(); 1407 continue; 1408 } 1409 switch (FormatTok->Tok.getKind()) { 1410 case tok::l_brace: 1411 break; 1412 case tok::l_paren: 1413 parseParens(); 1414 break; 1415 case tok::amp: 1416 case tok::star: 1417 case tok::kw_const: 1418 case tok::comma: 1419 case tok::less: 1420 case tok::greater: 1421 case tok::identifier: 1422 case tok::numeric_constant: 1423 case tok::coloncolon: 1424 case tok::kw_mutable: 1425 case tok::kw_noexcept: 1426 // Specialization of a template with an integer parameter can contain 1427 // arithmetic, logical, comparison and ternary operators. 1428 case tok::plus: 1429 case tok::minus: 1430 case tok::exclaim: 1431 case tok::tilde: 1432 case tok::slash: 1433 case tok::percent: 1434 case tok::lessless: 1435 case tok::pipe: 1436 case tok::pipepipe: 1437 case tok::ampamp: 1438 case tok::caret: 1439 case tok::equalequal: 1440 case tok::exclaimequal: 1441 case tok::greaterequal: 1442 case tok::lessequal: 1443 case tok::question: 1444 case tok::colon: 1445 case tok::kw_true: 1446 case tok::kw_false: 1447 nextToken(); 1448 break; 1449 case tok::arrow: 1450 // This might or might not actually be a lambda arrow (this could be an 1451 // ObjC method invocation followed by a dereferencing arrow). We might 1452 // reset this back to TT_Unknown in TokenAnnotator. 1453 FormatTok->Type = TT_LambdaArrow; 1454 nextToken(); 1455 break; 1456 default: 1457 return true; 1458 } 1459 } 1460 LSquare.Type = TT_LambdaLSquare; 1461 parseChildBlock(); 1462 return true; 1463 } 1464 1465 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1466 const FormatToken *Previous = FormatTok->Previous; 1467 if (Previous && 1468 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1469 tok::kw_delete, tok::l_square) || 1470 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1471 Previous->isSimpleTypeSpecifier())) { 1472 nextToken(); 1473 return false; 1474 } 1475 nextToken(); 1476 if (FormatTok->is(tok::l_square)) { 1477 return false; 1478 } 1479 parseSquare(/*LambdaIntroducer=*/true); 1480 return true; 1481 } 1482 1483 void UnwrappedLineParser::tryToParseJSFunction() { 1484 assert(FormatTok->is(Keywords.kw_function) || 1485 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1486 if (FormatTok->is(Keywords.kw_async)) 1487 nextToken(); 1488 // Consume "function". 1489 nextToken(); 1490 1491 // Consume * (generator function). Treat it like C++'s overloaded operators. 1492 if (FormatTok->is(tok::star)) { 1493 FormatTok->Type = TT_OverloadedOperator; 1494 nextToken(); 1495 } 1496 1497 // Consume function name. 1498 if (FormatTok->is(tok::identifier)) 1499 nextToken(); 1500 1501 if (FormatTok->isNot(tok::l_paren)) 1502 return; 1503 1504 // Parse formal parameter list. 1505 parseParens(); 1506 1507 if (FormatTok->is(tok::colon)) { 1508 // Parse a type definition. 1509 nextToken(); 1510 1511 // Eat the type declaration. For braced inline object types, balance braces, 1512 // otherwise just parse until finding an l_brace for the function body. 1513 if (FormatTok->is(tok::l_brace)) 1514 tryToParseBracedList(); 1515 else 1516 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1517 nextToken(); 1518 } 1519 1520 if (FormatTok->is(tok::semi)) 1521 return; 1522 1523 parseChildBlock(); 1524 } 1525 1526 bool UnwrappedLineParser::tryToParseBracedList() { 1527 if (FormatTok->BlockKind == BK_Unknown) 1528 calculateBraceTypes(); 1529 assert(FormatTok->BlockKind != BK_Unknown); 1530 if (FormatTok->BlockKind == BK_Block) 1531 return false; 1532 nextToken(); 1533 parseBracedList(); 1534 return true; 1535 } 1536 1537 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1538 tok::TokenKind ClosingBraceKind) { 1539 bool HasError = false; 1540 1541 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1542 // replace this by using parseAssigmentExpression() inside. 1543 do { 1544 if (Style.Language == FormatStyle::LK_JavaScript) { 1545 if (FormatTok->is(Keywords.kw_function) || 1546 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1547 tryToParseJSFunction(); 1548 continue; 1549 } 1550 if (FormatTok->is(TT_JsFatArrow)) { 1551 nextToken(); 1552 // Fat arrows can be followed by simple expressions or by child blocks 1553 // in curly braces. 1554 if (FormatTok->is(tok::l_brace)) { 1555 parseChildBlock(); 1556 continue; 1557 } 1558 } 1559 if (FormatTok->is(tok::l_brace)) { 1560 // Could be a method inside of a braced list `{a() { return 1; }}`. 1561 if (tryToParseBracedList()) 1562 continue; 1563 parseChildBlock(); 1564 } 1565 } 1566 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1567 nextToken(); 1568 return !HasError; 1569 } 1570 switch (FormatTok->Tok.getKind()) { 1571 case tok::caret: 1572 nextToken(); 1573 if (FormatTok->is(tok::l_brace)) { 1574 parseChildBlock(); 1575 } 1576 break; 1577 case tok::l_square: 1578 tryToParseLambda(); 1579 break; 1580 case tok::l_paren: 1581 parseParens(); 1582 // JavaScript can just have free standing methods and getters/setters in 1583 // object literals. Detect them by a "{" following ")". 1584 if (Style.Language == FormatStyle::LK_JavaScript) { 1585 if (FormatTok->is(tok::l_brace)) 1586 parseChildBlock(); 1587 break; 1588 } 1589 break; 1590 case tok::l_brace: 1591 // Assume there are no blocks inside a braced init list apart 1592 // from the ones we explicitly parse out (like lambdas). 1593 FormatTok->BlockKind = BK_BracedInit; 1594 nextToken(); 1595 parseBracedList(); 1596 break; 1597 case tok::less: 1598 if (Style.Language == FormatStyle::LK_Proto) { 1599 nextToken(); 1600 parseBracedList(/*ContinueOnSemicolons=*/false, 1601 /*ClosingBraceKind=*/tok::greater); 1602 } else { 1603 nextToken(); 1604 } 1605 break; 1606 case tok::semi: 1607 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1608 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1609 // used for error recovery if we have otherwise determined that this is 1610 // a braced list. 1611 if (Style.Language == FormatStyle::LK_JavaScript) { 1612 nextToken(); 1613 break; 1614 } 1615 HasError = true; 1616 if (!ContinueOnSemicolons) 1617 return !HasError; 1618 nextToken(); 1619 break; 1620 case tok::comma: 1621 nextToken(); 1622 break; 1623 default: 1624 nextToken(); 1625 break; 1626 } 1627 } while (!eof()); 1628 return false; 1629 } 1630 1631 void UnwrappedLineParser::parseParens() { 1632 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1633 nextToken(); 1634 do { 1635 switch (FormatTok->Tok.getKind()) { 1636 case tok::l_paren: 1637 parseParens(); 1638 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1639 parseChildBlock(); 1640 break; 1641 case tok::r_paren: 1642 nextToken(); 1643 return; 1644 case tok::r_brace: 1645 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1646 return; 1647 case tok::l_square: 1648 tryToParseLambda(); 1649 break; 1650 case tok::l_brace: 1651 if (!tryToParseBracedList()) 1652 parseChildBlock(); 1653 break; 1654 case tok::at: 1655 nextToken(); 1656 if (FormatTok->Tok.is(tok::l_brace)) { 1657 nextToken(); 1658 parseBracedList(); 1659 } 1660 break; 1661 case tok::kw_class: 1662 if (Style.Language == FormatStyle::LK_JavaScript) 1663 parseRecord(/*ParseAsExpr=*/true); 1664 else 1665 nextToken(); 1666 break; 1667 case tok::identifier: 1668 if (Style.Language == FormatStyle::LK_JavaScript && 1669 (FormatTok->is(Keywords.kw_function) || 1670 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1671 tryToParseJSFunction(); 1672 else 1673 nextToken(); 1674 break; 1675 default: 1676 nextToken(); 1677 break; 1678 } 1679 } while (!eof()); 1680 } 1681 1682 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 1683 if (!LambdaIntroducer) { 1684 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1685 if (tryToParseLambda()) 1686 return; 1687 } 1688 do { 1689 switch (FormatTok->Tok.getKind()) { 1690 case tok::l_paren: 1691 parseParens(); 1692 break; 1693 case tok::r_square: 1694 nextToken(); 1695 return; 1696 case tok::r_brace: 1697 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1698 return; 1699 case tok::l_square: 1700 parseSquare(); 1701 break; 1702 case tok::l_brace: { 1703 if (!tryToParseBracedList()) 1704 parseChildBlock(); 1705 break; 1706 } 1707 case tok::at: 1708 nextToken(); 1709 if (FormatTok->Tok.is(tok::l_brace)) { 1710 nextToken(); 1711 parseBracedList(); 1712 } 1713 break; 1714 default: 1715 nextToken(); 1716 break; 1717 } 1718 } while (!eof()); 1719 } 1720 1721 void UnwrappedLineParser::parseIfThenElse() { 1722 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1723 nextToken(); 1724 if (FormatTok->Tok.is(tok::kw_constexpr)) 1725 nextToken(); 1726 if (FormatTok->Tok.is(tok::l_paren)) 1727 parseParens(); 1728 bool NeedsUnwrappedLine = false; 1729 if (FormatTok->Tok.is(tok::l_brace)) { 1730 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1731 parseBlock(/*MustBeDeclaration=*/false); 1732 if (Style.BraceWrapping.BeforeElse) 1733 addUnwrappedLine(); 1734 else 1735 NeedsUnwrappedLine = true; 1736 } else { 1737 addUnwrappedLine(); 1738 ++Line->Level; 1739 parseStructuralElement(); 1740 --Line->Level; 1741 } 1742 if (FormatTok->Tok.is(tok::kw_else)) { 1743 nextToken(); 1744 if (FormatTok->Tok.is(tok::l_brace)) { 1745 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1746 parseBlock(/*MustBeDeclaration=*/false); 1747 addUnwrappedLine(); 1748 } else if (FormatTok->Tok.is(tok::kw_if)) { 1749 parseIfThenElse(); 1750 } else { 1751 addUnwrappedLine(); 1752 ++Line->Level; 1753 parseStructuralElement(); 1754 if (FormatTok->is(tok::eof)) 1755 addUnwrappedLine(); 1756 --Line->Level; 1757 } 1758 } else if (NeedsUnwrappedLine) { 1759 addUnwrappedLine(); 1760 } 1761 } 1762 1763 void UnwrappedLineParser::parseTryCatch() { 1764 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1765 nextToken(); 1766 bool NeedsUnwrappedLine = false; 1767 if (FormatTok->is(tok::colon)) { 1768 // We are in a function try block, what comes is an initializer list. 1769 nextToken(); 1770 while (FormatTok->is(tok::identifier)) { 1771 nextToken(); 1772 if (FormatTok->is(tok::l_paren)) 1773 parseParens(); 1774 if (FormatTok->is(tok::comma)) 1775 nextToken(); 1776 } 1777 } 1778 // Parse try with resource. 1779 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1780 parseParens(); 1781 } 1782 if (FormatTok->is(tok::l_brace)) { 1783 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1784 parseBlock(/*MustBeDeclaration=*/false); 1785 if (Style.BraceWrapping.BeforeCatch) { 1786 addUnwrappedLine(); 1787 } else { 1788 NeedsUnwrappedLine = true; 1789 } 1790 } else if (!FormatTok->is(tok::kw_catch)) { 1791 // The C++ standard requires a compound-statement after a try. 1792 // If there's none, we try to assume there's a structuralElement 1793 // and try to continue. 1794 addUnwrappedLine(); 1795 ++Line->Level; 1796 parseStructuralElement(); 1797 --Line->Level; 1798 } 1799 while (1) { 1800 if (FormatTok->is(tok::at)) 1801 nextToken(); 1802 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1803 tok::kw___finally) || 1804 ((Style.Language == FormatStyle::LK_Java || 1805 Style.Language == FormatStyle::LK_JavaScript) && 1806 FormatTok->is(Keywords.kw_finally)) || 1807 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1808 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1809 break; 1810 nextToken(); 1811 while (FormatTok->isNot(tok::l_brace)) { 1812 if (FormatTok->is(tok::l_paren)) { 1813 parseParens(); 1814 continue; 1815 } 1816 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1817 return; 1818 nextToken(); 1819 } 1820 NeedsUnwrappedLine = false; 1821 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1822 parseBlock(/*MustBeDeclaration=*/false); 1823 if (Style.BraceWrapping.BeforeCatch) 1824 addUnwrappedLine(); 1825 else 1826 NeedsUnwrappedLine = true; 1827 } 1828 if (NeedsUnwrappedLine) 1829 addUnwrappedLine(); 1830 } 1831 1832 void UnwrappedLineParser::parseNamespace() { 1833 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1834 1835 const FormatToken &InitialToken = *FormatTok; 1836 nextToken(); 1837 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1838 nextToken(); 1839 if (FormatTok->Tok.is(tok::l_brace)) { 1840 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1841 addUnwrappedLine(); 1842 1843 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1844 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1845 DeclarationScopeStack.size() > 1); 1846 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1847 // Munch the semicolon after a namespace. This is more common than one would 1848 // think. Puttin the semicolon into its own line is very ugly. 1849 if (FormatTok->Tok.is(tok::semi)) 1850 nextToken(); 1851 addUnwrappedLine(); 1852 } 1853 // FIXME: Add error handling. 1854 } 1855 1856 void UnwrappedLineParser::parseNew() { 1857 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1858 nextToken(); 1859 if (Style.Language != FormatStyle::LK_Java) 1860 return; 1861 1862 // In Java, we can parse everything up to the parens, which aren't optional. 1863 do { 1864 // There should not be a ;, { or } before the new's open paren. 1865 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1866 return; 1867 1868 // Consume the parens. 1869 if (FormatTok->is(tok::l_paren)) { 1870 parseParens(); 1871 1872 // If there is a class body of an anonymous class, consume that as child. 1873 if (FormatTok->is(tok::l_brace)) 1874 parseChildBlock(); 1875 return; 1876 } 1877 nextToken(); 1878 } while (!eof()); 1879 } 1880 1881 void UnwrappedLineParser::parseForOrWhileLoop() { 1882 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1883 "'for', 'while' or foreach macro expected"); 1884 nextToken(); 1885 // JS' for await ( ... 1886 if (Style.Language == FormatStyle::LK_JavaScript && 1887 FormatTok->is(Keywords.kw_await)) 1888 nextToken(); 1889 if (FormatTok->Tok.is(tok::l_paren)) 1890 parseParens(); 1891 if (FormatTok->Tok.is(tok::l_brace)) { 1892 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1893 parseBlock(/*MustBeDeclaration=*/false); 1894 addUnwrappedLine(); 1895 } else { 1896 addUnwrappedLine(); 1897 ++Line->Level; 1898 parseStructuralElement(); 1899 --Line->Level; 1900 } 1901 } 1902 1903 void UnwrappedLineParser::parseDoWhile() { 1904 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1905 nextToken(); 1906 if (FormatTok->Tok.is(tok::l_brace)) { 1907 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1908 parseBlock(/*MustBeDeclaration=*/false); 1909 if (Style.BraceWrapping.IndentBraces) 1910 addUnwrappedLine(); 1911 } else { 1912 addUnwrappedLine(); 1913 ++Line->Level; 1914 parseStructuralElement(); 1915 --Line->Level; 1916 } 1917 1918 // FIXME: Add error handling. 1919 if (!FormatTok->Tok.is(tok::kw_while)) { 1920 addUnwrappedLine(); 1921 return; 1922 } 1923 1924 nextToken(); 1925 parseStructuralElement(); 1926 } 1927 1928 void UnwrappedLineParser::parseLabel() { 1929 nextToken(); 1930 unsigned OldLineLevel = Line->Level; 1931 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1932 --Line->Level; 1933 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1934 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1935 parseBlock(/*MustBeDeclaration=*/false); 1936 if (FormatTok->Tok.is(tok::kw_break)) { 1937 if (Style.BraceWrapping.AfterControlStatement) 1938 addUnwrappedLine(); 1939 parseStructuralElement(); 1940 } 1941 addUnwrappedLine(); 1942 } else { 1943 if (FormatTok->is(tok::semi)) 1944 nextToken(); 1945 addUnwrappedLine(); 1946 } 1947 Line->Level = OldLineLevel; 1948 if (FormatTok->isNot(tok::l_brace)) { 1949 parseStructuralElement(); 1950 addUnwrappedLine(); 1951 } 1952 } 1953 1954 void UnwrappedLineParser::parseCaseLabel() { 1955 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1956 // FIXME: fix handling of complex expressions here. 1957 do { 1958 nextToken(); 1959 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1960 parseLabel(); 1961 } 1962 1963 void UnwrappedLineParser::parseSwitch() { 1964 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1965 nextToken(); 1966 if (FormatTok->Tok.is(tok::l_paren)) 1967 parseParens(); 1968 if (FormatTok->Tok.is(tok::l_brace)) { 1969 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1970 parseBlock(/*MustBeDeclaration=*/false); 1971 addUnwrappedLine(); 1972 } else { 1973 addUnwrappedLine(); 1974 ++Line->Level; 1975 parseStructuralElement(); 1976 --Line->Level; 1977 } 1978 } 1979 1980 void UnwrappedLineParser::parseAccessSpecifier() { 1981 nextToken(); 1982 // Understand Qt's slots. 1983 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 1984 nextToken(); 1985 // Otherwise, we don't know what it is, and we'd better keep the next token. 1986 if (FormatTok->Tok.is(tok::colon)) 1987 nextToken(); 1988 addUnwrappedLine(); 1989 } 1990 1991 bool UnwrappedLineParser::parseEnum() { 1992 // Won't be 'enum' for NS_ENUMs. 1993 if (FormatTok->Tok.is(tok::kw_enum)) 1994 nextToken(); 1995 1996 // In TypeScript, "enum" can also be used as property name, e.g. in interface 1997 // declarations. An "enum" keyword followed by a colon would be a syntax 1998 // error and thus assume it is just an identifier. 1999 if (Style.Language == FormatStyle::LK_JavaScript && 2000 FormatTok->isOneOf(tok::colon, tok::question)) 2001 return false; 2002 2003 // Eat up enum class ... 2004 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 2005 nextToken(); 2006 2007 while (FormatTok->Tok.getIdentifierInfo() || 2008 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 2009 tok::greater, tok::comma, tok::question)) { 2010 nextToken(); 2011 // We can have macros or attributes in between 'enum' and the enum name. 2012 if (FormatTok->is(tok::l_paren)) 2013 parseParens(); 2014 if (FormatTok->is(tok::identifier)) { 2015 nextToken(); 2016 // If there are two identifiers in a row, this is likely an elaborate 2017 // return type. In Java, this can be "implements", etc. 2018 if (Style.isCpp() && FormatTok->is(tok::identifier)) 2019 return false; 2020 } 2021 } 2022 2023 // Just a declaration or something is wrong. 2024 if (FormatTok->isNot(tok::l_brace)) 2025 return true; 2026 FormatTok->BlockKind = BK_Block; 2027 2028 if (Style.Language == FormatStyle::LK_Java) { 2029 // Java enums are different. 2030 parseJavaEnumBody(); 2031 return true; 2032 } 2033 if (Style.Language == FormatStyle::LK_Proto) { 2034 parseBlock(/*MustBeDeclaration=*/true); 2035 return true; 2036 } 2037 2038 // Parse enum body. 2039 nextToken(); 2040 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 2041 if (HasError) { 2042 if (FormatTok->is(tok::semi)) 2043 nextToken(); 2044 addUnwrappedLine(); 2045 } 2046 return true; 2047 2048 // There is no addUnwrappedLine() here so that we fall through to parsing a 2049 // structural element afterwards. Thus, in "enum A {} n, m;", 2050 // "} n, m;" will end up in one unwrapped line. 2051 } 2052 2053 void UnwrappedLineParser::parseJavaEnumBody() { 2054 // Determine whether the enum is simple, i.e. does not have a semicolon or 2055 // constants with class bodies. Simple enums can be formatted like braced 2056 // lists, contracted to a single line, etc. 2057 unsigned StoredPosition = Tokens->getPosition(); 2058 bool IsSimple = true; 2059 FormatToken *Tok = Tokens->getNextToken(); 2060 while (Tok) { 2061 if (Tok->is(tok::r_brace)) 2062 break; 2063 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2064 IsSimple = false; 2065 break; 2066 } 2067 // FIXME: This will also mark enums with braces in the arguments to enum 2068 // constants as "not simple". This is probably fine in practice, though. 2069 Tok = Tokens->getNextToken(); 2070 } 2071 FormatTok = Tokens->setPosition(StoredPosition); 2072 2073 if (IsSimple) { 2074 nextToken(); 2075 parseBracedList(); 2076 addUnwrappedLine(); 2077 return; 2078 } 2079 2080 // Parse the body of a more complex enum. 2081 // First add a line for everything up to the "{". 2082 nextToken(); 2083 addUnwrappedLine(); 2084 ++Line->Level; 2085 2086 // Parse the enum constants. 2087 while (FormatTok) { 2088 if (FormatTok->is(tok::l_brace)) { 2089 // Parse the constant's class body. 2090 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2091 /*MunchSemi=*/false); 2092 } else if (FormatTok->is(tok::l_paren)) { 2093 parseParens(); 2094 } else if (FormatTok->is(tok::comma)) { 2095 nextToken(); 2096 addUnwrappedLine(); 2097 } else if (FormatTok->is(tok::semi)) { 2098 nextToken(); 2099 addUnwrappedLine(); 2100 break; 2101 } else if (FormatTok->is(tok::r_brace)) { 2102 addUnwrappedLine(); 2103 break; 2104 } else { 2105 nextToken(); 2106 } 2107 } 2108 2109 // Parse the class body after the enum's ";" if any. 2110 parseLevel(/*HasOpeningBrace=*/true); 2111 nextToken(); 2112 --Line->Level; 2113 addUnwrappedLine(); 2114 } 2115 2116 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 2117 const FormatToken &InitialToken = *FormatTok; 2118 nextToken(); 2119 2120 // The actual identifier can be a nested name specifier, and in macros 2121 // it is often token-pasted. 2122 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 2123 tok::kw___attribute, tok::kw___declspec, 2124 tok::kw_alignas) || 2125 ((Style.Language == FormatStyle::LK_Java || 2126 Style.Language == FormatStyle::LK_JavaScript) && 2127 FormatTok->isOneOf(tok::period, tok::comma))) { 2128 if (Style.Language == FormatStyle::LK_JavaScript && 2129 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 2130 // JavaScript/TypeScript supports inline object types in 2131 // extends/implements positions: 2132 // class Foo implements {bar: number} { } 2133 nextToken(); 2134 if (FormatTok->is(tok::l_brace)) { 2135 tryToParseBracedList(); 2136 continue; 2137 } 2138 } 2139 bool IsNonMacroIdentifier = 2140 FormatTok->is(tok::identifier) && 2141 FormatTok->TokenText != FormatTok->TokenText.upper(); 2142 nextToken(); 2143 // We can have macros or attributes in between 'class' and the class name. 2144 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 2145 parseParens(); 2146 } 2147 2148 // Note that parsing away template declarations here leads to incorrectly 2149 // accepting function declarations as record declarations. 2150 // In general, we cannot solve this problem. Consider: 2151 // class A<int> B() {} 2152 // which can be a function definition or a class definition when B() is a 2153 // macro. If we find enough real-world cases where this is a problem, we 2154 // can parse for the 'template' keyword in the beginning of the statement, 2155 // and thus rule out the record production in case there is no template 2156 // (this would still leave us with an ambiguity between template function 2157 // and class declarations). 2158 if (FormatTok->isOneOf(tok::colon, tok::less)) { 2159 while (!eof()) { 2160 if (FormatTok->is(tok::l_brace)) { 2161 calculateBraceTypes(/*ExpectClassBody=*/true); 2162 if (!tryToParseBracedList()) 2163 break; 2164 } 2165 if (FormatTok->Tok.is(tok::semi)) 2166 return; 2167 nextToken(); 2168 } 2169 } 2170 if (FormatTok->Tok.is(tok::l_brace)) { 2171 if (ParseAsExpr) { 2172 parseChildBlock(); 2173 } else { 2174 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2175 addUnwrappedLine(); 2176 2177 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2178 /*MunchSemi=*/false); 2179 } 2180 } 2181 // There is no addUnwrappedLine() here so that we fall through to parsing a 2182 // structural element afterwards. Thus, in "class A {} n, m;", 2183 // "} n, m;" will end up in one unwrapped line. 2184 } 2185 2186 void UnwrappedLineParser::parseObjCMethod() { 2187 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && 2188 "'(' or identifier expected."); 2189 do { 2190 if (FormatTok->Tok.is(tok::semi)) { 2191 nextToken(); 2192 addUnwrappedLine(); 2193 return; 2194 } else if (FormatTok->Tok.is(tok::l_brace)) { 2195 if (Style.BraceWrapping.AfterFunction) 2196 addUnwrappedLine(); 2197 parseBlock(/*MustBeDeclaration=*/false); 2198 addUnwrappedLine(); 2199 return; 2200 } else { 2201 nextToken(); 2202 } 2203 } while (!eof()); 2204 } 2205 2206 void UnwrappedLineParser::parseObjCProtocolList() { 2207 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 2208 do { 2209 nextToken(); 2210 // Early exit in case someone forgot a close angle. 2211 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2212 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2213 return; 2214 } while (!eof() && FormatTok->Tok.isNot(tok::greater)); 2215 nextToken(); // Skip '>'. 2216 } 2217 2218 void UnwrappedLineParser::parseObjCUntilAtEnd() { 2219 do { 2220 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2221 nextToken(); 2222 addUnwrappedLine(); 2223 break; 2224 } 2225 if (FormatTok->is(tok::l_brace)) { 2226 parseBlock(/*MustBeDeclaration=*/false); 2227 // In ObjC interfaces, nothing should be following the "}". 2228 addUnwrappedLine(); 2229 } else if (FormatTok->is(tok::r_brace)) { 2230 // Ignore stray "}". parseStructuralElement doesn't consume them. 2231 nextToken(); 2232 addUnwrappedLine(); 2233 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 2234 nextToken(); 2235 parseObjCMethod(); 2236 } else { 2237 parseStructuralElement(); 2238 } 2239 } while (!eof()); 2240 } 2241 2242 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2243 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 2244 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 2245 nextToken(); 2246 nextToken(); // interface name 2247 2248 // @interface can be followed by a lightweight generic 2249 // specialization list, then either a base class or a category. 2250 if (FormatTok->Tok.is(tok::less)) { 2251 // Unlike protocol lists, generic parameterizations support 2252 // nested angles: 2253 // 2254 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 2255 // NSObject <NSCopying, NSSecureCoding> 2256 // 2257 // so we need to count how many open angles we have left. 2258 unsigned NumOpenAngles = 1; 2259 do { 2260 nextToken(); 2261 // Early exit in case someone forgot a close angle. 2262 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2263 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2264 break; 2265 if (FormatTok->Tok.is(tok::less)) 2266 ++NumOpenAngles; 2267 else if (FormatTok->Tok.is(tok::greater)) { 2268 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 2269 --NumOpenAngles; 2270 } 2271 } while (!eof() && NumOpenAngles != 0); 2272 nextToken(); // Skip '>'. 2273 } 2274 if (FormatTok->Tok.is(tok::colon)) { 2275 nextToken(); 2276 nextToken(); // base class name 2277 } else if (FormatTok->Tok.is(tok::l_paren)) 2278 // Skip category, if present. 2279 parseParens(); 2280 2281 if (FormatTok->Tok.is(tok::less)) 2282 parseObjCProtocolList(); 2283 2284 if (FormatTok->Tok.is(tok::l_brace)) { 2285 if (Style.BraceWrapping.AfterObjCDeclaration) 2286 addUnwrappedLine(); 2287 parseBlock(/*MustBeDeclaration=*/true); 2288 } 2289 2290 // With instance variables, this puts '}' on its own line. Without instance 2291 // variables, this ends the @interface line. 2292 addUnwrappedLine(); 2293 2294 parseObjCUntilAtEnd(); 2295 } 2296 2297 // Returns true for the declaration/definition form of @protocol, 2298 // false for the expression form. 2299 bool UnwrappedLineParser::parseObjCProtocol() { 2300 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 2301 nextToken(); 2302 2303 if (FormatTok->is(tok::l_paren)) 2304 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 2305 return false; 2306 2307 // The definition/declaration form, 2308 // @protocol Foo 2309 // - (int)someMethod; 2310 // @end 2311 2312 nextToken(); // protocol name 2313 2314 if (FormatTok->Tok.is(tok::less)) 2315 parseObjCProtocolList(); 2316 2317 // Check for protocol declaration. 2318 if (FormatTok->Tok.is(tok::semi)) { 2319 nextToken(); 2320 addUnwrappedLine(); 2321 return true; 2322 } 2323 2324 addUnwrappedLine(); 2325 parseObjCUntilAtEnd(); 2326 return true; 2327 } 2328 2329 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2330 bool IsImport = FormatTok->is(Keywords.kw_import); 2331 assert(IsImport || FormatTok->is(tok::kw_export)); 2332 nextToken(); 2333 2334 // Consume the "default" in "export default class/function". 2335 if (FormatTok->is(tok::kw_default)) 2336 nextToken(); 2337 2338 // Consume "async function", "function" and "default function", so that these 2339 // get parsed as free-standing JS functions, i.e. do not require a trailing 2340 // semicolon. 2341 if (FormatTok->is(Keywords.kw_async)) 2342 nextToken(); 2343 if (FormatTok->is(Keywords.kw_function)) { 2344 nextToken(); 2345 return; 2346 } 2347 2348 // For imports, `export *`, `export {...}`, consume the rest of the line up 2349 // to the terminating `;`. For everything else, just return and continue 2350 // parsing the structural element, i.e. the declaration or expression for 2351 // `export default`. 2352 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2353 !FormatTok->isStringLiteral()) 2354 return; 2355 2356 while (!eof()) { 2357 if (FormatTok->is(tok::semi)) 2358 return; 2359 if (Line->Tokens.empty()) { 2360 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2361 // import statement should terminate. 2362 return; 2363 } 2364 if (FormatTok->is(tok::l_brace)) { 2365 FormatTok->BlockKind = BK_Block; 2366 nextToken(); 2367 parseBracedList(); 2368 } else { 2369 nextToken(); 2370 } 2371 } 2372 } 2373 2374 void UnwrappedLineParser::parseStatementMacro() { 2375 nextToken(); 2376 if (FormatTok->is(tok::l_paren)) 2377 parseParens(); 2378 if (FormatTok->is(tok::semi)) 2379 nextToken(); 2380 addUnwrappedLine(); 2381 } 2382 2383 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2384 StringRef Prefix = "") { 2385 llvm::dbgs() << Prefix << "Line(" << Line.Level 2386 << ", FSC=" << Line.FirstStartColumn << ")" 2387 << (Line.InPPDirective ? " MACRO" : "") << ": "; 2388 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2389 E = Line.Tokens.end(); 2390 I != E; ++I) { 2391 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2392 << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn 2393 << "] "; 2394 } 2395 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2396 E = Line.Tokens.end(); 2397 I != E; ++I) { 2398 const UnwrappedLineNode &Node = *I; 2399 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2400 I = Node.Children.begin(), 2401 E = Node.Children.end(); 2402 I != E; ++I) { 2403 printDebugInfo(*I, "\nChild: "); 2404 } 2405 } 2406 llvm::dbgs() << "\n"; 2407 } 2408 2409 void UnwrappedLineParser::addUnwrappedLine() { 2410 if (Line->Tokens.empty()) 2411 return; 2412 LLVM_DEBUG({ 2413 if (CurrentLines == &Lines) 2414 printDebugInfo(*Line); 2415 }); 2416 CurrentLines->push_back(std::move(*Line)); 2417 Line->Tokens.clear(); 2418 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2419 Line->FirstStartColumn = 0; 2420 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2421 CurrentLines->append( 2422 std::make_move_iterator(PreprocessorDirectives.begin()), 2423 std::make_move_iterator(PreprocessorDirectives.end())); 2424 PreprocessorDirectives.clear(); 2425 } 2426 // Disconnect the current token from the last token on the previous line. 2427 FormatTok->Previous = nullptr; 2428 } 2429 2430 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2431 2432 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2433 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2434 FormatTok.NewlinesBefore > 0; 2435 } 2436 2437 // Checks if \p FormatTok is a line comment that continues the line comment 2438 // section on \p Line. 2439 static bool continuesLineCommentSection(const FormatToken &FormatTok, 2440 const UnwrappedLine &Line, 2441 llvm::Regex &CommentPragmasRegex) { 2442 if (Line.Tokens.empty()) 2443 return false; 2444 2445 StringRef IndentContent = FormatTok.TokenText; 2446 if (FormatTok.TokenText.startswith("//") || 2447 FormatTok.TokenText.startswith("/*")) 2448 IndentContent = FormatTok.TokenText.substr(2); 2449 if (CommentPragmasRegex.match(IndentContent)) 2450 return false; 2451 2452 // If Line starts with a line comment, then FormatTok continues the comment 2453 // section if its original column is greater or equal to the original start 2454 // column of the line. 2455 // 2456 // Define the min column token of a line as follows: if a line ends in '{' or 2457 // contains a '{' followed by a line comment, then the min column token is 2458 // that '{'. Otherwise, the min column token of the line is the first token of 2459 // the line. 2460 // 2461 // If Line starts with a token other than a line comment, then FormatTok 2462 // continues the comment section if its original column is greater than the 2463 // original start column of the min column token of the line. 2464 // 2465 // For example, the second line comment continues the first in these cases: 2466 // 2467 // // first line 2468 // // second line 2469 // 2470 // and: 2471 // 2472 // // first line 2473 // // second line 2474 // 2475 // and: 2476 // 2477 // int i; // first line 2478 // // second line 2479 // 2480 // and: 2481 // 2482 // do { // first line 2483 // // second line 2484 // int i; 2485 // } while (true); 2486 // 2487 // and: 2488 // 2489 // enum { 2490 // a, // first line 2491 // // second line 2492 // b 2493 // }; 2494 // 2495 // The second line comment doesn't continue the first in these cases: 2496 // 2497 // // first line 2498 // // second line 2499 // 2500 // and: 2501 // 2502 // int i; // first line 2503 // // second line 2504 // 2505 // and: 2506 // 2507 // do { // first line 2508 // // second line 2509 // int i; 2510 // } while (true); 2511 // 2512 // and: 2513 // 2514 // enum { 2515 // a, // first line 2516 // // second line 2517 // }; 2518 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2519 2520 // Scan for '{//'. If found, use the column of '{' as a min column for line 2521 // comment section continuation. 2522 const FormatToken *PreviousToken = nullptr; 2523 for (const UnwrappedLineNode &Node : Line.Tokens) { 2524 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2525 isLineComment(*Node.Tok)) { 2526 MinColumnToken = PreviousToken; 2527 break; 2528 } 2529 PreviousToken = Node.Tok; 2530 2531 // Grab the last newline preceding a token in this unwrapped line. 2532 if (Node.Tok->NewlinesBefore > 0) { 2533 MinColumnToken = Node.Tok; 2534 } 2535 } 2536 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2537 MinColumnToken = PreviousToken; 2538 } 2539 2540 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2541 MinColumnToken); 2542 } 2543 2544 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2545 bool JustComments = Line->Tokens.empty(); 2546 for (SmallVectorImpl<FormatToken *>::const_iterator 2547 I = CommentsBeforeNextToken.begin(), 2548 E = CommentsBeforeNextToken.end(); 2549 I != E; ++I) { 2550 // Line comments that belong to the same line comment section are put on the 2551 // same line since later we might want to reflow content between them. 2552 // Additional fine-grained breaking of line comment sections is controlled 2553 // by the class BreakableLineCommentSection in case it is desirable to keep 2554 // several line comment sections in the same unwrapped line. 2555 // 2556 // FIXME: Consider putting separate line comment sections as children to the 2557 // unwrapped line instead. 2558 (*I)->ContinuesLineCommentSection = 2559 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2560 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2561 addUnwrappedLine(); 2562 pushToken(*I); 2563 } 2564 if (NewlineBeforeNext && JustComments) 2565 addUnwrappedLine(); 2566 CommentsBeforeNextToken.clear(); 2567 } 2568 2569 void UnwrappedLineParser::nextToken(int LevelDifference) { 2570 if (eof()) 2571 return; 2572 flushComments(isOnNewLine(*FormatTok)); 2573 pushToken(FormatTok); 2574 FormatToken *Previous = FormatTok; 2575 if (Style.Language != FormatStyle::LK_JavaScript) 2576 readToken(LevelDifference); 2577 else 2578 readTokenWithJavaScriptASI(); 2579 FormatTok->Previous = Previous; 2580 } 2581 2582 void UnwrappedLineParser::distributeComments( 2583 const SmallVectorImpl<FormatToken *> &Comments, 2584 const FormatToken *NextTok) { 2585 // Whether or not a line comment token continues a line is controlled by 2586 // the method continuesLineCommentSection, with the following caveat: 2587 // 2588 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2589 // that each comment line from the trail is aligned with the next token, if 2590 // the next token exists. If a trail exists, the beginning of the maximal 2591 // trail is marked as a start of a new comment section. 2592 // 2593 // For example in this code: 2594 // 2595 // int a; // line about a 2596 // // line 1 about b 2597 // // line 2 about b 2598 // int b; 2599 // 2600 // the two lines about b form a maximal trail, so there are two sections, the 2601 // first one consisting of the single comment "// line about a" and the 2602 // second one consisting of the next two comments. 2603 if (Comments.empty()) 2604 return; 2605 bool ShouldPushCommentsInCurrentLine = true; 2606 bool HasTrailAlignedWithNextToken = false; 2607 unsigned StartOfTrailAlignedWithNextToken = 0; 2608 if (NextTok) { 2609 // We are skipping the first element intentionally. 2610 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2611 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2612 HasTrailAlignedWithNextToken = true; 2613 StartOfTrailAlignedWithNextToken = i; 2614 } 2615 } 2616 } 2617 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2618 FormatToken *FormatTok = Comments[i]; 2619 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 2620 FormatTok->ContinuesLineCommentSection = false; 2621 } else { 2622 FormatTok->ContinuesLineCommentSection = 2623 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2624 } 2625 if (!FormatTok->ContinuesLineCommentSection && 2626 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2627 ShouldPushCommentsInCurrentLine = false; 2628 } 2629 if (ShouldPushCommentsInCurrentLine) { 2630 pushToken(FormatTok); 2631 } else { 2632 CommentsBeforeNextToken.push_back(FormatTok); 2633 } 2634 } 2635 } 2636 2637 void UnwrappedLineParser::readToken(int LevelDifference) { 2638 SmallVector<FormatToken *, 1> Comments; 2639 do { 2640 FormatTok = Tokens->getNextToken(); 2641 assert(FormatTok); 2642 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2643 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2644 distributeComments(Comments, FormatTok); 2645 Comments.clear(); 2646 // If there is an unfinished unwrapped line, we flush the preprocessor 2647 // directives only after that unwrapped line was finished later. 2648 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2649 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2650 assert((LevelDifference >= 0 || 2651 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 2652 "LevelDifference makes Line->Level negative"); 2653 Line->Level += LevelDifference; 2654 // Comments stored before the preprocessor directive need to be output 2655 // before the preprocessor directive, at the same level as the 2656 // preprocessor directive, as we consider them to apply to the directive. 2657 flushComments(isOnNewLine(*FormatTok)); 2658 parsePPDirective(); 2659 } 2660 while (FormatTok->Type == TT_ConflictStart || 2661 FormatTok->Type == TT_ConflictEnd || 2662 FormatTok->Type == TT_ConflictAlternative) { 2663 if (FormatTok->Type == TT_ConflictStart) { 2664 conditionalCompilationStart(/*Unreachable=*/false); 2665 } else if (FormatTok->Type == TT_ConflictAlternative) { 2666 conditionalCompilationAlternative(); 2667 } else if (FormatTok->Type == TT_ConflictEnd) { 2668 conditionalCompilationEnd(); 2669 } 2670 FormatTok = Tokens->getNextToken(); 2671 FormatTok->MustBreakBefore = true; 2672 } 2673 2674 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 2675 !Line->InPPDirective) { 2676 continue; 2677 } 2678 2679 if (!FormatTok->Tok.is(tok::comment)) { 2680 distributeComments(Comments, FormatTok); 2681 Comments.clear(); 2682 return; 2683 } 2684 2685 Comments.push_back(FormatTok); 2686 } while (!eof()); 2687 2688 distributeComments(Comments, nullptr); 2689 Comments.clear(); 2690 } 2691 2692 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2693 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2694 if (MustBreakBeforeNextToken) { 2695 Line->Tokens.back().Tok->MustBreakBefore = true; 2696 MustBreakBeforeNextToken = false; 2697 } 2698 } 2699 2700 } // end namespace format 2701 } // end namespace clang 2702