1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "UnwrappedLineParser.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/Support/Debug.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #include <algorithm> 22 23 #define DEBUG_TYPE "format-parser" 24 25 namespace clang { 26 namespace format { 27 28 class FormatTokenSource { 29 public: 30 virtual ~FormatTokenSource() {} 31 virtual FormatToken *getNextToken() = 0; 32 33 virtual unsigned getPosition() = 0; 34 virtual FormatToken *setPosition(unsigned Position) = 0; 35 }; 36 37 namespace { 38 39 class ScopedDeclarationState { 40 public: 41 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 42 bool MustBeDeclaration) 43 : Line(Line), Stack(Stack) { 44 Line.MustBeDeclaration = MustBeDeclaration; 45 Stack.push_back(MustBeDeclaration); 46 } 47 ~ScopedDeclarationState() { 48 Stack.pop_back(); 49 if (!Stack.empty()) 50 Line.MustBeDeclaration = Stack.back(); 51 else 52 Line.MustBeDeclaration = true; 53 } 54 55 private: 56 UnwrappedLine &Line; 57 std::vector<bool> &Stack; 58 }; 59 60 static bool isLineComment(const FormatToken &FormatTok) { 61 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 62 } 63 64 // Checks if \p FormatTok is a line comment that continues the line comment 65 // \p Previous. The original column of \p MinColumnToken is used to determine 66 // whether \p FormatTok is indented enough to the right to continue \p Previous. 67 static bool continuesLineComment(const FormatToken &FormatTok, 68 const FormatToken *Previous, 69 const FormatToken *MinColumnToken) { 70 if (!Previous || !MinColumnToken) 71 return false; 72 unsigned MinContinueColumn = 73 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 74 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 75 isLineComment(*Previous) && 76 FormatTok.OriginalColumn >= MinContinueColumn; 77 } 78 79 class ScopedMacroState : public FormatTokenSource { 80 public: 81 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 82 FormatToken *&ResetToken) 83 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 84 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 85 Token(nullptr), PreviousToken(nullptr) { 86 FakeEOF.Tok.startToken(); 87 FakeEOF.Tok.setKind(tok::eof); 88 TokenSource = this; 89 Line.Level = 0; 90 Line.InPPDirective = true; 91 } 92 93 ~ScopedMacroState() override { 94 TokenSource = PreviousTokenSource; 95 ResetToken = Token; 96 Line.InPPDirective = false; 97 Line.Level = PreviousLineLevel; 98 } 99 100 FormatToken *getNextToken() override { 101 // The \c UnwrappedLineParser guards against this by never calling 102 // \c getNextToken() after it has encountered the first eof token. 103 assert(!eof()); 104 PreviousToken = Token; 105 Token = PreviousTokenSource->getNextToken(); 106 if (eof()) 107 return &FakeEOF; 108 return Token; 109 } 110 111 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 112 113 FormatToken *setPosition(unsigned Position) override { 114 PreviousToken = nullptr; 115 Token = PreviousTokenSource->setPosition(Position); 116 return Token; 117 } 118 119 private: 120 bool eof() { 121 return Token && Token->HasUnescapedNewline && 122 !continuesLineComment(*Token, PreviousToken, 123 /*MinColumnToken=*/PreviousToken); 124 } 125 126 FormatToken FakeEOF; 127 UnwrappedLine &Line; 128 FormatTokenSource *&TokenSource; 129 FormatToken *&ResetToken; 130 unsigned PreviousLineLevel; 131 FormatTokenSource *PreviousTokenSource; 132 133 FormatToken *Token; 134 FormatToken *PreviousToken; 135 }; 136 137 } // end anonymous namespace 138 139 class ScopedLineState { 140 public: 141 ScopedLineState(UnwrappedLineParser &Parser, 142 bool SwitchToPreprocessorLines = false) 143 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 144 if (SwitchToPreprocessorLines) 145 Parser.CurrentLines = &Parser.PreprocessorDirectives; 146 else if (!Parser.Line->Tokens.empty()) 147 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 148 PreBlockLine = std::move(Parser.Line); 149 Parser.Line = llvm::make_unique<UnwrappedLine>(); 150 Parser.Line->Level = PreBlockLine->Level; 151 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 152 } 153 154 ~ScopedLineState() { 155 if (!Parser.Line->Tokens.empty()) { 156 Parser.addUnwrappedLine(); 157 } 158 assert(Parser.Line->Tokens.empty()); 159 Parser.Line = std::move(PreBlockLine); 160 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 161 Parser.MustBreakBeforeNextToken = true; 162 Parser.CurrentLines = OriginalLines; 163 } 164 165 private: 166 UnwrappedLineParser &Parser; 167 168 std::unique_ptr<UnwrappedLine> PreBlockLine; 169 SmallVectorImpl<UnwrappedLine> *OriginalLines; 170 }; 171 172 class CompoundStatementIndenter { 173 public: 174 CompoundStatementIndenter(UnwrappedLineParser *Parser, 175 const FormatStyle &Style, unsigned &LineLevel) 176 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 177 if (Style.BraceWrapping.AfterControlStatement) 178 Parser->addUnwrappedLine(); 179 if (Style.BraceWrapping.IndentBraces) 180 ++LineLevel; 181 } 182 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 183 184 private: 185 unsigned &LineLevel; 186 unsigned OldLineLevel; 187 }; 188 189 namespace { 190 191 class IndexedTokenSource : public FormatTokenSource { 192 public: 193 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 194 : Tokens(Tokens), Position(-1) {} 195 196 FormatToken *getNextToken() override { 197 ++Position; 198 return Tokens[Position]; 199 } 200 201 unsigned getPosition() override { 202 assert(Position >= 0); 203 return Position; 204 } 205 206 FormatToken *setPosition(unsigned P) override { 207 Position = P; 208 return Tokens[Position]; 209 } 210 211 void reset() { Position = -1; } 212 213 private: 214 ArrayRef<FormatToken *> Tokens; 215 int Position; 216 }; 217 218 } // end anonymous namespace 219 220 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 221 const AdditionalKeywords &Keywords, 222 unsigned FirstStartColumn, 223 ArrayRef<FormatToken *> Tokens, 224 UnwrappedLineConsumer &Callback) 225 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 226 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 227 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 228 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 229 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 230 ? IG_Rejected 231 : IG_Inited), 232 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 233 234 void UnwrappedLineParser::reset() { 235 PPBranchLevel = -1; 236 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 237 ? IG_Rejected 238 : IG_Inited; 239 IncludeGuardToken = nullptr; 240 Line.reset(new UnwrappedLine); 241 CommentsBeforeNextToken.clear(); 242 FormatTok = nullptr; 243 MustBreakBeforeNextToken = false; 244 PreprocessorDirectives.clear(); 245 CurrentLines = &Lines; 246 DeclarationScopeStack.clear(); 247 PPStack.clear(); 248 Line->FirstStartColumn = FirstStartColumn; 249 } 250 251 void UnwrappedLineParser::parse() { 252 IndexedTokenSource TokenSource(AllTokens); 253 Line->FirstStartColumn = FirstStartColumn; 254 do { 255 LLVM_DEBUG(llvm::dbgs() << "----\n"); 256 reset(); 257 Tokens = &TokenSource; 258 TokenSource.reset(); 259 260 readToken(); 261 parseFile(); 262 263 // If we found an include guard then all preprocessor directives (other than 264 // the guard) are over-indented by one. 265 if (IncludeGuard == IG_Found) 266 for (auto &Line : Lines) 267 if (Line.InPPDirective && Line.Level > 0) 268 --Line.Level; 269 270 // Create line with eof token. 271 pushToken(FormatTok); 272 addUnwrappedLine(); 273 274 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 275 E = Lines.end(); 276 I != E; ++I) { 277 Callback.consumeUnwrappedLine(*I); 278 } 279 Callback.finishRun(); 280 Lines.clear(); 281 while (!PPLevelBranchIndex.empty() && 282 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 283 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 284 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 285 } 286 if (!PPLevelBranchIndex.empty()) { 287 ++PPLevelBranchIndex.back(); 288 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 289 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 290 } 291 } while (!PPLevelBranchIndex.empty()); 292 } 293 294 void UnwrappedLineParser::parseFile() { 295 // The top-level context in a file always has declarations, except for pre- 296 // processor directives and JavaScript files. 297 bool MustBeDeclaration = 298 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 299 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 300 MustBeDeclaration); 301 if (Style.Language == FormatStyle::LK_TextProto) 302 parseBracedList(); 303 else 304 parseLevel(/*HasOpeningBrace=*/false); 305 // Make sure to format the remaining tokens. 306 flushComments(true); 307 addUnwrappedLine(); 308 } 309 310 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 311 bool SwitchLabelEncountered = false; 312 do { 313 tok::TokenKind kind = FormatTok->Tok.getKind(); 314 if (FormatTok->Type == TT_MacroBlockBegin) { 315 kind = tok::l_brace; 316 } else if (FormatTok->Type == TT_MacroBlockEnd) { 317 kind = tok::r_brace; 318 } 319 320 switch (kind) { 321 case tok::comment: 322 nextToken(); 323 addUnwrappedLine(); 324 break; 325 case tok::l_brace: 326 // FIXME: Add parameter whether this can happen - if this happens, we must 327 // be in a non-declaration context. 328 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 329 continue; 330 parseBlock(/*MustBeDeclaration=*/false); 331 addUnwrappedLine(); 332 break; 333 case tok::r_brace: 334 if (HasOpeningBrace) 335 return; 336 nextToken(); 337 addUnwrappedLine(); 338 break; 339 case tok::kw_default: { 340 unsigned StoredPosition = Tokens->getPosition(); 341 FormatToken *Next = Tokens->getNextToken(); 342 FormatTok = Tokens->setPosition(StoredPosition); 343 if (Next && Next->isNot(tok::colon)) { 344 // default not followed by ':' is not a case label; treat it like 345 // an identifier. 346 parseStructuralElement(); 347 break; 348 } 349 // Else, if it is 'default:', fall through to the case handling. 350 LLVM_FALLTHROUGH; 351 } 352 case tok::kw_case: 353 if (Style.Language == FormatStyle::LK_JavaScript && 354 Line->MustBeDeclaration) { 355 // A 'case: string' style field declaration. 356 parseStructuralElement(); 357 break; 358 } 359 if (!SwitchLabelEncountered && 360 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 361 ++Line->Level; 362 SwitchLabelEncountered = true; 363 parseStructuralElement(); 364 break; 365 default: 366 parseStructuralElement(); 367 break; 368 } 369 } while (!eof()); 370 } 371 372 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 373 // We'll parse forward through the tokens until we hit 374 // a closing brace or eof - note that getNextToken() will 375 // parse macros, so this will magically work inside macro 376 // definitions, too. 377 unsigned StoredPosition = Tokens->getPosition(); 378 FormatToken *Tok = FormatTok; 379 const FormatToken *PrevTok = Tok->Previous; 380 // Keep a stack of positions of lbrace tokens. We will 381 // update information about whether an lbrace starts a 382 // braced init list or a different block during the loop. 383 SmallVector<FormatToken *, 8> LBraceStack; 384 assert(Tok->Tok.is(tok::l_brace)); 385 do { 386 // Get next non-comment token. 387 FormatToken *NextTok; 388 unsigned ReadTokens = 0; 389 do { 390 NextTok = Tokens->getNextToken(); 391 ++ReadTokens; 392 } while (NextTok->is(tok::comment)); 393 394 switch (Tok->Tok.getKind()) { 395 case tok::l_brace: 396 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 397 if (PrevTok->isOneOf(tok::colon, tok::less)) 398 // A ':' indicates this code is in a type, or a braced list 399 // following a label in an object literal ({a: {b: 1}}). 400 // A '<' could be an object used in a comparison, but that is nonsense 401 // code (can never return true), so more likely it is a generic type 402 // argument (`X<{a: string; b: number}>`). 403 // The code below could be confused by semicolons between the 404 // individual members in a type member list, which would normally 405 // trigger BK_Block. In both cases, this must be parsed as an inline 406 // braced init. 407 Tok->BlockKind = BK_BracedInit; 408 else if (PrevTok->is(tok::r_paren)) 409 // `) { }` can only occur in function or method declarations in JS. 410 Tok->BlockKind = BK_Block; 411 } else { 412 Tok->BlockKind = BK_Unknown; 413 } 414 LBraceStack.push_back(Tok); 415 break; 416 case tok::r_brace: 417 if (LBraceStack.empty()) 418 break; 419 if (LBraceStack.back()->BlockKind == BK_Unknown) { 420 bool ProbablyBracedList = false; 421 if (Style.Language == FormatStyle::LK_Proto) { 422 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 423 } else { 424 // Using OriginalColumn to distinguish between ObjC methods and 425 // binary operators is a bit hacky. 426 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 427 NextTok->OriginalColumn == 0; 428 429 // If there is a comma, semicolon or right paren after the closing 430 // brace, we assume this is a braced initializer list. Note that 431 // regardless how we mark inner braces here, we will overwrite the 432 // BlockKind later if we parse a braced list (where all blocks 433 // inside are by default braced lists), or when we explicitly detect 434 // blocks (for example while parsing lambdas). 435 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 436 // braced list in JS. 437 ProbablyBracedList = 438 (Style.Language == FormatStyle::LK_JavaScript && 439 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 440 Keywords.kw_as)) || 441 (Style.isCpp() && NextTok->is(tok::l_paren)) || 442 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 443 tok::r_paren, tok::r_square, tok::l_brace, 444 tok::ellipsis) || 445 (NextTok->is(tok::identifier) && 446 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 447 (NextTok->is(tok::semi) && 448 (!ExpectClassBody || LBraceStack.size() != 1)) || 449 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 450 if (NextTok->is(tok::l_square)) { 451 // We can have an array subscript after a braced init 452 // list, but C++11 attributes are expected after blocks. 453 NextTok = Tokens->getNextToken(); 454 ++ReadTokens; 455 ProbablyBracedList = NextTok->isNot(tok::l_square); 456 } 457 } 458 if (ProbablyBracedList) { 459 Tok->BlockKind = BK_BracedInit; 460 LBraceStack.back()->BlockKind = BK_BracedInit; 461 } else { 462 Tok->BlockKind = BK_Block; 463 LBraceStack.back()->BlockKind = BK_Block; 464 } 465 } 466 LBraceStack.pop_back(); 467 break; 468 case tok::at: 469 case tok::semi: 470 case tok::kw_if: 471 case tok::kw_while: 472 case tok::kw_for: 473 case tok::kw_switch: 474 case tok::kw_try: 475 case tok::kw___try: 476 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 477 LBraceStack.back()->BlockKind = BK_Block; 478 break; 479 default: 480 break; 481 } 482 PrevTok = Tok; 483 Tok = NextTok; 484 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 485 486 // Assume other blocks for all unclosed opening braces. 487 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 488 if (LBraceStack[i]->BlockKind == BK_Unknown) 489 LBraceStack[i]->BlockKind = BK_Block; 490 } 491 492 FormatTok = Tokens->setPosition(StoredPosition); 493 } 494 495 template <class T> 496 static inline void hash_combine(std::size_t &seed, const T &v) { 497 std::hash<T> hasher; 498 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 499 } 500 501 size_t UnwrappedLineParser::computePPHash() const { 502 size_t h = 0; 503 for (const auto &i : PPStack) { 504 hash_combine(h, size_t(i.Kind)); 505 hash_combine(h, i.Line); 506 } 507 return h; 508 } 509 510 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 511 bool MunchSemi) { 512 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 513 "'{' or macro block token expected"); 514 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 515 FormatTok->BlockKind = BK_Block; 516 517 size_t PPStartHash = computePPHash(); 518 519 unsigned InitialLevel = Line->Level; 520 nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); 521 522 if (MacroBlock && FormatTok->is(tok::l_paren)) 523 parseParens(); 524 525 size_t NbPreprocessorDirectives = 526 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 527 addUnwrappedLine(); 528 size_t OpeningLineIndex = 529 CurrentLines->empty() 530 ? (UnwrappedLine::kInvalidIndex) 531 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 532 533 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 534 MustBeDeclaration); 535 if (AddLevel) 536 ++Line->Level; 537 parseLevel(/*HasOpeningBrace=*/true); 538 539 if (eof()) 540 return; 541 542 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 543 : !FormatTok->is(tok::r_brace)) { 544 Line->Level = InitialLevel; 545 FormatTok->BlockKind = BK_Block; 546 return; 547 } 548 549 size_t PPEndHash = computePPHash(); 550 551 // Munch the closing brace. 552 nextToken(/*LevelDifference=*/AddLevel ? -1 : 0); 553 554 if (MacroBlock && FormatTok->is(tok::l_paren)) 555 parseParens(); 556 557 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 558 nextToken(); 559 Line->Level = InitialLevel; 560 561 if (PPStartHash == PPEndHash) { 562 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 563 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 564 // Update the opening line to add the forward reference as well 565 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 566 CurrentLines->size() - 1; 567 } 568 } 569 } 570 571 static bool isGoogScope(const UnwrappedLine &Line) { 572 // FIXME: Closure-library specific stuff should not be hard-coded but be 573 // configurable. 574 if (Line.Tokens.size() < 4) 575 return false; 576 auto I = Line.Tokens.begin(); 577 if (I->Tok->TokenText != "goog") 578 return false; 579 ++I; 580 if (I->Tok->isNot(tok::period)) 581 return false; 582 ++I; 583 if (I->Tok->TokenText != "scope") 584 return false; 585 ++I; 586 return I->Tok->is(tok::l_paren); 587 } 588 589 static bool isIIFE(const UnwrappedLine &Line, 590 const AdditionalKeywords &Keywords) { 591 // Look for the start of an immediately invoked anonymous function. 592 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 593 // This is commonly done in JavaScript to create a new, anonymous scope. 594 // Example: (function() { ... })() 595 if (Line.Tokens.size() < 3) 596 return false; 597 auto I = Line.Tokens.begin(); 598 if (I->Tok->isNot(tok::l_paren)) 599 return false; 600 ++I; 601 if (I->Tok->isNot(Keywords.kw_function)) 602 return false; 603 ++I; 604 return I->Tok->is(tok::l_paren); 605 } 606 607 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 608 const FormatToken &InitialToken) { 609 if (InitialToken.is(tok::kw_namespace)) 610 return Style.BraceWrapping.AfterNamespace; 611 if (InitialToken.is(tok::kw_class)) 612 return Style.BraceWrapping.AfterClass; 613 if (InitialToken.is(tok::kw_union)) 614 return Style.BraceWrapping.AfterUnion; 615 if (InitialToken.is(tok::kw_struct)) 616 return Style.BraceWrapping.AfterStruct; 617 return false; 618 } 619 620 void UnwrappedLineParser::parseChildBlock() { 621 FormatTok->BlockKind = BK_Block; 622 nextToken(); 623 { 624 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && 625 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 626 ScopedLineState LineState(*this); 627 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 628 /*MustBeDeclaration=*/false); 629 Line->Level += SkipIndent ? 0 : 1; 630 parseLevel(/*HasOpeningBrace=*/true); 631 flushComments(isOnNewLine(*FormatTok)); 632 Line->Level -= SkipIndent ? 0 : 1; 633 } 634 nextToken(); 635 } 636 637 void UnwrappedLineParser::parsePPDirective() { 638 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 639 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 640 nextToken(); 641 642 if (!FormatTok->Tok.getIdentifierInfo()) { 643 parsePPUnknown(); 644 return; 645 } 646 647 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 648 case tok::pp_define: 649 parsePPDefine(); 650 return; 651 case tok::pp_if: 652 parsePPIf(/*IfDef=*/false); 653 break; 654 case tok::pp_ifdef: 655 case tok::pp_ifndef: 656 parsePPIf(/*IfDef=*/true); 657 break; 658 case tok::pp_else: 659 parsePPElse(); 660 break; 661 case tok::pp_elif: 662 parsePPElIf(); 663 break; 664 case tok::pp_endif: 665 parsePPEndIf(); 666 break; 667 default: 668 parsePPUnknown(); 669 break; 670 } 671 } 672 673 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 674 size_t Line = CurrentLines->size(); 675 if (CurrentLines == &PreprocessorDirectives) 676 Line += Lines.size(); 677 678 if (Unreachable || 679 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 680 PPStack.push_back({PP_Unreachable, Line}); 681 else 682 PPStack.push_back({PP_Conditional, Line}); 683 } 684 685 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 686 ++PPBranchLevel; 687 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 688 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 689 PPLevelBranchIndex.push_back(0); 690 PPLevelBranchCount.push_back(0); 691 } 692 PPChainBranchIndex.push(0); 693 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 694 conditionalCompilationCondition(Unreachable || Skip); 695 } 696 697 void UnwrappedLineParser::conditionalCompilationAlternative() { 698 if (!PPStack.empty()) 699 PPStack.pop_back(); 700 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 701 if (!PPChainBranchIndex.empty()) 702 ++PPChainBranchIndex.top(); 703 conditionalCompilationCondition( 704 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 705 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 706 } 707 708 void UnwrappedLineParser::conditionalCompilationEnd() { 709 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 710 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 711 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 712 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 713 } 714 } 715 // Guard against #endif's without #if. 716 if (PPBranchLevel > -1) 717 --PPBranchLevel; 718 if (!PPChainBranchIndex.empty()) 719 PPChainBranchIndex.pop(); 720 if (!PPStack.empty()) 721 PPStack.pop_back(); 722 } 723 724 void UnwrappedLineParser::parsePPIf(bool IfDef) { 725 bool IfNDef = FormatTok->is(tok::pp_ifndef); 726 nextToken(); 727 bool Unreachable = false; 728 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 729 Unreachable = true; 730 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 731 Unreachable = true; 732 conditionalCompilationStart(Unreachable); 733 FormatToken *IfCondition = FormatTok; 734 // If there's a #ifndef on the first line, and the only lines before it are 735 // comments, it could be an include guard. 736 bool MaybeIncludeGuard = IfNDef; 737 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 738 for (auto &Line : Lines) { 739 if (!Line.Tokens.front().Tok->is(tok::comment)) { 740 MaybeIncludeGuard = false; 741 IncludeGuard = IG_Rejected; 742 break; 743 } 744 } 745 --PPBranchLevel; 746 parsePPUnknown(); 747 ++PPBranchLevel; 748 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 749 IncludeGuard = IG_IfNdefed; 750 IncludeGuardToken = IfCondition; 751 } 752 } 753 754 void UnwrappedLineParser::parsePPElse() { 755 // If a potential include guard has an #else, it's not an include guard. 756 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 757 IncludeGuard = IG_Rejected; 758 conditionalCompilationAlternative(); 759 if (PPBranchLevel > -1) 760 --PPBranchLevel; 761 parsePPUnknown(); 762 ++PPBranchLevel; 763 } 764 765 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 766 767 void UnwrappedLineParser::parsePPEndIf() { 768 conditionalCompilationEnd(); 769 parsePPUnknown(); 770 // If the #endif of a potential include guard is the last thing in the file, 771 // then we found an include guard. 772 unsigned TokenPosition = Tokens->getPosition(); 773 FormatToken *PeekNext = AllTokens[TokenPosition]; 774 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && 775 PeekNext->is(tok::eof) && 776 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 777 IncludeGuard = IG_Found; 778 } 779 780 void UnwrappedLineParser::parsePPDefine() { 781 nextToken(); 782 783 if (FormatTok->Tok.getKind() != tok::identifier) { 784 IncludeGuard = IG_Rejected; 785 IncludeGuardToken = nullptr; 786 parsePPUnknown(); 787 return; 788 } 789 790 if (IncludeGuard == IG_IfNdefed && 791 IncludeGuardToken->TokenText == FormatTok->TokenText) { 792 IncludeGuard = IG_Defined; 793 IncludeGuardToken = nullptr; 794 for (auto &Line : Lines) { 795 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 796 IncludeGuard = IG_Rejected; 797 break; 798 } 799 } 800 } 801 802 nextToken(); 803 if (FormatTok->Tok.getKind() == tok::l_paren && 804 FormatTok->WhitespaceRange.getBegin() == 805 FormatTok->WhitespaceRange.getEnd()) { 806 parseParens(); 807 } 808 if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) 809 Line->Level += PPBranchLevel + 1; 810 addUnwrappedLine(); 811 ++Line->Level; 812 813 // Errors during a preprocessor directive can only affect the layout of the 814 // preprocessor directive, and thus we ignore them. An alternative approach 815 // would be to use the same approach we use on the file level (no 816 // re-indentation if there was a structural error) within the macro 817 // definition. 818 parseFile(); 819 } 820 821 void UnwrappedLineParser::parsePPUnknown() { 822 do { 823 nextToken(); 824 } while (!eof()); 825 if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) 826 Line->Level += PPBranchLevel + 1; 827 addUnwrappedLine(); 828 } 829 830 // Here we blacklist certain tokens that are not usually the first token in an 831 // unwrapped line. This is used in attempt to distinguish macro calls without 832 // trailing semicolons from other constructs split to several lines. 833 static bool tokenCanStartNewLine(const clang::Token &Tok) { 834 // Semicolon can be a null-statement, l_square can be a start of a macro or 835 // a C++11 attribute, but this doesn't seem to be common. 836 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 837 Tok.isNot(tok::l_square) && 838 // Tokens that can only be used as binary operators and a part of 839 // overloaded operator names. 840 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 841 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 842 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 843 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 844 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 845 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 846 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 847 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 848 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 849 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 850 Tok.isNot(tok::lesslessequal) && 851 // Colon is used in labels, base class lists, initializer lists, 852 // range-based for loops, ternary operator, but should never be the 853 // first token in an unwrapped line. 854 Tok.isNot(tok::colon) && 855 // 'noexcept' is a trailing annotation. 856 Tok.isNot(tok::kw_noexcept); 857 } 858 859 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 860 const FormatToken *FormatTok) { 861 // FIXME: This returns true for C/C++ keywords like 'struct'. 862 return FormatTok->is(tok::identifier) && 863 (FormatTok->Tok.getIdentifierInfo() == nullptr || 864 !FormatTok->isOneOf( 865 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 866 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 867 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 868 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 869 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 870 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, 871 Keywords.kw_from)); 872 } 873 874 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 875 const FormatToken *FormatTok) { 876 return FormatTok->Tok.isLiteral() || 877 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 878 mustBeJSIdent(Keywords, FormatTok); 879 } 880 881 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 882 // when encountered after a value (see mustBeJSIdentOrValue). 883 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 884 const FormatToken *FormatTok) { 885 return FormatTok->isOneOf( 886 tok::kw_return, Keywords.kw_yield, 887 // conditionals 888 tok::kw_if, tok::kw_else, 889 // loops 890 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 891 // switch/case 892 tok::kw_switch, tok::kw_case, 893 // exceptions 894 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 895 // declaration 896 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 897 Keywords.kw_async, Keywords.kw_function, 898 // import/export 899 Keywords.kw_import, tok::kw_export); 900 } 901 902 // readTokenWithJavaScriptASI reads the next token and terminates the current 903 // line if JavaScript Automatic Semicolon Insertion must 904 // happen between the current token and the next token. 905 // 906 // This method is conservative - it cannot cover all edge cases of JavaScript, 907 // but only aims to correctly handle certain well known cases. It *must not* 908 // return true in speculative cases. 909 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 910 FormatToken *Previous = FormatTok; 911 readToken(); 912 FormatToken *Next = FormatTok; 913 914 bool IsOnSameLine = 915 CommentsBeforeNextToken.empty() 916 ? Next->NewlinesBefore == 0 917 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 918 if (IsOnSameLine) 919 return; 920 921 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 922 bool PreviousStartsTemplateExpr = 923 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 924 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 925 // If the line contains an '@' sign, the previous token might be an 926 // annotation, which can precede another identifier/value. 927 bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(), 928 [](UnwrappedLineNode &LineNode) { 929 return LineNode.Tok->is(tok::at); 930 }) != Line->Tokens.end(); 931 if (HasAt) 932 return; 933 } 934 if (Next->is(tok::exclaim) && PreviousMustBeValue) 935 return addUnwrappedLine(); 936 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 937 bool NextEndsTemplateExpr = 938 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 939 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 940 (PreviousMustBeValue || 941 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 942 tok::minusminus))) 943 return addUnwrappedLine(); 944 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 945 isJSDeclOrStmt(Keywords, Next)) 946 return addUnwrappedLine(); 947 } 948 949 void UnwrappedLineParser::parseStructuralElement() { 950 assert(!FormatTok->is(tok::l_brace)); 951 if (Style.Language == FormatStyle::LK_TableGen && 952 FormatTok->is(tok::pp_include)) { 953 nextToken(); 954 if (FormatTok->is(tok::string_literal)) 955 nextToken(); 956 addUnwrappedLine(); 957 return; 958 } 959 switch (FormatTok->Tok.getKind()) { 960 case tok::kw_asm: 961 nextToken(); 962 if (FormatTok->is(tok::l_brace)) { 963 FormatTok->Type = TT_InlineASMBrace; 964 nextToken(); 965 while (FormatTok && FormatTok->isNot(tok::eof)) { 966 if (FormatTok->is(tok::r_brace)) { 967 FormatTok->Type = TT_InlineASMBrace; 968 nextToken(); 969 addUnwrappedLine(); 970 break; 971 } 972 FormatTok->Finalized = true; 973 nextToken(); 974 } 975 } 976 break; 977 case tok::kw_namespace: 978 parseNamespace(); 979 return; 980 case tok::kw_inline: 981 nextToken(); 982 if (FormatTok->Tok.is(tok::kw_namespace)) { 983 parseNamespace(); 984 return; 985 } 986 break; 987 case tok::kw_public: 988 case tok::kw_protected: 989 case tok::kw_private: 990 if (Style.Language == FormatStyle::LK_Java || 991 Style.Language == FormatStyle::LK_JavaScript) 992 nextToken(); 993 else 994 parseAccessSpecifier(); 995 return; 996 case tok::kw_if: 997 parseIfThenElse(); 998 return; 999 case tok::kw_for: 1000 case tok::kw_while: 1001 parseForOrWhileLoop(); 1002 return; 1003 case tok::kw_do: 1004 parseDoWhile(); 1005 return; 1006 case tok::kw_switch: 1007 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1008 // 'switch: string' field declaration. 1009 break; 1010 parseSwitch(); 1011 return; 1012 case tok::kw_default: 1013 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1014 // 'default: string' field declaration. 1015 break; 1016 nextToken(); 1017 if (FormatTok->is(tok::colon)) { 1018 parseLabel(); 1019 return; 1020 } 1021 // e.g. "default void f() {}" in a Java interface. 1022 break; 1023 case tok::kw_case: 1024 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1025 // 'case: string' field declaration. 1026 break; 1027 parseCaseLabel(); 1028 return; 1029 case tok::kw_try: 1030 case tok::kw___try: 1031 parseTryCatch(); 1032 return; 1033 case tok::kw_extern: 1034 nextToken(); 1035 if (FormatTok->Tok.is(tok::string_literal)) { 1036 nextToken(); 1037 if (FormatTok->Tok.is(tok::l_brace)) { 1038 if (Style.BraceWrapping.AfterExternBlock) { 1039 addUnwrappedLine(); 1040 parseBlock(/*MustBeDeclaration=*/true); 1041 } else { 1042 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 1043 } 1044 addUnwrappedLine(); 1045 return; 1046 } 1047 } 1048 break; 1049 case tok::kw_export: 1050 if (Style.Language == FormatStyle::LK_JavaScript) { 1051 parseJavaScriptEs6ImportExport(); 1052 return; 1053 } 1054 break; 1055 case tok::identifier: 1056 if (FormatTok->is(TT_ForEachMacro)) { 1057 parseForOrWhileLoop(); 1058 return; 1059 } 1060 if (FormatTok->is(TT_MacroBlockBegin)) { 1061 parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, 1062 /*MunchSemi=*/false); 1063 return; 1064 } 1065 if (FormatTok->is(Keywords.kw_import)) { 1066 if (Style.Language == FormatStyle::LK_JavaScript) { 1067 parseJavaScriptEs6ImportExport(); 1068 return; 1069 } 1070 if (Style.Language == FormatStyle::LK_Proto) { 1071 nextToken(); 1072 if (FormatTok->is(tok::kw_public)) 1073 nextToken(); 1074 if (!FormatTok->is(tok::string_literal)) 1075 return; 1076 nextToken(); 1077 if (FormatTok->is(tok::semi)) 1078 nextToken(); 1079 addUnwrappedLine(); 1080 return; 1081 } 1082 } 1083 if (Style.isCpp() && 1084 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1085 Keywords.kw_slots, Keywords.kw_qslots)) { 1086 nextToken(); 1087 if (FormatTok->is(tok::colon)) { 1088 nextToken(); 1089 addUnwrappedLine(); 1090 return; 1091 } 1092 } 1093 // In all other cases, parse the declaration. 1094 break; 1095 default: 1096 break; 1097 } 1098 do { 1099 const FormatToken *Previous = FormatTok->Previous; 1100 switch (FormatTok->Tok.getKind()) { 1101 case tok::at: 1102 nextToken(); 1103 if (FormatTok->Tok.is(tok::l_brace)) { 1104 nextToken(); 1105 parseBracedList(); 1106 break; 1107 } 1108 switch (FormatTok->Tok.getObjCKeywordID()) { 1109 case tok::objc_public: 1110 case tok::objc_protected: 1111 case tok::objc_package: 1112 case tok::objc_private: 1113 return parseAccessSpecifier(); 1114 case tok::objc_interface: 1115 case tok::objc_implementation: 1116 return parseObjCInterfaceOrImplementation(); 1117 case tok::objc_protocol: 1118 if (parseObjCProtocol()) 1119 return; 1120 break; 1121 case tok::objc_end: 1122 return; // Handled by the caller. 1123 case tok::objc_optional: 1124 case tok::objc_required: 1125 nextToken(); 1126 addUnwrappedLine(); 1127 return; 1128 case tok::objc_autoreleasepool: 1129 nextToken(); 1130 if (FormatTok->Tok.is(tok::l_brace)) { 1131 if (Style.BraceWrapping.AfterControlStatement) 1132 addUnwrappedLine(); 1133 parseBlock(/*MustBeDeclaration=*/false); 1134 } 1135 addUnwrappedLine(); 1136 return; 1137 case tok::objc_synchronized: 1138 nextToken(); 1139 if (FormatTok->Tok.is(tok::l_paren)) 1140 // Skip synchronization object 1141 parseParens(); 1142 if (FormatTok->Tok.is(tok::l_brace)) { 1143 if (Style.BraceWrapping.AfterControlStatement) 1144 addUnwrappedLine(); 1145 parseBlock(/*MustBeDeclaration=*/false); 1146 } 1147 addUnwrappedLine(); 1148 return; 1149 case tok::objc_try: 1150 // This branch isn't strictly necessary (the kw_try case below would 1151 // do this too after the tok::at is parsed above). But be explicit. 1152 parseTryCatch(); 1153 return; 1154 default: 1155 break; 1156 } 1157 break; 1158 case tok::kw_enum: 1159 // Ignore if this is part of "template <enum ...". 1160 if (Previous && Previous->is(tok::less)) { 1161 nextToken(); 1162 break; 1163 } 1164 1165 // parseEnum falls through and does not yet add an unwrapped line as an 1166 // enum definition can start a structural element. 1167 if (!parseEnum()) 1168 break; 1169 // This only applies for C++. 1170 if (!Style.isCpp()) { 1171 addUnwrappedLine(); 1172 return; 1173 } 1174 break; 1175 case tok::kw_typedef: 1176 nextToken(); 1177 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1178 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 1179 parseEnum(); 1180 break; 1181 case tok::kw_struct: 1182 case tok::kw_union: 1183 case tok::kw_class: 1184 // parseRecord falls through and does not yet add an unwrapped line as a 1185 // record declaration or definition can start a structural element. 1186 parseRecord(); 1187 // This does not apply for Java and JavaScript. 1188 if (Style.Language == FormatStyle::LK_Java || 1189 Style.Language == FormatStyle::LK_JavaScript) { 1190 if (FormatTok->is(tok::semi)) 1191 nextToken(); 1192 addUnwrappedLine(); 1193 return; 1194 } 1195 break; 1196 case tok::period: 1197 nextToken(); 1198 // In Java, classes have an implicit static member "class". 1199 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1200 FormatTok->is(tok::kw_class)) 1201 nextToken(); 1202 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 1203 FormatTok->Tok.getIdentifierInfo()) 1204 // JavaScript only has pseudo keywords, all keywords are allowed to 1205 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1206 nextToken(); 1207 break; 1208 case tok::semi: 1209 nextToken(); 1210 addUnwrappedLine(); 1211 return; 1212 case tok::r_brace: 1213 addUnwrappedLine(); 1214 return; 1215 case tok::l_paren: 1216 parseParens(); 1217 break; 1218 case tok::kw_operator: 1219 nextToken(); 1220 if (FormatTok->isBinaryOperator()) 1221 nextToken(); 1222 break; 1223 case tok::caret: 1224 nextToken(); 1225 if (FormatTok->Tok.isAnyIdentifier() || 1226 FormatTok->isSimpleTypeSpecifier()) 1227 nextToken(); 1228 if (FormatTok->is(tok::l_paren)) 1229 parseParens(); 1230 if (FormatTok->is(tok::l_brace)) 1231 parseChildBlock(); 1232 break; 1233 case tok::l_brace: 1234 if (!tryToParseBracedList()) { 1235 // A block outside of parentheses must be the last part of a 1236 // structural element. 1237 // FIXME: Figure out cases where this is not true, and add projections 1238 // for them (the one we know is missing are lambdas). 1239 if (Style.BraceWrapping.AfterFunction) 1240 addUnwrappedLine(); 1241 FormatTok->Type = TT_FunctionLBrace; 1242 parseBlock(/*MustBeDeclaration=*/false); 1243 addUnwrappedLine(); 1244 return; 1245 } 1246 // Otherwise this was a braced init list, and the structural 1247 // element continues. 1248 break; 1249 case tok::kw_try: 1250 // We arrive here when parsing function-try blocks. 1251 parseTryCatch(); 1252 return; 1253 case tok::identifier: { 1254 if (FormatTok->is(TT_MacroBlockEnd)) { 1255 addUnwrappedLine(); 1256 return; 1257 } 1258 1259 // Function declarations (as opposed to function expressions) are parsed 1260 // on their own unwrapped line by continuing this loop. Function 1261 // expressions (functions that are not on their own line) must not create 1262 // a new unwrapped line, so they are special cased below. 1263 size_t TokenCount = Line->Tokens.size(); 1264 if (Style.Language == FormatStyle::LK_JavaScript && 1265 FormatTok->is(Keywords.kw_function) && 1266 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1267 Keywords.kw_async)))) { 1268 tryToParseJSFunction(); 1269 break; 1270 } 1271 if ((Style.Language == FormatStyle::LK_JavaScript || 1272 Style.Language == FormatStyle::LK_Java) && 1273 FormatTok->is(Keywords.kw_interface)) { 1274 if (Style.Language == FormatStyle::LK_JavaScript) { 1275 // In JavaScript/TypeScript, "interface" can be used as a standalone 1276 // identifier, e.g. in `var interface = 1;`. If "interface" is 1277 // followed by another identifier, it is very like to be an actual 1278 // interface declaration. 1279 unsigned StoredPosition = Tokens->getPosition(); 1280 FormatToken *Next = Tokens->getNextToken(); 1281 FormatTok = Tokens->setPosition(StoredPosition); 1282 if (Next && !mustBeJSIdent(Keywords, Next)) { 1283 nextToken(); 1284 break; 1285 } 1286 } 1287 parseRecord(); 1288 addUnwrappedLine(); 1289 return; 1290 } 1291 1292 // See if the following token should start a new unwrapped line. 1293 StringRef Text = FormatTok->TokenText; 1294 nextToken(); 1295 if (Line->Tokens.size() == 1 && 1296 // JS doesn't have macros, and within classes colons indicate fields, 1297 // not labels. 1298 Style.Language != FormatStyle::LK_JavaScript) { 1299 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1300 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1301 parseLabel(); 1302 return; 1303 } 1304 // Recognize function-like macro usages without trailing semicolon as 1305 // well as free-standing macros like Q_OBJECT. 1306 bool FunctionLike = FormatTok->is(tok::l_paren); 1307 if (FunctionLike) 1308 parseParens(); 1309 1310 bool FollowedByNewline = 1311 CommentsBeforeNextToken.empty() 1312 ? FormatTok->NewlinesBefore > 0 1313 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1314 1315 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1316 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 1317 addUnwrappedLine(); 1318 return; 1319 } 1320 } 1321 break; 1322 } 1323 case tok::equal: 1324 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 1325 // TT_JsFatArrow. The always start an expression or a child block if 1326 // followed by a curly. 1327 if (FormatTok->is(TT_JsFatArrow)) { 1328 nextToken(); 1329 if (FormatTok->is(tok::l_brace)) 1330 parseChildBlock(); 1331 break; 1332 } 1333 1334 nextToken(); 1335 if (FormatTok->Tok.is(tok::l_brace)) { 1336 nextToken(); 1337 parseBracedList(); 1338 } else if (Style.Language == FormatStyle::LK_Proto && 1339 FormatTok->Tok.is(tok::less)) { 1340 nextToken(); 1341 parseBracedList(/*ContinueOnSemicolons=*/false, 1342 /*ClosingBraceKind=*/tok::greater); 1343 } 1344 break; 1345 case tok::l_square: 1346 parseSquare(); 1347 break; 1348 case tok::kw_new: 1349 parseNew(); 1350 break; 1351 default: 1352 nextToken(); 1353 break; 1354 } 1355 } while (!eof()); 1356 } 1357 1358 bool UnwrappedLineParser::tryToParseLambda() { 1359 if (!Style.isCpp()) { 1360 nextToken(); 1361 return false; 1362 } 1363 assert(FormatTok->is(tok::l_square)); 1364 FormatToken &LSquare = *FormatTok; 1365 if (!tryToParseLambdaIntroducer()) 1366 return false; 1367 1368 while (FormatTok->isNot(tok::l_brace)) { 1369 if (FormatTok->isSimpleTypeSpecifier()) { 1370 nextToken(); 1371 continue; 1372 } 1373 switch (FormatTok->Tok.getKind()) { 1374 case tok::l_brace: 1375 break; 1376 case tok::l_paren: 1377 parseParens(); 1378 break; 1379 case tok::amp: 1380 case tok::star: 1381 case tok::kw_const: 1382 case tok::comma: 1383 case tok::less: 1384 case tok::greater: 1385 case tok::identifier: 1386 case tok::numeric_constant: 1387 case tok::coloncolon: 1388 case tok::kw_mutable: 1389 nextToken(); 1390 break; 1391 case tok::arrow: 1392 FormatTok->Type = TT_LambdaArrow; 1393 nextToken(); 1394 break; 1395 default: 1396 return true; 1397 } 1398 } 1399 LSquare.Type = TT_LambdaLSquare; 1400 parseChildBlock(); 1401 return true; 1402 } 1403 1404 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1405 const FormatToken *Previous = FormatTok->Previous; 1406 if (Previous && 1407 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1408 tok::kw_delete, tok::l_square) || 1409 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1410 Previous->isSimpleTypeSpecifier())) { 1411 nextToken(); 1412 return false; 1413 } 1414 nextToken(); 1415 if (FormatTok->is(tok::l_square)) { 1416 return false; 1417 } 1418 parseSquare(/*LambdaIntroducer=*/true); 1419 return true; 1420 } 1421 1422 void UnwrappedLineParser::tryToParseJSFunction() { 1423 assert(FormatTok->is(Keywords.kw_function) || 1424 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1425 if (FormatTok->is(Keywords.kw_async)) 1426 nextToken(); 1427 // Consume "function". 1428 nextToken(); 1429 1430 // Consume * (generator function). Treat it like C++'s overloaded operators. 1431 if (FormatTok->is(tok::star)) { 1432 FormatTok->Type = TT_OverloadedOperator; 1433 nextToken(); 1434 } 1435 1436 // Consume function name. 1437 if (FormatTok->is(tok::identifier)) 1438 nextToken(); 1439 1440 if (FormatTok->isNot(tok::l_paren)) 1441 return; 1442 1443 // Parse formal parameter list. 1444 parseParens(); 1445 1446 if (FormatTok->is(tok::colon)) { 1447 // Parse a type definition. 1448 nextToken(); 1449 1450 // Eat the type declaration. For braced inline object types, balance braces, 1451 // otherwise just parse until finding an l_brace for the function body. 1452 if (FormatTok->is(tok::l_brace)) 1453 tryToParseBracedList(); 1454 else 1455 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1456 nextToken(); 1457 } 1458 1459 if (FormatTok->is(tok::semi)) 1460 return; 1461 1462 parseChildBlock(); 1463 } 1464 1465 bool UnwrappedLineParser::tryToParseBracedList() { 1466 if (FormatTok->BlockKind == BK_Unknown) 1467 calculateBraceTypes(); 1468 assert(FormatTok->BlockKind != BK_Unknown); 1469 if (FormatTok->BlockKind == BK_Block) 1470 return false; 1471 nextToken(); 1472 parseBracedList(); 1473 return true; 1474 } 1475 1476 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1477 tok::TokenKind ClosingBraceKind) { 1478 bool HasError = false; 1479 1480 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1481 // replace this by using parseAssigmentExpression() inside. 1482 do { 1483 if (Style.Language == FormatStyle::LK_JavaScript) { 1484 if (FormatTok->is(Keywords.kw_function) || 1485 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1486 tryToParseJSFunction(); 1487 continue; 1488 } 1489 if (FormatTok->is(TT_JsFatArrow)) { 1490 nextToken(); 1491 // Fat arrows can be followed by simple expressions or by child blocks 1492 // in curly braces. 1493 if (FormatTok->is(tok::l_brace)) { 1494 parseChildBlock(); 1495 continue; 1496 } 1497 } 1498 if (FormatTok->is(tok::l_brace)) { 1499 // Could be a method inside of a braced list `{a() { return 1; }}`. 1500 if (tryToParseBracedList()) 1501 continue; 1502 parseChildBlock(); 1503 } 1504 } 1505 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1506 nextToken(); 1507 return !HasError; 1508 } 1509 switch (FormatTok->Tok.getKind()) { 1510 case tok::caret: 1511 nextToken(); 1512 if (FormatTok->is(tok::l_brace)) { 1513 parseChildBlock(); 1514 } 1515 break; 1516 case tok::l_square: 1517 tryToParseLambda(); 1518 break; 1519 case tok::l_paren: 1520 parseParens(); 1521 // JavaScript can just have free standing methods and getters/setters in 1522 // object literals. Detect them by a "{" following ")". 1523 if (Style.Language == FormatStyle::LK_JavaScript) { 1524 if (FormatTok->is(tok::l_brace)) 1525 parseChildBlock(); 1526 break; 1527 } 1528 break; 1529 case tok::l_brace: 1530 // Assume there are no blocks inside a braced init list apart 1531 // from the ones we explicitly parse out (like lambdas). 1532 FormatTok->BlockKind = BK_BracedInit; 1533 nextToken(); 1534 parseBracedList(); 1535 break; 1536 case tok::less: 1537 if (Style.Language == FormatStyle::LK_Proto) { 1538 nextToken(); 1539 parseBracedList(/*ContinueOnSemicolons=*/false, 1540 /*ClosingBraceKind=*/tok::greater); 1541 } else { 1542 nextToken(); 1543 } 1544 break; 1545 case tok::semi: 1546 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1547 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1548 // used for error recovery if we have otherwise determined that this is 1549 // a braced list. 1550 if (Style.Language == FormatStyle::LK_JavaScript) { 1551 nextToken(); 1552 break; 1553 } 1554 HasError = true; 1555 if (!ContinueOnSemicolons) 1556 return !HasError; 1557 nextToken(); 1558 break; 1559 case tok::comma: 1560 nextToken(); 1561 break; 1562 default: 1563 nextToken(); 1564 break; 1565 } 1566 } while (!eof()); 1567 return false; 1568 } 1569 1570 void UnwrappedLineParser::parseParens() { 1571 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1572 nextToken(); 1573 do { 1574 switch (FormatTok->Tok.getKind()) { 1575 case tok::l_paren: 1576 parseParens(); 1577 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1578 parseChildBlock(); 1579 break; 1580 case tok::r_paren: 1581 nextToken(); 1582 return; 1583 case tok::r_brace: 1584 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1585 return; 1586 case tok::l_square: 1587 tryToParseLambda(); 1588 break; 1589 case tok::l_brace: 1590 if (!tryToParseBracedList()) 1591 parseChildBlock(); 1592 break; 1593 case tok::at: 1594 nextToken(); 1595 if (FormatTok->Tok.is(tok::l_brace)) { 1596 nextToken(); 1597 parseBracedList(); 1598 } 1599 break; 1600 case tok::kw_class: 1601 if (Style.Language == FormatStyle::LK_JavaScript) 1602 parseRecord(/*ParseAsExpr=*/true); 1603 else 1604 nextToken(); 1605 break; 1606 case tok::identifier: 1607 if (Style.Language == FormatStyle::LK_JavaScript && 1608 (FormatTok->is(Keywords.kw_function) || 1609 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1610 tryToParseJSFunction(); 1611 else 1612 nextToken(); 1613 break; 1614 default: 1615 nextToken(); 1616 break; 1617 } 1618 } while (!eof()); 1619 } 1620 1621 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 1622 if (!LambdaIntroducer) { 1623 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1624 if (tryToParseLambda()) 1625 return; 1626 } 1627 do { 1628 switch (FormatTok->Tok.getKind()) { 1629 case tok::l_paren: 1630 parseParens(); 1631 break; 1632 case tok::r_square: 1633 nextToken(); 1634 return; 1635 case tok::r_brace: 1636 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1637 return; 1638 case tok::l_square: 1639 parseSquare(); 1640 break; 1641 case tok::l_brace: { 1642 if (!tryToParseBracedList()) 1643 parseChildBlock(); 1644 break; 1645 } 1646 case tok::at: 1647 nextToken(); 1648 if (FormatTok->Tok.is(tok::l_brace)) { 1649 nextToken(); 1650 parseBracedList(); 1651 } 1652 break; 1653 default: 1654 nextToken(); 1655 break; 1656 } 1657 } while (!eof()); 1658 } 1659 1660 void UnwrappedLineParser::parseIfThenElse() { 1661 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1662 nextToken(); 1663 if (FormatTok->Tok.is(tok::kw_constexpr)) 1664 nextToken(); 1665 if (FormatTok->Tok.is(tok::l_paren)) 1666 parseParens(); 1667 bool NeedsUnwrappedLine = false; 1668 if (FormatTok->Tok.is(tok::l_brace)) { 1669 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1670 parseBlock(/*MustBeDeclaration=*/false); 1671 if (Style.BraceWrapping.BeforeElse) 1672 addUnwrappedLine(); 1673 else 1674 NeedsUnwrappedLine = true; 1675 } else { 1676 addUnwrappedLine(); 1677 ++Line->Level; 1678 parseStructuralElement(); 1679 --Line->Level; 1680 } 1681 if (FormatTok->Tok.is(tok::kw_else)) { 1682 nextToken(); 1683 if (FormatTok->Tok.is(tok::l_brace)) { 1684 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1685 parseBlock(/*MustBeDeclaration=*/false); 1686 addUnwrappedLine(); 1687 } else if (FormatTok->Tok.is(tok::kw_if)) { 1688 parseIfThenElse(); 1689 } else { 1690 addUnwrappedLine(); 1691 ++Line->Level; 1692 parseStructuralElement(); 1693 if (FormatTok->is(tok::eof)) 1694 addUnwrappedLine(); 1695 --Line->Level; 1696 } 1697 } else if (NeedsUnwrappedLine) { 1698 addUnwrappedLine(); 1699 } 1700 } 1701 1702 void UnwrappedLineParser::parseTryCatch() { 1703 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1704 nextToken(); 1705 bool NeedsUnwrappedLine = false; 1706 if (FormatTok->is(tok::colon)) { 1707 // We are in a function try block, what comes is an initializer list. 1708 nextToken(); 1709 while (FormatTok->is(tok::identifier)) { 1710 nextToken(); 1711 if (FormatTok->is(tok::l_paren)) 1712 parseParens(); 1713 if (FormatTok->is(tok::comma)) 1714 nextToken(); 1715 } 1716 } 1717 // Parse try with resource. 1718 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1719 parseParens(); 1720 } 1721 if (FormatTok->is(tok::l_brace)) { 1722 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1723 parseBlock(/*MustBeDeclaration=*/false); 1724 if (Style.BraceWrapping.BeforeCatch) { 1725 addUnwrappedLine(); 1726 } else { 1727 NeedsUnwrappedLine = true; 1728 } 1729 } else if (!FormatTok->is(tok::kw_catch)) { 1730 // The C++ standard requires a compound-statement after a try. 1731 // If there's none, we try to assume there's a structuralElement 1732 // and try to continue. 1733 addUnwrappedLine(); 1734 ++Line->Level; 1735 parseStructuralElement(); 1736 --Line->Level; 1737 } 1738 while (1) { 1739 if (FormatTok->is(tok::at)) 1740 nextToken(); 1741 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1742 tok::kw___finally) || 1743 ((Style.Language == FormatStyle::LK_Java || 1744 Style.Language == FormatStyle::LK_JavaScript) && 1745 FormatTok->is(Keywords.kw_finally)) || 1746 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1747 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1748 break; 1749 nextToken(); 1750 while (FormatTok->isNot(tok::l_brace)) { 1751 if (FormatTok->is(tok::l_paren)) { 1752 parseParens(); 1753 continue; 1754 } 1755 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1756 return; 1757 nextToken(); 1758 } 1759 NeedsUnwrappedLine = false; 1760 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1761 parseBlock(/*MustBeDeclaration=*/false); 1762 if (Style.BraceWrapping.BeforeCatch) 1763 addUnwrappedLine(); 1764 else 1765 NeedsUnwrappedLine = true; 1766 } 1767 if (NeedsUnwrappedLine) 1768 addUnwrappedLine(); 1769 } 1770 1771 void UnwrappedLineParser::parseNamespace() { 1772 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1773 1774 const FormatToken &InitialToken = *FormatTok; 1775 nextToken(); 1776 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1777 nextToken(); 1778 if (FormatTok->Tok.is(tok::l_brace)) { 1779 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1780 addUnwrappedLine(); 1781 1782 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1783 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1784 DeclarationScopeStack.size() > 1); 1785 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1786 // Munch the semicolon after a namespace. This is more common than one would 1787 // think. Puttin the semicolon into its own line is very ugly. 1788 if (FormatTok->Tok.is(tok::semi)) 1789 nextToken(); 1790 addUnwrappedLine(); 1791 } 1792 // FIXME: Add error handling. 1793 } 1794 1795 void UnwrappedLineParser::parseNew() { 1796 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1797 nextToken(); 1798 if (Style.Language != FormatStyle::LK_Java) 1799 return; 1800 1801 // In Java, we can parse everything up to the parens, which aren't optional. 1802 do { 1803 // There should not be a ;, { or } before the new's open paren. 1804 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1805 return; 1806 1807 // Consume the parens. 1808 if (FormatTok->is(tok::l_paren)) { 1809 parseParens(); 1810 1811 // If there is a class body of an anonymous class, consume that as child. 1812 if (FormatTok->is(tok::l_brace)) 1813 parseChildBlock(); 1814 return; 1815 } 1816 nextToken(); 1817 } while (!eof()); 1818 } 1819 1820 void UnwrappedLineParser::parseForOrWhileLoop() { 1821 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1822 "'for', 'while' or foreach macro expected"); 1823 nextToken(); 1824 // JS' for await ( ... 1825 if (Style.Language == FormatStyle::LK_JavaScript && 1826 FormatTok->is(Keywords.kw_await)) 1827 nextToken(); 1828 if (FormatTok->Tok.is(tok::l_paren)) 1829 parseParens(); 1830 if (FormatTok->Tok.is(tok::l_brace)) { 1831 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1832 parseBlock(/*MustBeDeclaration=*/false); 1833 addUnwrappedLine(); 1834 } else { 1835 addUnwrappedLine(); 1836 ++Line->Level; 1837 parseStructuralElement(); 1838 --Line->Level; 1839 } 1840 } 1841 1842 void UnwrappedLineParser::parseDoWhile() { 1843 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1844 nextToken(); 1845 if (FormatTok->Tok.is(tok::l_brace)) { 1846 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1847 parseBlock(/*MustBeDeclaration=*/false); 1848 if (Style.BraceWrapping.IndentBraces) 1849 addUnwrappedLine(); 1850 } else { 1851 addUnwrappedLine(); 1852 ++Line->Level; 1853 parseStructuralElement(); 1854 --Line->Level; 1855 } 1856 1857 // FIXME: Add error handling. 1858 if (!FormatTok->Tok.is(tok::kw_while)) { 1859 addUnwrappedLine(); 1860 return; 1861 } 1862 1863 nextToken(); 1864 parseStructuralElement(); 1865 } 1866 1867 void UnwrappedLineParser::parseLabel() { 1868 nextToken(); 1869 unsigned OldLineLevel = Line->Level; 1870 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1871 --Line->Level; 1872 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1873 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1874 parseBlock(/*MustBeDeclaration=*/false); 1875 if (FormatTok->Tok.is(tok::kw_break)) { 1876 if (Style.BraceWrapping.AfterControlStatement) 1877 addUnwrappedLine(); 1878 parseStructuralElement(); 1879 } 1880 addUnwrappedLine(); 1881 } else { 1882 if (FormatTok->is(tok::semi)) 1883 nextToken(); 1884 addUnwrappedLine(); 1885 } 1886 Line->Level = OldLineLevel; 1887 if (FormatTok->isNot(tok::l_brace)) { 1888 parseStructuralElement(); 1889 addUnwrappedLine(); 1890 } 1891 } 1892 1893 void UnwrappedLineParser::parseCaseLabel() { 1894 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1895 // FIXME: fix handling of complex expressions here. 1896 do { 1897 nextToken(); 1898 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1899 parseLabel(); 1900 } 1901 1902 void UnwrappedLineParser::parseSwitch() { 1903 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1904 nextToken(); 1905 if (FormatTok->Tok.is(tok::l_paren)) 1906 parseParens(); 1907 if (FormatTok->Tok.is(tok::l_brace)) { 1908 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1909 parseBlock(/*MustBeDeclaration=*/false); 1910 addUnwrappedLine(); 1911 } else { 1912 addUnwrappedLine(); 1913 ++Line->Level; 1914 parseStructuralElement(); 1915 --Line->Level; 1916 } 1917 } 1918 1919 void UnwrappedLineParser::parseAccessSpecifier() { 1920 nextToken(); 1921 // Understand Qt's slots. 1922 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 1923 nextToken(); 1924 // Otherwise, we don't know what it is, and we'd better keep the next token. 1925 if (FormatTok->Tok.is(tok::colon)) 1926 nextToken(); 1927 addUnwrappedLine(); 1928 } 1929 1930 bool UnwrappedLineParser::parseEnum() { 1931 // Won't be 'enum' for NS_ENUMs. 1932 if (FormatTok->Tok.is(tok::kw_enum)) 1933 nextToken(); 1934 1935 // In TypeScript, "enum" can also be used as property name, e.g. in interface 1936 // declarations. An "enum" keyword followed by a colon would be a syntax 1937 // error and thus assume it is just an identifier. 1938 if (Style.Language == FormatStyle::LK_JavaScript && 1939 FormatTok->isOneOf(tok::colon, tok::question)) 1940 return false; 1941 1942 // Eat up enum class ... 1943 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 1944 nextToken(); 1945 1946 while (FormatTok->Tok.getIdentifierInfo() || 1947 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 1948 tok::greater, tok::comma, tok::question)) { 1949 nextToken(); 1950 // We can have macros or attributes in between 'enum' and the enum name. 1951 if (FormatTok->is(tok::l_paren)) 1952 parseParens(); 1953 if (FormatTok->is(tok::identifier)) { 1954 nextToken(); 1955 // If there are two identifiers in a row, this is likely an elaborate 1956 // return type. In Java, this can be "implements", etc. 1957 if (Style.isCpp() && FormatTok->is(tok::identifier)) 1958 return false; 1959 } 1960 } 1961 1962 // Just a declaration or something is wrong. 1963 if (FormatTok->isNot(tok::l_brace)) 1964 return true; 1965 FormatTok->BlockKind = BK_Block; 1966 1967 if (Style.Language == FormatStyle::LK_Java) { 1968 // Java enums are different. 1969 parseJavaEnumBody(); 1970 return true; 1971 } 1972 if (Style.Language == FormatStyle::LK_Proto) { 1973 parseBlock(/*MustBeDeclaration=*/true); 1974 return true; 1975 } 1976 1977 // Parse enum body. 1978 nextToken(); 1979 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 1980 if (HasError) { 1981 if (FormatTok->is(tok::semi)) 1982 nextToken(); 1983 addUnwrappedLine(); 1984 } 1985 return true; 1986 1987 // There is no addUnwrappedLine() here so that we fall through to parsing a 1988 // structural element afterwards. Thus, in "enum A {} n, m;", 1989 // "} n, m;" will end up in one unwrapped line. 1990 } 1991 1992 void UnwrappedLineParser::parseJavaEnumBody() { 1993 // Determine whether the enum is simple, i.e. does not have a semicolon or 1994 // constants with class bodies. Simple enums can be formatted like braced 1995 // lists, contracted to a single line, etc. 1996 unsigned StoredPosition = Tokens->getPosition(); 1997 bool IsSimple = true; 1998 FormatToken *Tok = Tokens->getNextToken(); 1999 while (Tok) { 2000 if (Tok->is(tok::r_brace)) 2001 break; 2002 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2003 IsSimple = false; 2004 break; 2005 } 2006 // FIXME: This will also mark enums with braces in the arguments to enum 2007 // constants as "not simple". This is probably fine in practice, though. 2008 Tok = Tokens->getNextToken(); 2009 } 2010 FormatTok = Tokens->setPosition(StoredPosition); 2011 2012 if (IsSimple) { 2013 nextToken(); 2014 parseBracedList(); 2015 addUnwrappedLine(); 2016 return; 2017 } 2018 2019 // Parse the body of a more complex enum. 2020 // First add a line for everything up to the "{". 2021 nextToken(); 2022 addUnwrappedLine(); 2023 ++Line->Level; 2024 2025 // Parse the enum constants. 2026 while (FormatTok) { 2027 if (FormatTok->is(tok::l_brace)) { 2028 // Parse the constant's class body. 2029 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2030 /*MunchSemi=*/false); 2031 } else if (FormatTok->is(tok::l_paren)) { 2032 parseParens(); 2033 } else if (FormatTok->is(tok::comma)) { 2034 nextToken(); 2035 addUnwrappedLine(); 2036 } else if (FormatTok->is(tok::semi)) { 2037 nextToken(); 2038 addUnwrappedLine(); 2039 break; 2040 } else if (FormatTok->is(tok::r_brace)) { 2041 addUnwrappedLine(); 2042 break; 2043 } else { 2044 nextToken(); 2045 } 2046 } 2047 2048 // Parse the class body after the enum's ";" if any. 2049 parseLevel(/*HasOpeningBrace=*/true); 2050 nextToken(); 2051 --Line->Level; 2052 addUnwrappedLine(); 2053 } 2054 2055 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 2056 const FormatToken &InitialToken = *FormatTok; 2057 nextToken(); 2058 2059 // The actual identifier can be a nested name specifier, and in macros 2060 // it is often token-pasted. 2061 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 2062 tok::kw___attribute, tok::kw___declspec, 2063 tok::kw_alignas) || 2064 ((Style.Language == FormatStyle::LK_Java || 2065 Style.Language == FormatStyle::LK_JavaScript) && 2066 FormatTok->isOneOf(tok::period, tok::comma))) { 2067 if (Style.Language == FormatStyle::LK_JavaScript && 2068 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 2069 // JavaScript/TypeScript supports inline object types in 2070 // extends/implements positions: 2071 // class Foo implements {bar: number} { } 2072 nextToken(); 2073 if (FormatTok->is(tok::l_brace)) { 2074 tryToParseBracedList(); 2075 continue; 2076 } 2077 } 2078 bool IsNonMacroIdentifier = 2079 FormatTok->is(tok::identifier) && 2080 FormatTok->TokenText != FormatTok->TokenText.upper(); 2081 nextToken(); 2082 // We can have macros or attributes in between 'class' and the class name. 2083 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 2084 parseParens(); 2085 } 2086 2087 // Note that parsing away template declarations here leads to incorrectly 2088 // accepting function declarations as record declarations. 2089 // In general, we cannot solve this problem. Consider: 2090 // class A<int> B() {} 2091 // which can be a function definition or a class definition when B() is a 2092 // macro. If we find enough real-world cases where this is a problem, we 2093 // can parse for the 'template' keyword in the beginning of the statement, 2094 // and thus rule out the record production in case there is no template 2095 // (this would still leave us with an ambiguity between template function 2096 // and class declarations). 2097 if (FormatTok->isOneOf(tok::colon, tok::less)) { 2098 while (!eof()) { 2099 if (FormatTok->is(tok::l_brace)) { 2100 calculateBraceTypes(/*ExpectClassBody=*/true); 2101 if (!tryToParseBracedList()) 2102 break; 2103 } 2104 if (FormatTok->Tok.is(tok::semi)) 2105 return; 2106 nextToken(); 2107 } 2108 } 2109 if (FormatTok->Tok.is(tok::l_brace)) { 2110 if (ParseAsExpr) { 2111 parseChildBlock(); 2112 } else { 2113 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2114 addUnwrappedLine(); 2115 2116 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2117 /*MunchSemi=*/false); 2118 } 2119 } 2120 // There is no addUnwrappedLine() here so that we fall through to parsing a 2121 // structural element afterwards. Thus, in "class A {} n, m;", 2122 // "} n, m;" will end up in one unwrapped line. 2123 } 2124 2125 void UnwrappedLineParser::parseObjCMethod() { 2126 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && 2127 "'(' or identifier expected."); 2128 do { 2129 if (FormatTok->Tok.is(tok::semi)) { 2130 nextToken(); 2131 addUnwrappedLine(); 2132 return; 2133 } else if (FormatTok->Tok.is(tok::l_brace)) { 2134 parseBlock(/*MustBeDeclaration=*/false); 2135 addUnwrappedLine(); 2136 return; 2137 } else { 2138 nextToken(); 2139 } 2140 } while (!eof()); 2141 } 2142 2143 void UnwrappedLineParser::parseObjCProtocolList() { 2144 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 2145 do { 2146 nextToken(); 2147 // Early exit in case someone forgot a close angle. 2148 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2149 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2150 return; 2151 } while (!eof() && FormatTok->Tok.isNot(tok::greater)); 2152 nextToken(); // Skip '>'. 2153 } 2154 2155 void UnwrappedLineParser::parseObjCUntilAtEnd() { 2156 do { 2157 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2158 nextToken(); 2159 addUnwrappedLine(); 2160 break; 2161 } 2162 if (FormatTok->is(tok::l_brace)) { 2163 parseBlock(/*MustBeDeclaration=*/false); 2164 // In ObjC interfaces, nothing should be following the "}". 2165 addUnwrappedLine(); 2166 } else if (FormatTok->is(tok::r_brace)) { 2167 // Ignore stray "}". parseStructuralElement doesn't consume them. 2168 nextToken(); 2169 addUnwrappedLine(); 2170 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 2171 nextToken(); 2172 parseObjCMethod(); 2173 } else { 2174 parseStructuralElement(); 2175 } 2176 } while (!eof()); 2177 } 2178 2179 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2180 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 2181 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 2182 nextToken(); 2183 nextToken(); // interface name 2184 2185 // @interface can be followed by a lightweight generic 2186 // specialization list, then either a base class or a category. 2187 if (FormatTok->Tok.is(tok::less)) { 2188 // Unlike protocol lists, generic parameterizations support 2189 // nested angles: 2190 // 2191 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 2192 // NSObject <NSCopying, NSSecureCoding> 2193 // 2194 // so we need to count how many open angles we have left. 2195 unsigned NumOpenAngles = 1; 2196 do { 2197 nextToken(); 2198 // Early exit in case someone forgot a close angle. 2199 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2200 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2201 break; 2202 if (FormatTok->Tok.is(tok::less)) 2203 ++NumOpenAngles; 2204 else if (FormatTok->Tok.is(tok::greater)) { 2205 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 2206 --NumOpenAngles; 2207 } 2208 } while (!eof() && NumOpenAngles != 0); 2209 nextToken(); // Skip '>'. 2210 } 2211 if (FormatTok->Tok.is(tok::colon)) { 2212 nextToken(); 2213 nextToken(); // base class name 2214 } else if (FormatTok->Tok.is(tok::l_paren)) 2215 // Skip category, if present. 2216 parseParens(); 2217 2218 if (FormatTok->Tok.is(tok::less)) 2219 parseObjCProtocolList(); 2220 2221 if (FormatTok->Tok.is(tok::l_brace)) { 2222 if (Style.BraceWrapping.AfterObjCDeclaration) 2223 addUnwrappedLine(); 2224 parseBlock(/*MustBeDeclaration=*/true); 2225 } 2226 2227 // With instance variables, this puts '}' on its own line. Without instance 2228 // variables, this ends the @interface line. 2229 addUnwrappedLine(); 2230 2231 parseObjCUntilAtEnd(); 2232 } 2233 2234 // Returns true for the declaration/definition form of @protocol, 2235 // false for the expression form. 2236 bool UnwrappedLineParser::parseObjCProtocol() { 2237 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 2238 nextToken(); 2239 2240 if (FormatTok->is(tok::l_paren)) 2241 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 2242 return false; 2243 2244 // The definition/declaration form, 2245 // @protocol Foo 2246 // - (int)someMethod; 2247 // @end 2248 2249 nextToken(); // protocol name 2250 2251 if (FormatTok->Tok.is(tok::less)) 2252 parseObjCProtocolList(); 2253 2254 // Check for protocol declaration. 2255 if (FormatTok->Tok.is(tok::semi)) { 2256 nextToken(); 2257 addUnwrappedLine(); 2258 return true; 2259 } 2260 2261 addUnwrappedLine(); 2262 parseObjCUntilAtEnd(); 2263 return true; 2264 } 2265 2266 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2267 bool IsImport = FormatTok->is(Keywords.kw_import); 2268 assert(IsImport || FormatTok->is(tok::kw_export)); 2269 nextToken(); 2270 2271 // Consume the "default" in "export default class/function". 2272 if (FormatTok->is(tok::kw_default)) 2273 nextToken(); 2274 2275 // Consume "async function", "function" and "default function", so that these 2276 // get parsed as free-standing JS functions, i.e. do not require a trailing 2277 // semicolon. 2278 if (FormatTok->is(Keywords.kw_async)) 2279 nextToken(); 2280 if (FormatTok->is(Keywords.kw_function)) { 2281 nextToken(); 2282 return; 2283 } 2284 2285 // For imports, `export *`, `export {...}`, consume the rest of the line up 2286 // to the terminating `;`. For everything else, just return and continue 2287 // parsing the structural element, i.e. the declaration or expression for 2288 // `export default`. 2289 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2290 !FormatTok->isStringLiteral()) 2291 return; 2292 2293 while (!eof()) { 2294 if (FormatTok->is(tok::semi)) 2295 return; 2296 if (Line->Tokens.empty()) { 2297 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2298 // import statement should terminate. 2299 return; 2300 } 2301 if (FormatTok->is(tok::l_brace)) { 2302 FormatTok->BlockKind = BK_Block; 2303 nextToken(); 2304 parseBracedList(); 2305 } else { 2306 nextToken(); 2307 } 2308 } 2309 } 2310 2311 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2312 StringRef Prefix = "") { 2313 llvm::dbgs() << Prefix << "Line(" << Line.Level 2314 << ", FSC=" << Line.FirstStartColumn << ")" 2315 << (Line.InPPDirective ? " MACRO" : "") << ": "; 2316 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2317 E = Line.Tokens.end(); 2318 I != E; ++I) { 2319 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2320 << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn 2321 << "] "; 2322 } 2323 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2324 E = Line.Tokens.end(); 2325 I != E; ++I) { 2326 const UnwrappedLineNode &Node = *I; 2327 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2328 I = Node.Children.begin(), 2329 E = Node.Children.end(); 2330 I != E; ++I) { 2331 printDebugInfo(*I, "\nChild: "); 2332 } 2333 } 2334 llvm::dbgs() << "\n"; 2335 } 2336 2337 void UnwrappedLineParser::addUnwrappedLine() { 2338 if (Line->Tokens.empty()) 2339 return; 2340 LLVM_DEBUG({ 2341 if (CurrentLines == &Lines) 2342 printDebugInfo(*Line); 2343 }); 2344 CurrentLines->push_back(std::move(*Line)); 2345 Line->Tokens.clear(); 2346 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2347 Line->FirstStartColumn = 0; 2348 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2349 CurrentLines->append( 2350 std::make_move_iterator(PreprocessorDirectives.begin()), 2351 std::make_move_iterator(PreprocessorDirectives.end())); 2352 PreprocessorDirectives.clear(); 2353 } 2354 // Disconnect the current token from the last token on the previous line. 2355 FormatTok->Previous = nullptr; 2356 } 2357 2358 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2359 2360 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2361 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2362 FormatTok.NewlinesBefore > 0; 2363 } 2364 2365 // Checks if \p FormatTok is a line comment that continues the line comment 2366 // section on \p Line. 2367 static bool continuesLineCommentSection(const FormatToken &FormatTok, 2368 const UnwrappedLine &Line, 2369 llvm::Regex &CommentPragmasRegex) { 2370 if (Line.Tokens.empty()) 2371 return false; 2372 2373 StringRef IndentContent = FormatTok.TokenText; 2374 if (FormatTok.TokenText.startswith("//") || 2375 FormatTok.TokenText.startswith("/*")) 2376 IndentContent = FormatTok.TokenText.substr(2); 2377 if (CommentPragmasRegex.match(IndentContent)) 2378 return false; 2379 2380 // If Line starts with a line comment, then FormatTok continues the comment 2381 // section if its original column is greater or equal to the original start 2382 // column of the line. 2383 // 2384 // Define the min column token of a line as follows: if a line ends in '{' or 2385 // contains a '{' followed by a line comment, then the min column token is 2386 // that '{'. Otherwise, the min column token of the line is the first token of 2387 // the line. 2388 // 2389 // If Line starts with a token other than a line comment, then FormatTok 2390 // continues the comment section if its original column is greater than the 2391 // original start column of the min column token of the line. 2392 // 2393 // For example, the second line comment continues the first in these cases: 2394 // 2395 // // first line 2396 // // second line 2397 // 2398 // and: 2399 // 2400 // // first line 2401 // // second line 2402 // 2403 // and: 2404 // 2405 // int i; // first line 2406 // // second line 2407 // 2408 // and: 2409 // 2410 // do { // first line 2411 // // second line 2412 // int i; 2413 // } while (true); 2414 // 2415 // and: 2416 // 2417 // enum { 2418 // a, // first line 2419 // // second line 2420 // b 2421 // }; 2422 // 2423 // The second line comment doesn't continue the first in these cases: 2424 // 2425 // // first line 2426 // // second line 2427 // 2428 // and: 2429 // 2430 // int i; // first line 2431 // // second line 2432 // 2433 // and: 2434 // 2435 // do { // first line 2436 // // second line 2437 // int i; 2438 // } while (true); 2439 // 2440 // and: 2441 // 2442 // enum { 2443 // a, // first line 2444 // // second line 2445 // }; 2446 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2447 2448 // Scan for '{//'. If found, use the column of '{' as a min column for line 2449 // comment section continuation. 2450 const FormatToken *PreviousToken = nullptr; 2451 for (const UnwrappedLineNode &Node : Line.Tokens) { 2452 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2453 isLineComment(*Node.Tok)) { 2454 MinColumnToken = PreviousToken; 2455 break; 2456 } 2457 PreviousToken = Node.Tok; 2458 2459 // Grab the last newline preceding a token in this unwrapped line. 2460 if (Node.Tok->NewlinesBefore > 0) { 2461 MinColumnToken = Node.Tok; 2462 } 2463 } 2464 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2465 MinColumnToken = PreviousToken; 2466 } 2467 2468 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2469 MinColumnToken); 2470 } 2471 2472 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2473 bool JustComments = Line->Tokens.empty(); 2474 for (SmallVectorImpl<FormatToken *>::const_iterator 2475 I = CommentsBeforeNextToken.begin(), 2476 E = CommentsBeforeNextToken.end(); 2477 I != E; ++I) { 2478 // Line comments that belong to the same line comment section are put on the 2479 // same line since later we might want to reflow content between them. 2480 // Additional fine-grained breaking of line comment sections is controlled 2481 // by the class BreakableLineCommentSection in case it is desirable to keep 2482 // several line comment sections in the same unwrapped line. 2483 // 2484 // FIXME: Consider putting separate line comment sections as children to the 2485 // unwrapped line instead. 2486 (*I)->ContinuesLineCommentSection = 2487 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2488 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2489 addUnwrappedLine(); 2490 pushToken(*I); 2491 } 2492 if (NewlineBeforeNext && JustComments) 2493 addUnwrappedLine(); 2494 CommentsBeforeNextToken.clear(); 2495 } 2496 2497 void UnwrappedLineParser::nextToken(int LevelDifference) { 2498 if (eof()) 2499 return; 2500 flushComments(isOnNewLine(*FormatTok)); 2501 pushToken(FormatTok); 2502 FormatToken *Previous = FormatTok; 2503 if (Style.Language != FormatStyle::LK_JavaScript) 2504 readToken(LevelDifference); 2505 else 2506 readTokenWithJavaScriptASI(); 2507 FormatTok->Previous = Previous; 2508 } 2509 2510 void UnwrappedLineParser::distributeComments( 2511 const SmallVectorImpl<FormatToken *> &Comments, 2512 const FormatToken *NextTok) { 2513 // Whether or not a line comment token continues a line is controlled by 2514 // the method continuesLineCommentSection, with the following caveat: 2515 // 2516 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2517 // that each comment line from the trail is aligned with the next token, if 2518 // the next token exists. If a trail exists, the beginning of the maximal 2519 // trail is marked as a start of a new comment section. 2520 // 2521 // For example in this code: 2522 // 2523 // int a; // line about a 2524 // // line 1 about b 2525 // // line 2 about b 2526 // int b; 2527 // 2528 // the two lines about b form a maximal trail, so there are two sections, the 2529 // first one consisting of the single comment "// line about a" and the 2530 // second one consisting of the next two comments. 2531 if (Comments.empty()) 2532 return; 2533 bool ShouldPushCommentsInCurrentLine = true; 2534 bool HasTrailAlignedWithNextToken = false; 2535 unsigned StartOfTrailAlignedWithNextToken = 0; 2536 if (NextTok) { 2537 // We are skipping the first element intentionally. 2538 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2539 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2540 HasTrailAlignedWithNextToken = true; 2541 StartOfTrailAlignedWithNextToken = i; 2542 } 2543 } 2544 } 2545 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2546 FormatToken *FormatTok = Comments[i]; 2547 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 2548 FormatTok->ContinuesLineCommentSection = false; 2549 } else { 2550 FormatTok->ContinuesLineCommentSection = 2551 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2552 } 2553 if (!FormatTok->ContinuesLineCommentSection && 2554 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2555 ShouldPushCommentsInCurrentLine = false; 2556 } 2557 if (ShouldPushCommentsInCurrentLine) { 2558 pushToken(FormatTok); 2559 } else { 2560 CommentsBeforeNextToken.push_back(FormatTok); 2561 } 2562 } 2563 } 2564 2565 void UnwrappedLineParser::readToken(int LevelDifference) { 2566 SmallVector<FormatToken *, 1> Comments; 2567 do { 2568 FormatTok = Tokens->getNextToken(); 2569 assert(FormatTok); 2570 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2571 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2572 distributeComments(Comments, FormatTok); 2573 Comments.clear(); 2574 // If there is an unfinished unwrapped line, we flush the preprocessor 2575 // directives only after that unwrapped line was finished later. 2576 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2577 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2578 assert((LevelDifference >= 0 || 2579 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 2580 "LevelDifference makes Line->Level negative"); 2581 Line->Level += LevelDifference; 2582 // Comments stored before the preprocessor directive need to be output 2583 // before the preprocessor directive, at the same level as the 2584 // preprocessor directive, as we consider them to apply to the directive. 2585 flushComments(isOnNewLine(*FormatTok)); 2586 parsePPDirective(); 2587 } 2588 while (FormatTok->Type == TT_ConflictStart || 2589 FormatTok->Type == TT_ConflictEnd || 2590 FormatTok->Type == TT_ConflictAlternative) { 2591 if (FormatTok->Type == TT_ConflictStart) { 2592 conditionalCompilationStart(/*Unreachable=*/false); 2593 } else if (FormatTok->Type == TT_ConflictAlternative) { 2594 conditionalCompilationAlternative(); 2595 } else if (FormatTok->Type == TT_ConflictEnd) { 2596 conditionalCompilationEnd(); 2597 } 2598 FormatTok = Tokens->getNextToken(); 2599 FormatTok->MustBreakBefore = true; 2600 } 2601 2602 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 2603 !Line->InPPDirective) { 2604 continue; 2605 } 2606 2607 if (!FormatTok->Tok.is(tok::comment)) { 2608 distributeComments(Comments, FormatTok); 2609 Comments.clear(); 2610 return; 2611 } 2612 2613 Comments.push_back(FormatTok); 2614 } while (!eof()); 2615 2616 distributeComments(Comments, nullptr); 2617 Comments.clear(); 2618 } 2619 2620 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2621 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2622 if (MustBreakBeforeNextToken) { 2623 Line->Tokens.back().Tok->MustBreakBefore = true; 2624 MustBreakBeforeNextToken = false; 2625 } 2626 } 2627 2628 } // end namespace format 2629 } // end namespace clang 2630