1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "UnwrappedLineParser.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/Support/Debug.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #include <algorithm> 22 23 #define DEBUG_TYPE "format-parser" 24 25 namespace clang { 26 namespace format { 27 28 class FormatTokenSource { 29 public: 30 virtual ~FormatTokenSource() {} 31 virtual FormatToken *getNextToken() = 0; 32 33 virtual unsigned getPosition() = 0; 34 virtual FormatToken *setPosition(unsigned Position) = 0; 35 }; 36 37 namespace { 38 39 class ScopedDeclarationState { 40 public: 41 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 42 bool MustBeDeclaration) 43 : Line(Line), Stack(Stack) { 44 Line.MustBeDeclaration = MustBeDeclaration; 45 Stack.push_back(MustBeDeclaration); 46 } 47 ~ScopedDeclarationState() { 48 Stack.pop_back(); 49 if (!Stack.empty()) 50 Line.MustBeDeclaration = Stack.back(); 51 else 52 Line.MustBeDeclaration = true; 53 } 54 55 private: 56 UnwrappedLine &Line; 57 std::vector<bool> &Stack; 58 }; 59 60 static bool isLineComment(const FormatToken &FormatTok) { 61 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 62 } 63 64 // Checks if \p FormatTok is a line comment that continues the line comment 65 // \p Previous. The original column of \p MinColumnToken is used to determine 66 // whether \p FormatTok is indented enough to the right to continue \p Previous. 67 static bool continuesLineComment(const FormatToken &FormatTok, 68 const FormatToken *Previous, 69 const FormatToken *MinColumnToken) { 70 if (!Previous || !MinColumnToken) 71 return false; 72 unsigned MinContinueColumn = 73 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 74 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 75 isLineComment(*Previous) && 76 FormatTok.OriginalColumn >= MinContinueColumn; 77 } 78 79 class ScopedMacroState : public FormatTokenSource { 80 public: 81 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 82 FormatToken *&ResetToken) 83 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 84 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 85 Token(nullptr), PreviousToken(nullptr) { 86 TokenSource = this; 87 Line.Level = 0; 88 Line.InPPDirective = true; 89 } 90 91 ~ScopedMacroState() override { 92 TokenSource = PreviousTokenSource; 93 ResetToken = Token; 94 Line.InPPDirective = false; 95 Line.Level = PreviousLineLevel; 96 } 97 98 FormatToken *getNextToken() override { 99 // The \c UnwrappedLineParser guards against this by never calling 100 // \c getNextToken() after it has encountered the first eof token. 101 assert(!eof()); 102 PreviousToken = Token; 103 Token = PreviousTokenSource->getNextToken(); 104 if (eof()) 105 return getFakeEOF(); 106 return Token; 107 } 108 109 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 110 111 FormatToken *setPosition(unsigned Position) override { 112 PreviousToken = nullptr; 113 Token = PreviousTokenSource->setPosition(Position); 114 return Token; 115 } 116 117 private: 118 bool eof() { 119 return Token && Token->HasUnescapedNewline && 120 !continuesLineComment(*Token, PreviousToken, 121 /*MinColumnToken=*/PreviousToken); 122 } 123 124 FormatToken *getFakeEOF() { 125 static bool EOFInitialized = false; 126 static FormatToken FormatTok; 127 if (!EOFInitialized) { 128 FormatTok.Tok.startToken(); 129 FormatTok.Tok.setKind(tok::eof); 130 EOFInitialized = true; 131 } 132 return &FormatTok; 133 } 134 135 UnwrappedLine &Line; 136 FormatTokenSource *&TokenSource; 137 FormatToken *&ResetToken; 138 unsigned PreviousLineLevel; 139 FormatTokenSource *PreviousTokenSource; 140 141 FormatToken *Token; 142 FormatToken *PreviousToken; 143 }; 144 145 } // end anonymous namespace 146 147 class ScopedLineState { 148 public: 149 ScopedLineState(UnwrappedLineParser &Parser, 150 bool SwitchToPreprocessorLines = false) 151 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 152 if (SwitchToPreprocessorLines) 153 Parser.CurrentLines = &Parser.PreprocessorDirectives; 154 else if (!Parser.Line->Tokens.empty()) 155 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 156 PreBlockLine = std::move(Parser.Line); 157 Parser.Line = llvm::make_unique<UnwrappedLine>(); 158 Parser.Line->Level = PreBlockLine->Level; 159 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 160 } 161 162 ~ScopedLineState() { 163 if (!Parser.Line->Tokens.empty()) { 164 Parser.addUnwrappedLine(); 165 } 166 assert(Parser.Line->Tokens.empty()); 167 Parser.Line = std::move(PreBlockLine); 168 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 169 Parser.MustBreakBeforeNextToken = true; 170 Parser.CurrentLines = OriginalLines; 171 } 172 173 private: 174 UnwrappedLineParser &Parser; 175 176 std::unique_ptr<UnwrappedLine> PreBlockLine; 177 SmallVectorImpl<UnwrappedLine> *OriginalLines; 178 }; 179 180 class CompoundStatementIndenter { 181 public: 182 CompoundStatementIndenter(UnwrappedLineParser *Parser, 183 const FormatStyle &Style, unsigned &LineLevel) 184 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 185 if (Style.BraceWrapping.AfterControlStatement) 186 Parser->addUnwrappedLine(); 187 if (Style.BraceWrapping.IndentBraces) 188 ++LineLevel; 189 } 190 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 191 192 private: 193 unsigned &LineLevel; 194 unsigned OldLineLevel; 195 }; 196 197 namespace { 198 199 class IndexedTokenSource : public FormatTokenSource { 200 public: 201 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 202 : Tokens(Tokens), Position(-1) {} 203 204 FormatToken *getNextToken() override { 205 ++Position; 206 return Tokens[Position]; 207 } 208 209 unsigned getPosition() override { 210 assert(Position >= 0); 211 return Position; 212 } 213 214 FormatToken *setPosition(unsigned P) override { 215 Position = P; 216 return Tokens[Position]; 217 } 218 219 void reset() { Position = -1; } 220 221 private: 222 ArrayRef<FormatToken *> Tokens; 223 int Position; 224 }; 225 226 } // end anonymous namespace 227 228 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 229 const AdditionalKeywords &Keywords, 230 unsigned FirstStartColumn, 231 ArrayRef<FormatToken *> Tokens, 232 UnwrappedLineConsumer &Callback) 233 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 234 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 235 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 236 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 237 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 238 ? IG_Rejected 239 : IG_Inited), 240 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 241 242 void UnwrappedLineParser::reset() { 243 PPBranchLevel = -1; 244 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 245 ? IG_Rejected 246 : IG_Inited; 247 IncludeGuardToken = nullptr; 248 Line.reset(new UnwrappedLine); 249 CommentsBeforeNextToken.clear(); 250 FormatTok = nullptr; 251 MustBreakBeforeNextToken = false; 252 PreprocessorDirectives.clear(); 253 CurrentLines = &Lines; 254 DeclarationScopeStack.clear(); 255 PPStack.clear(); 256 Line->FirstStartColumn = FirstStartColumn; 257 } 258 259 void UnwrappedLineParser::parse() { 260 IndexedTokenSource TokenSource(AllTokens); 261 Line->FirstStartColumn = FirstStartColumn; 262 do { 263 DEBUG(llvm::dbgs() << "----\n"); 264 reset(); 265 Tokens = &TokenSource; 266 TokenSource.reset(); 267 268 readToken(); 269 parseFile(); 270 271 // If we found an include guard then all preprocessor directives (other than 272 // the guard) are over-indented by one. 273 if (IncludeGuard == IG_Found) 274 for (auto &Line : Lines) 275 if (Line.InPPDirective && Line.Level > 0) 276 --Line.Level; 277 278 // Create line with eof token. 279 pushToken(FormatTok); 280 addUnwrappedLine(); 281 282 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 283 E = Lines.end(); 284 I != E; ++I) { 285 Callback.consumeUnwrappedLine(*I); 286 } 287 Callback.finishRun(); 288 Lines.clear(); 289 while (!PPLevelBranchIndex.empty() && 290 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 291 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 292 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 293 } 294 if (!PPLevelBranchIndex.empty()) { 295 ++PPLevelBranchIndex.back(); 296 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 297 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 298 } 299 } while (!PPLevelBranchIndex.empty()); 300 } 301 302 void UnwrappedLineParser::parseFile() { 303 // The top-level context in a file always has declarations, except for pre- 304 // processor directives and JavaScript files. 305 bool MustBeDeclaration = 306 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 307 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 308 MustBeDeclaration); 309 if (Style.Language == FormatStyle::LK_TextProto) 310 parseBracedList(); 311 else 312 parseLevel(/*HasOpeningBrace=*/false); 313 // Make sure to format the remaining tokens. 314 flushComments(true); 315 addUnwrappedLine(); 316 } 317 318 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 319 bool SwitchLabelEncountered = false; 320 do { 321 tok::TokenKind kind = FormatTok->Tok.getKind(); 322 if (FormatTok->Type == TT_MacroBlockBegin) { 323 kind = tok::l_brace; 324 } else if (FormatTok->Type == TT_MacroBlockEnd) { 325 kind = tok::r_brace; 326 } 327 328 switch (kind) { 329 case tok::comment: 330 nextToken(); 331 addUnwrappedLine(); 332 break; 333 case tok::l_brace: 334 // FIXME: Add parameter whether this can happen - if this happens, we must 335 // be in a non-declaration context. 336 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 337 continue; 338 parseBlock(/*MustBeDeclaration=*/false); 339 addUnwrappedLine(); 340 break; 341 case tok::r_brace: 342 if (HasOpeningBrace) 343 return; 344 nextToken(); 345 addUnwrappedLine(); 346 break; 347 case tok::kw_default: 348 case tok::kw_case: 349 if (Style.Language == FormatStyle::LK_JavaScript && 350 Line->MustBeDeclaration) { 351 // A 'case: string' style field declaration. 352 parseStructuralElement(); 353 break; 354 } 355 if (!SwitchLabelEncountered && 356 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 357 ++Line->Level; 358 SwitchLabelEncountered = true; 359 parseStructuralElement(); 360 break; 361 default: 362 parseStructuralElement(); 363 break; 364 } 365 } while (!eof()); 366 } 367 368 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 369 // We'll parse forward through the tokens until we hit 370 // a closing brace or eof - note that getNextToken() will 371 // parse macros, so this will magically work inside macro 372 // definitions, too. 373 unsigned StoredPosition = Tokens->getPosition(); 374 FormatToken *Tok = FormatTok; 375 const FormatToken *PrevTok = Tok->Previous; 376 // Keep a stack of positions of lbrace tokens. We will 377 // update information about whether an lbrace starts a 378 // braced init list or a different block during the loop. 379 SmallVector<FormatToken *, 8> LBraceStack; 380 assert(Tok->Tok.is(tok::l_brace)); 381 do { 382 // Get next non-comment token. 383 FormatToken *NextTok; 384 unsigned ReadTokens = 0; 385 do { 386 NextTok = Tokens->getNextToken(); 387 ++ReadTokens; 388 } while (NextTok->is(tok::comment)); 389 390 switch (Tok->Tok.getKind()) { 391 case tok::l_brace: 392 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 393 if (PrevTok->isOneOf(tok::colon, tok::less)) 394 // A ':' indicates this code is in a type, or a braced list 395 // following a label in an object literal ({a: {b: 1}}). 396 // A '<' could be an object used in a comparison, but that is nonsense 397 // code (can never return true), so more likely it is a generic type 398 // argument (`X<{a: string; b: number}>`). 399 // The code below could be confused by semicolons between the 400 // individual members in a type member list, which would normally 401 // trigger BK_Block. In both cases, this must be parsed as an inline 402 // braced init. 403 Tok->BlockKind = BK_BracedInit; 404 else if (PrevTok->is(tok::r_paren)) 405 // `) { }` can only occur in function or method declarations in JS. 406 Tok->BlockKind = BK_Block; 407 } else { 408 Tok->BlockKind = BK_Unknown; 409 } 410 LBraceStack.push_back(Tok); 411 break; 412 case tok::r_brace: 413 if (LBraceStack.empty()) 414 break; 415 if (LBraceStack.back()->BlockKind == BK_Unknown) { 416 bool ProbablyBracedList = false; 417 if (Style.Language == FormatStyle::LK_Proto) { 418 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 419 } else { 420 // Using OriginalColumn to distinguish between ObjC methods and 421 // binary operators is a bit hacky. 422 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 423 NextTok->OriginalColumn == 0; 424 425 // If there is a comma, semicolon or right paren after the closing 426 // brace, we assume this is a braced initializer list. Note that 427 // regardless how we mark inner braces here, we will overwrite the 428 // BlockKind later if we parse a braced list (where all blocks 429 // inside are by default braced lists), or when we explicitly detect 430 // blocks (for example while parsing lambdas). 431 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 432 // braced list in JS. 433 ProbablyBracedList = 434 (Style.Language == FormatStyle::LK_JavaScript && 435 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 436 Keywords.kw_as)) || 437 (Style.isCpp() && NextTok->is(tok::l_paren)) || 438 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 439 tok::r_paren, tok::r_square, tok::l_brace, 440 tok::l_square, tok::ellipsis) || 441 (NextTok->is(tok::identifier) && 442 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 443 (NextTok->is(tok::semi) && 444 (!ExpectClassBody || LBraceStack.size() != 1)) || 445 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 446 } 447 if (ProbablyBracedList) { 448 Tok->BlockKind = BK_BracedInit; 449 LBraceStack.back()->BlockKind = BK_BracedInit; 450 } else { 451 Tok->BlockKind = BK_Block; 452 LBraceStack.back()->BlockKind = BK_Block; 453 } 454 } 455 LBraceStack.pop_back(); 456 break; 457 case tok::at: 458 case tok::semi: 459 case tok::kw_if: 460 case tok::kw_while: 461 case tok::kw_for: 462 case tok::kw_switch: 463 case tok::kw_try: 464 case tok::kw___try: 465 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 466 LBraceStack.back()->BlockKind = BK_Block; 467 break; 468 default: 469 break; 470 } 471 PrevTok = Tok; 472 Tok = NextTok; 473 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 474 475 // Assume other blocks for all unclosed opening braces. 476 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 477 if (LBraceStack[i]->BlockKind == BK_Unknown) 478 LBraceStack[i]->BlockKind = BK_Block; 479 } 480 481 FormatTok = Tokens->setPosition(StoredPosition); 482 } 483 484 template <class T> 485 static inline void hash_combine(std::size_t &seed, const T &v) { 486 std::hash<T> hasher; 487 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 488 } 489 490 size_t UnwrappedLineParser::computePPHash() const { 491 size_t h = 0; 492 for (const auto &i : PPStack) { 493 hash_combine(h, size_t(i.Kind)); 494 hash_combine(h, i.Line); 495 } 496 return h; 497 } 498 499 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 500 bool MunchSemi) { 501 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 502 "'{' or macro block token expected"); 503 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 504 FormatTok->BlockKind = BK_Block; 505 506 size_t PPStartHash = computePPHash(); 507 508 unsigned InitialLevel = Line->Level; 509 nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); 510 511 if (MacroBlock && FormatTok->is(tok::l_paren)) 512 parseParens(); 513 514 size_t NbPreprocessorDirectives = 515 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 516 addUnwrappedLine(); 517 size_t OpeningLineIndex = 518 CurrentLines->empty() 519 ? (UnwrappedLine::kInvalidIndex) 520 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 521 522 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 523 MustBeDeclaration); 524 if (AddLevel) 525 ++Line->Level; 526 parseLevel(/*HasOpeningBrace=*/true); 527 528 if (eof()) 529 return; 530 531 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 532 : !FormatTok->is(tok::r_brace)) { 533 Line->Level = InitialLevel; 534 FormatTok->BlockKind = BK_Block; 535 return; 536 } 537 538 size_t PPEndHash = computePPHash(); 539 540 // Munch the closing brace. 541 nextToken(/*LevelDifference=*/AddLevel ? -1 : 0); 542 543 if (MacroBlock && FormatTok->is(tok::l_paren)) 544 parseParens(); 545 546 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 547 nextToken(); 548 Line->Level = InitialLevel; 549 550 if (PPStartHash == PPEndHash) { 551 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 552 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 553 // Update the opening line to add the forward reference as well 554 (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex = 555 CurrentLines->size() - 1; 556 } 557 } 558 } 559 560 static bool isGoogScope(const UnwrappedLine &Line) { 561 // FIXME: Closure-library specific stuff should not be hard-coded but be 562 // configurable. 563 if (Line.Tokens.size() < 4) 564 return false; 565 auto I = Line.Tokens.begin(); 566 if (I->Tok->TokenText != "goog") 567 return false; 568 ++I; 569 if (I->Tok->isNot(tok::period)) 570 return false; 571 ++I; 572 if (I->Tok->TokenText != "scope") 573 return false; 574 ++I; 575 return I->Tok->is(tok::l_paren); 576 } 577 578 static bool isIIFE(const UnwrappedLine &Line, 579 const AdditionalKeywords &Keywords) { 580 // Look for the start of an immediately invoked anonymous function. 581 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 582 // This is commonly done in JavaScript to create a new, anonymous scope. 583 // Example: (function() { ... })() 584 if (Line.Tokens.size() < 3) 585 return false; 586 auto I = Line.Tokens.begin(); 587 if (I->Tok->isNot(tok::l_paren)) 588 return false; 589 ++I; 590 if (I->Tok->isNot(Keywords.kw_function)) 591 return false; 592 ++I; 593 return I->Tok->is(tok::l_paren); 594 } 595 596 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 597 const FormatToken &InitialToken) { 598 if (InitialToken.is(tok::kw_namespace)) 599 return Style.BraceWrapping.AfterNamespace; 600 if (InitialToken.is(tok::kw_class)) 601 return Style.BraceWrapping.AfterClass; 602 if (InitialToken.is(tok::kw_union)) 603 return Style.BraceWrapping.AfterUnion; 604 if (InitialToken.is(tok::kw_struct)) 605 return Style.BraceWrapping.AfterStruct; 606 return false; 607 } 608 609 void UnwrappedLineParser::parseChildBlock() { 610 FormatTok->BlockKind = BK_Block; 611 nextToken(); 612 { 613 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && 614 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 615 ScopedLineState LineState(*this); 616 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 617 /*MustBeDeclaration=*/false); 618 Line->Level += SkipIndent ? 0 : 1; 619 parseLevel(/*HasOpeningBrace=*/true); 620 flushComments(isOnNewLine(*FormatTok)); 621 Line->Level -= SkipIndent ? 0 : 1; 622 } 623 nextToken(); 624 } 625 626 void UnwrappedLineParser::parsePPDirective() { 627 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 628 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 629 nextToken(); 630 631 if (!FormatTok->Tok.getIdentifierInfo()) { 632 parsePPUnknown(); 633 return; 634 } 635 636 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 637 case tok::pp_define: 638 parsePPDefine(); 639 return; 640 case tok::pp_if: 641 parsePPIf(/*IfDef=*/false); 642 break; 643 case tok::pp_ifdef: 644 case tok::pp_ifndef: 645 parsePPIf(/*IfDef=*/true); 646 break; 647 case tok::pp_else: 648 parsePPElse(); 649 break; 650 case tok::pp_elif: 651 parsePPElIf(); 652 break; 653 case tok::pp_endif: 654 parsePPEndIf(); 655 break; 656 default: 657 parsePPUnknown(); 658 break; 659 } 660 } 661 662 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 663 size_t Line = CurrentLines->size(); 664 if (CurrentLines == &PreprocessorDirectives) 665 Line += Lines.size(); 666 667 if (Unreachable || 668 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 669 PPStack.push_back({PP_Unreachable, Line}); 670 else 671 PPStack.push_back({PP_Conditional, Line}); 672 } 673 674 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 675 ++PPBranchLevel; 676 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 677 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 678 PPLevelBranchIndex.push_back(0); 679 PPLevelBranchCount.push_back(0); 680 } 681 PPChainBranchIndex.push(0); 682 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 683 conditionalCompilationCondition(Unreachable || Skip); 684 } 685 686 void UnwrappedLineParser::conditionalCompilationAlternative() { 687 if (!PPStack.empty()) 688 PPStack.pop_back(); 689 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 690 if (!PPChainBranchIndex.empty()) 691 ++PPChainBranchIndex.top(); 692 conditionalCompilationCondition( 693 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 694 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 695 } 696 697 void UnwrappedLineParser::conditionalCompilationEnd() { 698 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 699 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 700 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 701 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 702 } 703 } 704 // Guard against #endif's without #if. 705 if (PPBranchLevel > -1) 706 --PPBranchLevel; 707 if (!PPChainBranchIndex.empty()) 708 PPChainBranchIndex.pop(); 709 if (!PPStack.empty()) 710 PPStack.pop_back(); 711 } 712 713 void UnwrappedLineParser::parsePPIf(bool IfDef) { 714 bool IfNDef = FormatTok->is(tok::pp_ifndef); 715 nextToken(); 716 bool Unreachable = false; 717 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 718 Unreachable = true; 719 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 720 Unreachable = true; 721 conditionalCompilationStart(Unreachable); 722 FormatToken *IfCondition = FormatTok; 723 // If there's a #ifndef on the first line, and the only lines before it are 724 // comments, it could be an include guard. 725 bool MaybeIncludeGuard = IfNDef; 726 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 727 for (auto &Line : Lines) { 728 if (!Line.Tokens.front().Tok->is(tok::comment)) { 729 MaybeIncludeGuard = false; 730 IncludeGuard = IG_Rejected; 731 break; 732 } 733 } 734 --PPBranchLevel; 735 parsePPUnknown(); 736 ++PPBranchLevel; 737 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 738 IncludeGuard = IG_IfNdefed; 739 IncludeGuardToken = IfCondition; 740 } 741 } 742 743 void UnwrappedLineParser::parsePPElse() { 744 // If a potential include guard has an #else, it's not an include guard. 745 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 746 IncludeGuard = IG_Rejected; 747 conditionalCompilationAlternative(); 748 if (PPBranchLevel > -1) 749 --PPBranchLevel; 750 parsePPUnknown(); 751 ++PPBranchLevel; 752 } 753 754 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 755 756 void UnwrappedLineParser::parsePPEndIf() { 757 conditionalCompilationEnd(); 758 parsePPUnknown(); 759 // If the #endif of a potential include guard is the last thing in the file, 760 // then we found an include guard. 761 unsigned TokenPosition = Tokens->getPosition(); 762 FormatToken *PeekNext = AllTokens[TokenPosition]; 763 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && 764 PeekNext->is(tok::eof) && 765 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 766 IncludeGuard = IG_Found; 767 } 768 769 void UnwrappedLineParser::parsePPDefine() { 770 nextToken(); 771 772 if (FormatTok->Tok.getKind() != tok::identifier) { 773 IncludeGuard = IG_Rejected; 774 IncludeGuardToken = nullptr; 775 parsePPUnknown(); 776 return; 777 } 778 779 if (IncludeGuard == IG_IfNdefed && 780 IncludeGuardToken->TokenText == FormatTok->TokenText) { 781 IncludeGuard = IG_Defined; 782 IncludeGuardToken = nullptr; 783 for (auto &Line : Lines) { 784 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 785 IncludeGuard = IG_Rejected; 786 break; 787 } 788 } 789 } 790 791 nextToken(); 792 if (FormatTok->Tok.getKind() == tok::l_paren && 793 FormatTok->WhitespaceRange.getBegin() == 794 FormatTok->WhitespaceRange.getEnd()) { 795 parseParens(); 796 } 797 if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) 798 Line->Level += PPBranchLevel + 1; 799 addUnwrappedLine(); 800 ++Line->Level; 801 802 // Errors during a preprocessor directive can only affect the layout of the 803 // preprocessor directive, and thus we ignore them. An alternative approach 804 // would be to use the same approach we use on the file level (no 805 // re-indentation if there was a structural error) within the macro 806 // definition. 807 parseFile(); 808 } 809 810 void UnwrappedLineParser::parsePPUnknown() { 811 do { 812 nextToken(); 813 } while (!eof()); 814 if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) 815 Line->Level += PPBranchLevel + 1; 816 addUnwrappedLine(); 817 } 818 819 // Here we blacklist certain tokens that are not usually the first token in an 820 // unwrapped line. This is used in attempt to distinguish macro calls without 821 // trailing semicolons from other constructs split to several lines. 822 static bool tokenCanStartNewLine(const clang::Token &Tok) { 823 // Semicolon can be a null-statement, l_square can be a start of a macro or 824 // a C++11 attribute, but this doesn't seem to be common. 825 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 826 Tok.isNot(tok::l_square) && 827 // Tokens that can only be used as binary operators and a part of 828 // overloaded operator names. 829 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 830 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 831 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 832 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 833 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 834 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 835 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 836 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 837 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 838 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 839 Tok.isNot(tok::lesslessequal) && 840 // Colon is used in labels, base class lists, initializer lists, 841 // range-based for loops, ternary operator, but should never be the 842 // first token in an unwrapped line. 843 Tok.isNot(tok::colon) && 844 // 'noexcept' is a trailing annotation. 845 Tok.isNot(tok::kw_noexcept); 846 } 847 848 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 849 const FormatToken *FormatTok) { 850 // FIXME: This returns true for C/C++ keywords like 'struct'. 851 return FormatTok->is(tok::identifier) && 852 (FormatTok->Tok.getIdentifierInfo() == nullptr || 853 !FormatTok->isOneOf( 854 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 855 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 856 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 857 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 858 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 859 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, 860 Keywords.kw_from)); 861 } 862 863 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 864 const FormatToken *FormatTok) { 865 return FormatTok->Tok.isLiteral() || 866 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 867 mustBeJSIdent(Keywords, FormatTok); 868 } 869 870 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 871 // when encountered after a value (see mustBeJSIdentOrValue). 872 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 873 const FormatToken *FormatTok) { 874 return FormatTok->isOneOf( 875 tok::kw_return, Keywords.kw_yield, 876 // conditionals 877 tok::kw_if, tok::kw_else, 878 // loops 879 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 880 // switch/case 881 tok::kw_switch, tok::kw_case, 882 // exceptions 883 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 884 // declaration 885 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 886 Keywords.kw_async, Keywords.kw_function, 887 // import/export 888 Keywords.kw_import, tok::kw_export); 889 } 890 891 // readTokenWithJavaScriptASI reads the next token and terminates the current 892 // line if JavaScript Automatic Semicolon Insertion must 893 // happen between the current token and the next token. 894 // 895 // This method is conservative - it cannot cover all edge cases of JavaScript, 896 // but only aims to correctly handle certain well known cases. It *must not* 897 // return true in speculative cases. 898 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 899 FormatToken *Previous = FormatTok; 900 readToken(); 901 FormatToken *Next = FormatTok; 902 903 bool IsOnSameLine = 904 CommentsBeforeNextToken.empty() 905 ? Next->NewlinesBefore == 0 906 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 907 if (IsOnSameLine) 908 return; 909 910 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 911 bool PreviousStartsTemplateExpr = 912 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 913 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 914 // If the line contains an '@' sign, the previous token might be an 915 // annotation, which can precede another identifier/value. 916 bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(), 917 [](UnwrappedLineNode &LineNode) { 918 return LineNode.Tok->is(tok::at); 919 }) != Line->Tokens.end(); 920 if (HasAt) 921 return; 922 } 923 if (Next->is(tok::exclaim) && PreviousMustBeValue) 924 return addUnwrappedLine(); 925 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 926 bool NextEndsTemplateExpr = 927 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 928 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 929 (PreviousMustBeValue || 930 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 931 tok::minusminus))) 932 return addUnwrappedLine(); 933 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 934 isJSDeclOrStmt(Keywords, Next)) 935 return addUnwrappedLine(); 936 } 937 938 void UnwrappedLineParser::parseStructuralElement() { 939 assert(!FormatTok->is(tok::l_brace)); 940 if (Style.Language == FormatStyle::LK_TableGen && 941 FormatTok->is(tok::pp_include)) { 942 nextToken(); 943 if (FormatTok->is(tok::string_literal)) 944 nextToken(); 945 addUnwrappedLine(); 946 return; 947 } 948 switch (FormatTok->Tok.getKind()) { 949 case tok::at: 950 nextToken(); 951 if (FormatTok->Tok.is(tok::l_brace)) { 952 nextToken(); 953 parseBracedList(); 954 break; 955 } 956 switch (FormatTok->Tok.getObjCKeywordID()) { 957 case tok::objc_public: 958 case tok::objc_protected: 959 case tok::objc_package: 960 case tok::objc_private: 961 return parseAccessSpecifier(); 962 case tok::objc_interface: 963 case tok::objc_implementation: 964 return parseObjCInterfaceOrImplementation(); 965 case tok::objc_protocol: 966 return parseObjCProtocol(); 967 case tok::objc_end: 968 return; // Handled by the caller. 969 case tok::objc_optional: 970 case tok::objc_required: 971 nextToken(); 972 addUnwrappedLine(); 973 return; 974 case tok::objc_autoreleasepool: 975 nextToken(); 976 if (FormatTok->Tok.is(tok::l_brace)) { 977 if (Style.BraceWrapping.AfterObjCDeclaration) 978 addUnwrappedLine(); 979 parseBlock(/*MustBeDeclaration=*/false); 980 } 981 addUnwrappedLine(); 982 return; 983 case tok::objc_try: 984 // This branch isn't strictly necessary (the kw_try case below would 985 // do this too after the tok::at is parsed above). But be explicit. 986 parseTryCatch(); 987 return; 988 default: 989 break; 990 } 991 break; 992 case tok::kw_asm: 993 nextToken(); 994 if (FormatTok->is(tok::l_brace)) { 995 FormatTok->Type = TT_InlineASMBrace; 996 nextToken(); 997 while (FormatTok && FormatTok->isNot(tok::eof)) { 998 if (FormatTok->is(tok::r_brace)) { 999 FormatTok->Type = TT_InlineASMBrace; 1000 nextToken(); 1001 addUnwrappedLine(); 1002 break; 1003 } 1004 FormatTok->Finalized = true; 1005 nextToken(); 1006 } 1007 } 1008 break; 1009 case tok::kw_namespace: 1010 parseNamespace(); 1011 return; 1012 case tok::kw_inline: 1013 nextToken(); 1014 if (FormatTok->Tok.is(tok::kw_namespace)) { 1015 parseNamespace(); 1016 return; 1017 } 1018 break; 1019 case tok::kw_public: 1020 case tok::kw_protected: 1021 case tok::kw_private: 1022 if (Style.Language == FormatStyle::LK_Java || 1023 Style.Language == FormatStyle::LK_JavaScript) 1024 nextToken(); 1025 else 1026 parseAccessSpecifier(); 1027 return; 1028 case tok::kw_if: 1029 parseIfThenElse(); 1030 return; 1031 case tok::kw_for: 1032 case tok::kw_while: 1033 parseForOrWhileLoop(); 1034 return; 1035 case tok::kw_do: 1036 parseDoWhile(); 1037 return; 1038 case tok::kw_switch: 1039 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1040 // 'switch: string' field declaration. 1041 break; 1042 parseSwitch(); 1043 return; 1044 case tok::kw_default: 1045 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1046 // 'default: string' field declaration. 1047 break; 1048 nextToken(); 1049 parseLabel(); 1050 return; 1051 case tok::kw_case: 1052 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1053 // 'case: string' field declaration. 1054 break; 1055 parseCaseLabel(); 1056 return; 1057 case tok::kw_try: 1058 case tok::kw___try: 1059 parseTryCatch(); 1060 return; 1061 case tok::kw_extern: 1062 nextToken(); 1063 if (FormatTok->Tok.is(tok::string_literal)) { 1064 nextToken(); 1065 if (FormatTok->Tok.is(tok::l_brace)) { 1066 if (Style.BraceWrapping.AfterExternBlock) { 1067 addUnwrappedLine(); 1068 parseBlock(/*MustBeDeclaration=*/true); 1069 } else { 1070 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 1071 } 1072 addUnwrappedLine(); 1073 return; 1074 } 1075 } 1076 break; 1077 case tok::kw_export: 1078 if (Style.Language == FormatStyle::LK_JavaScript) { 1079 parseJavaScriptEs6ImportExport(); 1080 return; 1081 } 1082 break; 1083 case tok::identifier: 1084 if (FormatTok->is(TT_ForEachMacro)) { 1085 parseForOrWhileLoop(); 1086 return; 1087 } 1088 if (FormatTok->is(TT_MacroBlockBegin)) { 1089 parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, 1090 /*MunchSemi=*/false); 1091 return; 1092 } 1093 if (FormatTok->is(Keywords.kw_import)) { 1094 if (Style.Language == FormatStyle::LK_JavaScript) { 1095 parseJavaScriptEs6ImportExport(); 1096 return; 1097 } 1098 if (Style.Language == FormatStyle::LK_Proto) { 1099 nextToken(); 1100 if (FormatTok->is(tok::kw_public)) 1101 nextToken(); 1102 if (!FormatTok->is(tok::string_literal)) 1103 return; 1104 nextToken(); 1105 if (FormatTok->is(tok::semi)) 1106 nextToken(); 1107 addUnwrappedLine(); 1108 return; 1109 } 1110 } 1111 if (Style.isCpp() && 1112 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1113 Keywords.kw_slots, Keywords.kw_qslots)) { 1114 nextToken(); 1115 if (FormatTok->is(tok::colon)) { 1116 nextToken(); 1117 addUnwrappedLine(); 1118 return; 1119 } 1120 } 1121 // In all other cases, parse the declaration. 1122 break; 1123 default: 1124 break; 1125 } 1126 do { 1127 const FormatToken *Previous = FormatTok->Previous; 1128 switch (FormatTok->Tok.getKind()) { 1129 case tok::at: 1130 nextToken(); 1131 if (FormatTok->Tok.is(tok::l_brace)) { 1132 nextToken(); 1133 parseBracedList(); 1134 } 1135 break; 1136 case tok::kw_enum: 1137 // Ignore if this is part of "template <enum ...". 1138 if (Previous && Previous->is(tok::less)) { 1139 nextToken(); 1140 break; 1141 } 1142 1143 // parseEnum falls through and does not yet add an unwrapped line as an 1144 // enum definition can start a structural element. 1145 if (!parseEnum()) 1146 break; 1147 // This only applies for C++. 1148 if (!Style.isCpp()) { 1149 addUnwrappedLine(); 1150 return; 1151 } 1152 break; 1153 case tok::kw_typedef: 1154 nextToken(); 1155 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1156 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 1157 parseEnum(); 1158 break; 1159 case tok::kw_struct: 1160 case tok::kw_union: 1161 case tok::kw_class: 1162 // parseRecord falls through and does not yet add an unwrapped line as a 1163 // record declaration or definition can start a structural element. 1164 parseRecord(); 1165 // This does not apply for Java and JavaScript. 1166 if (Style.Language == FormatStyle::LK_Java || 1167 Style.Language == FormatStyle::LK_JavaScript) { 1168 if (FormatTok->is(tok::semi)) 1169 nextToken(); 1170 addUnwrappedLine(); 1171 return; 1172 } 1173 break; 1174 case tok::period: 1175 nextToken(); 1176 // In Java, classes have an implicit static member "class". 1177 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1178 FormatTok->is(tok::kw_class)) 1179 nextToken(); 1180 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 1181 FormatTok->Tok.getIdentifierInfo()) 1182 // JavaScript only has pseudo keywords, all keywords are allowed to 1183 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1184 nextToken(); 1185 break; 1186 case tok::semi: 1187 nextToken(); 1188 addUnwrappedLine(); 1189 return; 1190 case tok::r_brace: 1191 addUnwrappedLine(); 1192 return; 1193 case tok::l_paren: 1194 parseParens(); 1195 break; 1196 case tok::kw_operator: 1197 nextToken(); 1198 if (FormatTok->isBinaryOperator()) 1199 nextToken(); 1200 break; 1201 case tok::caret: 1202 nextToken(); 1203 if (FormatTok->Tok.isAnyIdentifier() || 1204 FormatTok->isSimpleTypeSpecifier()) 1205 nextToken(); 1206 if (FormatTok->is(tok::l_paren)) 1207 parseParens(); 1208 if (FormatTok->is(tok::l_brace)) 1209 parseChildBlock(); 1210 break; 1211 case tok::l_brace: 1212 if (!tryToParseBracedList()) { 1213 // A block outside of parentheses must be the last part of a 1214 // structural element. 1215 // FIXME: Figure out cases where this is not true, and add projections 1216 // for them (the one we know is missing are lambdas). 1217 if (Style.BraceWrapping.AfterFunction) 1218 addUnwrappedLine(); 1219 FormatTok->Type = TT_FunctionLBrace; 1220 parseBlock(/*MustBeDeclaration=*/false); 1221 addUnwrappedLine(); 1222 return; 1223 } 1224 // Otherwise this was a braced init list, and the structural 1225 // element continues. 1226 break; 1227 case tok::kw_try: 1228 // We arrive here when parsing function-try blocks. 1229 parseTryCatch(); 1230 return; 1231 case tok::identifier: { 1232 if (FormatTok->is(TT_MacroBlockEnd)) { 1233 addUnwrappedLine(); 1234 return; 1235 } 1236 1237 // Function declarations (as opposed to function expressions) are parsed 1238 // on their own unwrapped line by continuing this loop. Function 1239 // expressions (functions that are not on their own line) must not create 1240 // a new unwrapped line, so they are special cased below. 1241 size_t TokenCount = Line->Tokens.size(); 1242 if (Style.Language == FormatStyle::LK_JavaScript && 1243 FormatTok->is(Keywords.kw_function) && 1244 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1245 Keywords.kw_async)))) { 1246 tryToParseJSFunction(); 1247 break; 1248 } 1249 if ((Style.Language == FormatStyle::LK_JavaScript || 1250 Style.Language == FormatStyle::LK_Java) && 1251 FormatTok->is(Keywords.kw_interface)) { 1252 if (Style.Language == FormatStyle::LK_JavaScript) { 1253 // In JavaScript/TypeScript, "interface" can be used as a standalone 1254 // identifier, e.g. in `var interface = 1;`. If "interface" is 1255 // followed by another identifier, it is very like to be an actual 1256 // interface declaration. 1257 unsigned StoredPosition = Tokens->getPosition(); 1258 FormatToken *Next = Tokens->getNextToken(); 1259 FormatTok = Tokens->setPosition(StoredPosition); 1260 if (Next && !mustBeJSIdent(Keywords, Next)) { 1261 nextToken(); 1262 break; 1263 } 1264 } 1265 parseRecord(); 1266 addUnwrappedLine(); 1267 return; 1268 } 1269 1270 // See if the following token should start a new unwrapped line. 1271 StringRef Text = FormatTok->TokenText; 1272 nextToken(); 1273 if (Line->Tokens.size() == 1 && 1274 // JS doesn't have macros, and within classes colons indicate fields, 1275 // not labels. 1276 Style.Language != FormatStyle::LK_JavaScript) { 1277 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1278 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1279 parseLabel(); 1280 return; 1281 } 1282 // Recognize function-like macro usages without trailing semicolon as 1283 // well as free-standing macros like Q_OBJECT. 1284 bool FunctionLike = FormatTok->is(tok::l_paren); 1285 if (FunctionLike) 1286 parseParens(); 1287 1288 bool FollowedByNewline = 1289 CommentsBeforeNextToken.empty() 1290 ? FormatTok->NewlinesBefore > 0 1291 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1292 1293 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1294 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 1295 addUnwrappedLine(); 1296 return; 1297 } 1298 } 1299 break; 1300 } 1301 case tok::equal: 1302 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 1303 // TT_JsFatArrow. The always start an expression or a child block if 1304 // followed by a curly. 1305 if (FormatTok->is(TT_JsFatArrow)) { 1306 nextToken(); 1307 if (FormatTok->is(tok::l_brace)) 1308 parseChildBlock(); 1309 break; 1310 } 1311 1312 nextToken(); 1313 if (FormatTok->Tok.is(tok::l_brace)) { 1314 nextToken(); 1315 parseBracedList(); 1316 } else if (Style.Language == FormatStyle::LK_Proto && 1317 FormatTok->Tok.is(tok::less)) { 1318 nextToken(); 1319 parseBracedList(/*ContinueOnSemicolons=*/false, 1320 /*ClosingBraceKind=*/tok::greater); 1321 } 1322 break; 1323 case tok::l_square: 1324 parseSquare(); 1325 break; 1326 case tok::kw_new: 1327 parseNew(); 1328 break; 1329 default: 1330 nextToken(); 1331 break; 1332 } 1333 } while (!eof()); 1334 } 1335 1336 bool UnwrappedLineParser::tryToParseLambda() { 1337 if (!Style.isCpp()) { 1338 nextToken(); 1339 return false; 1340 } 1341 assert(FormatTok->is(tok::l_square)); 1342 FormatToken &LSquare = *FormatTok; 1343 if (!tryToParseLambdaIntroducer()) 1344 return false; 1345 1346 while (FormatTok->isNot(tok::l_brace)) { 1347 if (FormatTok->isSimpleTypeSpecifier()) { 1348 nextToken(); 1349 continue; 1350 } 1351 switch (FormatTok->Tok.getKind()) { 1352 case tok::l_brace: 1353 break; 1354 case tok::l_paren: 1355 parseParens(); 1356 break; 1357 case tok::amp: 1358 case tok::star: 1359 case tok::kw_const: 1360 case tok::comma: 1361 case tok::less: 1362 case tok::greater: 1363 case tok::identifier: 1364 case tok::numeric_constant: 1365 case tok::coloncolon: 1366 case tok::kw_mutable: 1367 nextToken(); 1368 break; 1369 case tok::arrow: 1370 FormatTok->Type = TT_LambdaArrow; 1371 nextToken(); 1372 break; 1373 default: 1374 return true; 1375 } 1376 } 1377 LSquare.Type = TT_LambdaLSquare; 1378 parseChildBlock(); 1379 return true; 1380 } 1381 1382 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1383 const FormatToken *Previous = FormatTok->Previous; 1384 if (Previous && 1385 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1386 tok::kw_delete) || 1387 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1388 Previous->isSimpleTypeSpecifier())) { 1389 nextToken(); 1390 return false; 1391 } 1392 nextToken(); 1393 parseSquare(/*LambdaIntroducer=*/true); 1394 return true; 1395 } 1396 1397 void UnwrappedLineParser::tryToParseJSFunction() { 1398 assert(FormatTok->is(Keywords.kw_function) || 1399 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1400 if (FormatTok->is(Keywords.kw_async)) 1401 nextToken(); 1402 // Consume "function". 1403 nextToken(); 1404 1405 // Consume * (generator function). Treat it like C++'s overloaded operators. 1406 if (FormatTok->is(tok::star)) { 1407 FormatTok->Type = TT_OverloadedOperator; 1408 nextToken(); 1409 } 1410 1411 // Consume function name. 1412 if (FormatTok->is(tok::identifier)) 1413 nextToken(); 1414 1415 if (FormatTok->isNot(tok::l_paren)) 1416 return; 1417 1418 // Parse formal parameter list. 1419 parseParens(); 1420 1421 if (FormatTok->is(tok::colon)) { 1422 // Parse a type definition. 1423 nextToken(); 1424 1425 // Eat the type declaration. For braced inline object types, balance braces, 1426 // otherwise just parse until finding an l_brace for the function body. 1427 if (FormatTok->is(tok::l_brace)) 1428 tryToParseBracedList(); 1429 else 1430 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1431 nextToken(); 1432 } 1433 1434 if (FormatTok->is(tok::semi)) 1435 return; 1436 1437 parseChildBlock(); 1438 } 1439 1440 bool UnwrappedLineParser::tryToParseBracedList() { 1441 if (FormatTok->BlockKind == BK_Unknown) 1442 calculateBraceTypes(); 1443 assert(FormatTok->BlockKind != BK_Unknown); 1444 if (FormatTok->BlockKind == BK_Block) 1445 return false; 1446 nextToken(); 1447 parseBracedList(); 1448 return true; 1449 } 1450 1451 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1452 tok::TokenKind ClosingBraceKind) { 1453 bool HasError = false; 1454 1455 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1456 // replace this by using parseAssigmentExpression() inside. 1457 do { 1458 if (Style.Language == FormatStyle::LK_JavaScript) { 1459 if (FormatTok->is(Keywords.kw_function) || 1460 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1461 tryToParseJSFunction(); 1462 continue; 1463 } 1464 if (FormatTok->is(TT_JsFatArrow)) { 1465 nextToken(); 1466 // Fat arrows can be followed by simple expressions or by child blocks 1467 // in curly braces. 1468 if (FormatTok->is(tok::l_brace)) { 1469 parseChildBlock(); 1470 continue; 1471 } 1472 } 1473 if (FormatTok->is(tok::l_brace)) { 1474 // Could be a method inside of a braced list `{a() { return 1; }}`. 1475 if (tryToParseBracedList()) 1476 continue; 1477 parseChildBlock(); 1478 } 1479 } 1480 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1481 nextToken(); 1482 return !HasError; 1483 } 1484 switch (FormatTok->Tok.getKind()) { 1485 case tok::caret: 1486 nextToken(); 1487 if (FormatTok->is(tok::l_brace)) { 1488 parseChildBlock(); 1489 } 1490 break; 1491 case tok::l_square: 1492 tryToParseLambda(); 1493 break; 1494 case tok::l_paren: 1495 parseParens(); 1496 // JavaScript can just have free standing methods and getters/setters in 1497 // object literals. Detect them by a "{" following ")". 1498 if (Style.Language == FormatStyle::LK_JavaScript) { 1499 if (FormatTok->is(tok::l_brace)) 1500 parseChildBlock(); 1501 break; 1502 } 1503 break; 1504 case tok::l_brace: 1505 // Assume there are no blocks inside a braced init list apart 1506 // from the ones we explicitly parse out (like lambdas). 1507 FormatTok->BlockKind = BK_BracedInit; 1508 nextToken(); 1509 parseBracedList(); 1510 break; 1511 case tok::less: 1512 if (Style.Language == FormatStyle::LK_Proto) { 1513 nextToken(); 1514 parseBracedList(/*ContinueOnSemicolons=*/false, 1515 /*ClosingBraceKind=*/tok::greater); 1516 } else { 1517 nextToken(); 1518 } 1519 break; 1520 case tok::semi: 1521 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1522 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1523 // used for error recovery if we have otherwise determined that this is 1524 // a braced list. 1525 if (Style.Language == FormatStyle::LK_JavaScript) { 1526 nextToken(); 1527 break; 1528 } 1529 HasError = true; 1530 if (!ContinueOnSemicolons) 1531 return !HasError; 1532 nextToken(); 1533 break; 1534 case tok::comma: 1535 nextToken(); 1536 break; 1537 default: 1538 nextToken(); 1539 break; 1540 } 1541 } while (!eof()); 1542 return false; 1543 } 1544 1545 void UnwrappedLineParser::parseParens() { 1546 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1547 nextToken(); 1548 do { 1549 switch (FormatTok->Tok.getKind()) { 1550 case tok::l_paren: 1551 parseParens(); 1552 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1553 parseChildBlock(); 1554 break; 1555 case tok::r_paren: 1556 nextToken(); 1557 return; 1558 case tok::r_brace: 1559 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1560 return; 1561 case tok::l_square: 1562 tryToParseLambda(); 1563 break; 1564 case tok::l_brace: 1565 if (!tryToParseBracedList()) 1566 parseChildBlock(); 1567 break; 1568 case tok::at: 1569 nextToken(); 1570 if (FormatTok->Tok.is(tok::l_brace)) { 1571 nextToken(); 1572 parseBracedList(); 1573 } 1574 break; 1575 case tok::kw_class: 1576 if (Style.Language == FormatStyle::LK_JavaScript) 1577 parseRecord(/*ParseAsExpr=*/true); 1578 else 1579 nextToken(); 1580 break; 1581 case tok::identifier: 1582 if (Style.Language == FormatStyle::LK_JavaScript && 1583 (FormatTok->is(Keywords.kw_function) || 1584 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1585 tryToParseJSFunction(); 1586 else 1587 nextToken(); 1588 break; 1589 default: 1590 nextToken(); 1591 break; 1592 } 1593 } while (!eof()); 1594 } 1595 1596 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 1597 if (!LambdaIntroducer) { 1598 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1599 if (tryToParseLambda()) 1600 return; 1601 } 1602 do { 1603 switch (FormatTok->Tok.getKind()) { 1604 case tok::l_paren: 1605 parseParens(); 1606 break; 1607 case tok::r_square: 1608 nextToken(); 1609 return; 1610 case tok::r_brace: 1611 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1612 return; 1613 case tok::l_square: 1614 parseSquare(); 1615 break; 1616 case tok::l_brace: { 1617 if (!tryToParseBracedList()) 1618 parseChildBlock(); 1619 break; 1620 } 1621 case tok::at: 1622 nextToken(); 1623 if (FormatTok->Tok.is(tok::l_brace)) { 1624 nextToken(); 1625 parseBracedList(); 1626 } 1627 break; 1628 default: 1629 nextToken(); 1630 break; 1631 } 1632 } while (!eof()); 1633 } 1634 1635 void UnwrappedLineParser::parseIfThenElse() { 1636 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1637 nextToken(); 1638 if (FormatTok->Tok.is(tok::kw_constexpr)) 1639 nextToken(); 1640 if (FormatTok->Tok.is(tok::l_paren)) 1641 parseParens(); 1642 bool NeedsUnwrappedLine = false; 1643 if (FormatTok->Tok.is(tok::l_brace)) { 1644 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1645 parseBlock(/*MustBeDeclaration=*/false); 1646 if (Style.BraceWrapping.BeforeElse) 1647 addUnwrappedLine(); 1648 else 1649 NeedsUnwrappedLine = true; 1650 } else { 1651 addUnwrappedLine(); 1652 ++Line->Level; 1653 parseStructuralElement(); 1654 --Line->Level; 1655 } 1656 if (FormatTok->Tok.is(tok::kw_else)) { 1657 nextToken(); 1658 if (FormatTok->Tok.is(tok::l_brace)) { 1659 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1660 parseBlock(/*MustBeDeclaration=*/false); 1661 addUnwrappedLine(); 1662 } else if (FormatTok->Tok.is(tok::kw_if)) { 1663 parseIfThenElse(); 1664 } else { 1665 addUnwrappedLine(); 1666 ++Line->Level; 1667 parseStructuralElement(); 1668 if (FormatTok->is(tok::eof)) 1669 addUnwrappedLine(); 1670 --Line->Level; 1671 } 1672 } else if (NeedsUnwrappedLine) { 1673 addUnwrappedLine(); 1674 } 1675 } 1676 1677 void UnwrappedLineParser::parseTryCatch() { 1678 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1679 nextToken(); 1680 bool NeedsUnwrappedLine = false; 1681 if (FormatTok->is(tok::colon)) { 1682 // We are in a function try block, what comes is an initializer list. 1683 nextToken(); 1684 while (FormatTok->is(tok::identifier)) { 1685 nextToken(); 1686 if (FormatTok->is(tok::l_paren)) 1687 parseParens(); 1688 if (FormatTok->is(tok::comma)) 1689 nextToken(); 1690 } 1691 } 1692 // Parse try with resource. 1693 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1694 parseParens(); 1695 } 1696 if (FormatTok->is(tok::l_brace)) { 1697 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1698 parseBlock(/*MustBeDeclaration=*/false); 1699 if (Style.BraceWrapping.BeforeCatch) { 1700 addUnwrappedLine(); 1701 } else { 1702 NeedsUnwrappedLine = true; 1703 } 1704 } else if (!FormatTok->is(tok::kw_catch)) { 1705 // The C++ standard requires a compound-statement after a try. 1706 // If there's none, we try to assume there's a structuralElement 1707 // and try to continue. 1708 addUnwrappedLine(); 1709 ++Line->Level; 1710 parseStructuralElement(); 1711 --Line->Level; 1712 } 1713 while (1) { 1714 if (FormatTok->is(tok::at)) 1715 nextToken(); 1716 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1717 tok::kw___finally) || 1718 ((Style.Language == FormatStyle::LK_Java || 1719 Style.Language == FormatStyle::LK_JavaScript) && 1720 FormatTok->is(Keywords.kw_finally)) || 1721 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1722 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1723 break; 1724 nextToken(); 1725 while (FormatTok->isNot(tok::l_brace)) { 1726 if (FormatTok->is(tok::l_paren)) { 1727 parseParens(); 1728 continue; 1729 } 1730 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1731 return; 1732 nextToken(); 1733 } 1734 NeedsUnwrappedLine = false; 1735 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1736 parseBlock(/*MustBeDeclaration=*/false); 1737 if (Style.BraceWrapping.BeforeCatch) 1738 addUnwrappedLine(); 1739 else 1740 NeedsUnwrappedLine = true; 1741 } 1742 if (NeedsUnwrappedLine) 1743 addUnwrappedLine(); 1744 } 1745 1746 void UnwrappedLineParser::parseNamespace() { 1747 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1748 1749 const FormatToken &InitialToken = *FormatTok; 1750 nextToken(); 1751 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1752 nextToken(); 1753 if (FormatTok->Tok.is(tok::l_brace)) { 1754 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1755 addUnwrappedLine(); 1756 1757 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1758 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1759 DeclarationScopeStack.size() > 1); 1760 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1761 // Munch the semicolon after a namespace. This is more common than one would 1762 // think. Puttin the semicolon into its own line is very ugly. 1763 if (FormatTok->Tok.is(tok::semi)) 1764 nextToken(); 1765 addUnwrappedLine(); 1766 } 1767 // FIXME: Add error handling. 1768 } 1769 1770 void UnwrappedLineParser::parseNew() { 1771 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1772 nextToken(); 1773 if (Style.Language != FormatStyle::LK_Java) 1774 return; 1775 1776 // In Java, we can parse everything up to the parens, which aren't optional. 1777 do { 1778 // There should not be a ;, { or } before the new's open paren. 1779 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1780 return; 1781 1782 // Consume the parens. 1783 if (FormatTok->is(tok::l_paren)) { 1784 parseParens(); 1785 1786 // If there is a class body of an anonymous class, consume that as child. 1787 if (FormatTok->is(tok::l_brace)) 1788 parseChildBlock(); 1789 return; 1790 } 1791 nextToken(); 1792 } while (!eof()); 1793 } 1794 1795 void UnwrappedLineParser::parseForOrWhileLoop() { 1796 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1797 "'for', 'while' or foreach macro expected"); 1798 nextToken(); 1799 // JS' for await ( ... 1800 if (Style.Language == FormatStyle::LK_JavaScript && 1801 FormatTok->is(Keywords.kw_await)) 1802 nextToken(); 1803 if (FormatTok->Tok.is(tok::l_paren)) 1804 parseParens(); 1805 if (FormatTok->Tok.is(tok::l_brace)) { 1806 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1807 parseBlock(/*MustBeDeclaration=*/false); 1808 addUnwrappedLine(); 1809 } else { 1810 addUnwrappedLine(); 1811 ++Line->Level; 1812 parseStructuralElement(); 1813 --Line->Level; 1814 } 1815 } 1816 1817 void UnwrappedLineParser::parseDoWhile() { 1818 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1819 nextToken(); 1820 if (FormatTok->Tok.is(tok::l_brace)) { 1821 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1822 parseBlock(/*MustBeDeclaration=*/false); 1823 if (Style.BraceWrapping.IndentBraces) 1824 addUnwrappedLine(); 1825 } else { 1826 addUnwrappedLine(); 1827 ++Line->Level; 1828 parseStructuralElement(); 1829 --Line->Level; 1830 } 1831 1832 // FIXME: Add error handling. 1833 if (!FormatTok->Tok.is(tok::kw_while)) { 1834 addUnwrappedLine(); 1835 return; 1836 } 1837 1838 nextToken(); 1839 parseStructuralElement(); 1840 } 1841 1842 void UnwrappedLineParser::parseLabel() { 1843 nextToken(); 1844 unsigned OldLineLevel = Line->Level; 1845 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1846 --Line->Level; 1847 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1848 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1849 parseBlock(/*MustBeDeclaration=*/false); 1850 if (FormatTok->Tok.is(tok::kw_break)) { 1851 if (Style.BraceWrapping.AfterControlStatement) 1852 addUnwrappedLine(); 1853 parseStructuralElement(); 1854 } 1855 addUnwrappedLine(); 1856 } else { 1857 if (FormatTok->is(tok::semi)) 1858 nextToken(); 1859 addUnwrappedLine(); 1860 } 1861 Line->Level = OldLineLevel; 1862 if (FormatTok->isNot(tok::l_brace)) { 1863 parseStructuralElement(); 1864 addUnwrappedLine(); 1865 } 1866 } 1867 1868 void UnwrappedLineParser::parseCaseLabel() { 1869 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1870 // FIXME: fix handling of complex expressions here. 1871 do { 1872 nextToken(); 1873 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1874 parseLabel(); 1875 } 1876 1877 void UnwrappedLineParser::parseSwitch() { 1878 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1879 nextToken(); 1880 if (FormatTok->Tok.is(tok::l_paren)) 1881 parseParens(); 1882 if (FormatTok->Tok.is(tok::l_brace)) { 1883 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1884 parseBlock(/*MustBeDeclaration=*/false); 1885 addUnwrappedLine(); 1886 } else { 1887 addUnwrappedLine(); 1888 ++Line->Level; 1889 parseStructuralElement(); 1890 --Line->Level; 1891 } 1892 } 1893 1894 void UnwrappedLineParser::parseAccessSpecifier() { 1895 nextToken(); 1896 // Understand Qt's slots. 1897 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 1898 nextToken(); 1899 // Otherwise, we don't know what it is, and we'd better keep the next token. 1900 if (FormatTok->Tok.is(tok::colon)) 1901 nextToken(); 1902 addUnwrappedLine(); 1903 } 1904 1905 bool UnwrappedLineParser::parseEnum() { 1906 // Won't be 'enum' for NS_ENUMs. 1907 if (FormatTok->Tok.is(tok::kw_enum)) 1908 nextToken(); 1909 1910 // In TypeScript, "enum" can also be used as property name, e.g. in interface 1911 // declarations. An "enum" keyword followed by a colon would be a syntax 1912 // error and thus assume it is just an identifier. 1913 if (Style.Language == FormatStyle::LK_JavaScript && 1914 FormatTok->isOneOf(tok::colon, tok::question)) 1915 return false; 1916 1917 // Eat up enum class ... 1918 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 1919 nextToken(); 1920 1921 while (FormatTok->Tok.getIdentifierInfo() || 1922 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 1923 tok::greater, tok::comma, tok::question)) { 1924 nextToken(); 1925 // We can have macros or attributes in between 'enum' and the enum name. 1926 if (FormatTok->is(tok::l_paren)) 1927 parseParens(); 1928 if (FormatTok->is(tok::identifier)) { 1929 nextToken(); 1930 // If there are two identifiers in a row, this is likely an elaborate 1931 // return type. In Java, this can be "implements", etc. 1932 if (Style.isCpp() && FormatTok->is(tok::identifier)) 1933 return false; 1934 } 1935 } 1936 1937 // Just a declaration or something is wrong. 1938 if (FormatTok->isNot(tok::l_brace)) 1939 return true; 1940 FormatTok->BlockKind = BK_Block; 1941 1942 if (Style.Language == FormatStyle::LK_Java) { 1943 // Java enums are different. 1944 parseJavaEnumBody(); 1945 return true; 1946 } 1947 if (Style.Language == FormatStyle::LK_Proto) { 1948 parseBlock(/*MustBeDeclaration=*/true); 1949 return true; 1950 } 1951 1952 // Parse enum body. 1953 nextToken(); 1954 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 1955 if (HasError) { 1956 if (FormatTok->is(tok::semi)) 1957 nextToken(); 1958 addUnwrappedLine(); 1959 } 1960 return true; 1961 1962 // There is no addUnwrappedLine() here so that we fall through to parsing a 1963 // structural element afterwards. Thus, in "enum A {} n, m;", 1964 // "} n, m;" will end up in one unwrapped line. 1965 } 1966 1967 void UnwrappedLineParser::parseJavaEnumBody() { 1968 // Determine whether the enum is simple, i.e. does not have a semicolon or 1969 // constants with class bodies. Simple enums can be formatted like braced 1970 // lists, contracted to a single line, etc. 1971 unsigned StoredPosition = Tokens->getPosition(); 1972 bool IsSimple = true; 1973 FormatToken *Tok = Tokens->getNextToken(); 1974 while (Tok) { 1975 if (Tok->is(tok::r_brace)) 1976 break; 1977 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 1978 IsSimple = false; 1979 break; 1980 } 1981 // FIXME: This will also mark enums with braces in the arguments to enum 1982 // constants as "not simple". This is probably fine in practice, though. 1983 Tok = Tokens->getNextToken(); 1984 } 1985 FormatTok = Tokens->setPosition(StoredPosition); 1986 1987 if (IsSimple) { 1988 nextToken(); 1989 parseBracedList(); 1990 addUnwrappedLine(); 1991 return; 1992 } 1993 1994 // Parse the body of a more complex enum. 1995 // First add a line for everything up to the "{". 1996 nextToken(); 1997 addUnwrappedLine(); 1998 ++Line->Level; 1999 2000 // Parse the enum constants. 2001 while (FormatTok) { 2002 if (FormatTok->is(tok::l_brace)) { 2003 // Parse the constant's class body. 2004 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2005 /*MunchSemi=*/false); 2006 } else if (FormatTok->is(tok::l_paren)) { 2007 parseParens(); 2008 } else if (FormatTok->is(tok::comma)) { 2009 nextToken(); 2010 addUnwrappedLine(); 2011 } else if (FormatTok->is(tok::semi)) { 2012 nextToken(); 2013 addUnwrappedLine(); 2014 break; 2015 } else if (FormatTok->is(tok::r_brace)) { 2016 addUnwrappedLine(); 2017 break; 2018 } else { 2019 nextToken(); 2020 } 2021 } 2022 2023 // Parse the class body after the enum's ";" if any. 2024 parseLevel(/*HasOpeningBrace=*/true); 2025 nextToken(); 2026 --Line->Level; 2027 addUnwrappedLine(); 2028 } 2029 2030 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 2031 const FormatToken &InitialToken = *FormatTok; 2032 nextToken(); 2033 2034 // The actual identifier can be a nested name specifier, and in macros 2035 // it is often token-pasted. 2036 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 2037 tok::kw___attribute, tok::kw___declspec, 2038 tok::kw_alignas) || 2039 ((Style.Language == FormatStyle::LK_Java || 2040 Style.Language == FormatStyle::LK_JavaScript) && 2041 FormatTok->isOneOf(tok::period, tok::comma))) { 2042 if (Style.Language == FormatStyle::LK_JavaScript && 2043 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 2044 // JavaScript/TypeScript supports inline object types in 2045 // extends/implements positions: 2046 // class Foo implements {bar: number} { } 2047 nextToken(); 2048 if (FormatTok->is(tok::l_brace)) { 2049 tryToParseBracedList(); 2050 continue; 2051 } 2052 } 2053 bool IsNonMacroIdentifier = 2054 FormatTok->is(tok::identifier) && 2055 FormatTok->TokenText != FormatTok->TokenText.upper(); 2056 nextToken(); 2057 // We can have macros or attributes in between 'class' and the class name. 2058 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 2059 parseParens(); 2060 } 2061 2062 // Note that parsing away template declarations here leads to incorrectly 2063 // accepting function declarations as record declarations. 2064 // In general, we cannot solve this problem. Consider: 2065 // class A<int> B() {} 2066 // which can be a function definition or a class definition when B() is a 2067 // macro. If we find enough real-world cases where this is a problem, we 2068 // can parse for the 'template' keyword in the beginning of the statement, 2069 // and thus rule out the record production in case there is no template 2070 // (this would still leave us with an ambiguity between template function 2071 // and class declarations). 2072 if (FormatTok->isOneOf(tok::colon, tok::less)) { 2073 while (!eof()) { 2074 if (FormatTok->is(tok::l_brace)) { 2075 calculateBraceTypes(/*ExpectClassBody=*/true); 2076 if (!tryToParseBracedList()) 2077 break; 2078 } 2079 if (FormatTok->Tok.is(tok::semi)) 2080 return; 2081 nextToken(); 2082 } 2083 } 2084 if (FormatTok->Tok.is(tok::l_brace)) { 2085 if (ParseAsExpr) { 2086 parseChildBlock(); 2087 } else { 2088 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2089 addUnwrappedLine(); 2090 2091 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2092 /*MunchSemi=*/false); 2093 } 2094 } 2095 // There is no addUnwrappedLine() here so that we fall through to parsing a 2096 // structural element afterwards. Thus, in "class A {} n, m;", 2097 // "} n, m;" will end up in one unwrapped line. 2098 } 2099 2100 void UnwrappedLineParser::parseObjCProtocolList() { 2101 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 2102 do 2103 nextToken(); 2104 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 2105 nextToken(); // Skip '>'. 2106 } 2107 2108 void UnwrappedLineParser::parseObjCUntilAtEnd() { 2109 do { 2110 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2111 nextToken(); 2112 addUnwrappedLine(); 2113 break; 2114 } 2115 if (FormatTok->is(tok::l_brace)) { 2116 parseBlock(/*MustBeDeclaration=*/false); 2117 // In ObjC interfaces, nothing should be following the "}". 2118 addUnwrappedLine(); 2119 } else if (FormatTok->is(tok::r_brace)) { 2120 // Ignore stray "}". parseStructuralElement doesn't consume them. 2121 nextToken(); 2122 addUnwrappedLine(); 2123 } else { 2124 parseStructuralElement(); 2125 } 2126 } while (!eof()); 2127 } 2128 2129 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2130 nextToken(); 2131 nextToken(); // interface name 2132 2133 // @interface can be followed by either a base class, or a category. 2134 if (FormatTok->Tok.is(tok::colon)) { 2135 nextToken(); 2136 nextToken(); // base class name 2137 } else if (FormatTok->Tok.is(tok::l_paren)) 2138 // Skip category, if present. 2139 parseParens(); 2140 2141 if (FormatTok->Tok.is(tok::less)) 2142 parseObjCProtocolList(); 2143 2144 if (FormatTok->Tok.is(tok::l_brace)) { 2145 if (Style.BraceWrapping.AfterObjCDeclaration) 2146 addUnwrappedLine(); 2147 parseBlock(/*MustBeDeclaration=*/true); 2148 } 2149 2150 // With instance variables, this puts '}' on its own line. Without instance 2151 // variables, this ends the @interface line. 2152 addUnwrappedLine(); 2153 2154 parseObjCUntilAtEnd(); 2155 } 2156 2157 void UnwrappedLineParser::parseObjCProtocol() { 2158 nextToken(); 2159 nextToken(); // protocol name 2160 2161 if (FormatTok->Tok.is(tok::less)) 2162 parseObjCProtocolList(); 2163 2164 // Check for protocol declaration. 2165 if (FormatTok->Tok.is(tok::semi)) { 2166 nextToken(); 2167 return addUnwrappedLine(); 2168 } 2169 2170 addUnwrappedLine(); 2171 parseObjCUntilAtEnd(); 2172 } 2173 2174 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2175 bool IsImport = FormatTok->is(Keywords.kw_import); 2176 assert(IsImport || FormatTok->is(tok::kw_export)); 2177 nextToken(); 2178 2179 // Consume the "default" in "export default class/function". 2180 if (FormatTok->is(tok::kw_default)) 2181 nextToken(); 2182 2183 // Consume "async function", "function" and "default function", so that these 2184 // get parsed as free-standing JS functions, i.e. do not require a trailing 2185 // semicolon. 2186 if (FormatTok->is(Keywords.kw_async)) 2187 nextToken(); 2188 if (FormatTok->is(Keywords.kw_function)) { 2189 nextToken(); 2190 return; 2191 } 2192 2193 // For imports, `export *`, `export {...}`, consume the rest of the line up 2194 // to the terminating `;`. For everything else, just return and continue 2195 // parsing the structural element, i.e. the declaration or expression for 2196 // `export default`. 2197 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2198 !FormatTok->isStringLiteral()) 2199 return; 2200 2201 while (!eof()) { 2202 if (FormatTok->is(tok::semi)) 2203 return; 2204 if (Line->Tokens.empty()) { 2205 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2206 // import statement should terminate. 2207 return; 2208 } 2209 if (FormatTok->is(tok::l_brace)) { 2210 FormatTok->BlockKind = BK_Block; 2211 nextToken(); 2212 parseBracedList(); 2213 } else { 2214 nextToken(); 2215 } 2216 } 2217 } 2218 2219 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2220 StringRef Prefix = "") { 2221 llvm::dbgs() << Prefix << "Line(" << Line.Level 2222 << ", FSC=" << Line.FirstStartColumn << ")" 2223 << (Line.InPPDirective ? " MACRO" : "") << ": "; 2224 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2225 E = Line.Tokens.end(); 2226 I != E; ++I) { 2227 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2228 << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn 2229 << "] "; 2230 } 2231 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2232 E = Line.Tokens.end(); 2233 I != E; ++I) { 2234 const UnwrappedLineNode &Node = *I; 2235 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2236 I = Node.Children.begin(), 2237 E = Node.Children.end(); 2238 I != E; ++I) { 2239 printDebugInfo(*I, "\nChild: "); 2240 } 2241 } 2242 llvm::dbgs() << "\n"; 2243 } 2244 2245 void UnwrappedLineParser::addUnwrappedLine() { 2246 if (Line->Tokens.empty()) 2247 return; 2248 DEBUG({ 2249 if (CurrentLines == &Lines) 2250 printDebugInfo(*Line); 2251 }); 2252 CurrentLines->push_back(std::move(*Line)); 2253 Line->Tokens.clear(); 2254 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2255 Line->FirstStartColumn = 0; 2256 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2257 CurrentLines->append( 2258 std::make_move_iterator(PreprocessorDirectives.begin()), 2259 std::make_move_iterator(PreprocessorDirectives.end())); 2260 PreprocessorDirectives.clear(); 2261 } 2262 // Disconnect the current token from the last token on the previous line. 2263 FormatTok->Previous = nullptr; 2264 } 2265 2266 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2267 2268 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2269 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2270 FormatTok.NewlinesBefore > 0; 2271 } 2272 2273 // Checks if \p FormatTok is a line comment that continues the line comment 2274 // section on \p Line. 2275 static bool continuesLineCommentSection(const FormatToken &FormatTok, 2276 const UnwrappedLine &Line, 2277 llvm::Regex &CommentPragmasRegex) { 2278 if (Line.Tokens.empty()) 2279 return false; 2280 2281 StringRef IndentContent = FormatTok.TokenText; 2282 if (FormatTok.TokenText.startswith("//") || 2283 FormatTok.TokenText.startswith("/*")) 2284 IndentContent = FormatTok.TokenText.substr(2); 2285 if (CommentPragmasRegex.match(IndentContent)) 2286 return false; 2287 2288 // If Line starts with a line comment, then FormatTok continues the comment 2289 // section if its original column is greater or equal to the original start 2290 // column of the line. 2291 // 2292 // Define the min column token of a line as follows: if a line ends in '{' or 2293 // contains a '{' followed by a line comment, then the min column token is 2294 // that '{'. Otherwise, the min column token of the line is the first token of 2295 // the line. 2296 // 2297 // If Line starts with a token other than a line comment, then FormatTok 2298 // continues the comment section if its original column is greater than the 2299 // original start column of the min column token of the line. 2300 // 2301 // For example, the second line comment continues the first in these cases: 2302 // 2303 // // first line 2304 // // second line 2305 // 2306 // and: 2307 // 2308 // // first line 2309 // // second line 2310 // 2311 // and: 2312 // 2313 // int i; // first line 2314 // // second line 2315 // 2316 // and: 2317 // 2318 // do { // first line 2319 // // second line 2320 // int i; 2321 // } while (true); 2322 // 2323 // and: 2324 // 2325 // enum { 2326 // a, // first line 2327 // // second line 2328 // b 2329 // }; 2330 // 2331 // The second line comment doesn't continue the first in these cases: 2332 // 2333 // // first line 2334 // // second line 2335 // 2336 // and: 2337 // 2338 // int i; // first line 2339 // // second line 2340 // 2341 // and: 2342 // 2343 // do { // first line 2344 // // second line 2345 // int i; 2346 // } while (true); 2347 // 2348 // and: 2349 // 2350 // enum { 2351 // a, // first line 2352 // // second line 2353 // }; 2354 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2355 2356 // Scan for '{//'. If found, use the column of '{' as a min column for line 2357 // comment section continuation. 2358 const FormatToken *PreviousToken = nullptr; 2359 for (const UnwrappedLineNode &Node : Line.Tokens) { 2360 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2361 isLineComment(*Node.Tok)) { 2362 MinColumnToken = PreviousToken; 2363 break; 2364 } 2365 PreviousToken = Node.Tok; 2366 2367 // Grab the last newline preceding a token in this unwrapped line. 2368 if (Node.Tok->NewlinesBefore > 0) { 2369 MinColumnToken = Node.Tok; 2370 } 2371 } 2372 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2373 MinColumnToken = PreviousToken; 2374 } 2375 2376 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2377 MinColumnToken); 2378 } 2379 2380 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2381 bool JustComments = Line->Tokens.empty(); 2382 for (SmallVectorImpl<FormatToken *>::const_iterator 2383 I = CommentsBeforeNextToken.begin(), 2384 E = CommentsBeforeNextToken.end(); 2385 I != E; ++I) { 2386 // Line comments that belong to the same line comment section are put on the 2387 // same line since later we might want to reflow content between them. 2388 // Additional fine-grained breaking of line comment sections is controlled 2389 // by the class BreakableLineCommentSection in case it is desirable to keep 2390 // several line comment sections in the same unwrapped line. 2391 // 2392 // FIXME: Consider putting separate line comment sections as children to the 2393 // unwrapped line instead. 2394 (*I)->ContinuesLineCommentSection = 2395 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2396 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2397 addUnwrappedLine(); 2398 pushToken(*I); 2399 } 2400 if (NewlineBeforeNext && JustComments) 2401 addUnwrappedLine(); 2402 CommentsBeforeNextToken.clear(); 2403 } 2404 2405 void UnwrappedLineParser::nextToken(int LevelDifference) { 2406 if (eof()) 2407 return; 2408 flushComments(isOnNewLine(*FormatTok)); 2409 pushToken(FormatTok); 2410 FormatToken *Previous = FormatTok; 2411 if (Style.Language != FormatStyle::LK_JavaScript) 2412 readToken(LevelDifference); 2413 else 2414 readTokenWithJavaScriptASI(); 2415 FormatTok->Previous = Previous; 2416 } 2417 2418 void UnwrappedLineParser::distributeComments( 2419 const SmallVectorImpl<FormatToken *> &Comments, 2420 const FormatToken *NextTok) { 2421 // Whether or not a line comment token continues a line is controlled by 2422 // the method continuesLineCommentSection, with the following caveat: 2423 // 2424 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2425 // that each comment line from the trail is aligned with the next token, if 2426 // the next token exists. If a trail exists, the beginning of the maximal 2427 // trail is marked as a start of a new comment section. 2428 // 2429 // For example in this code: 2430 // 2431 // int a; // line about a 2432 // // line 1 about b 2433 // // line 2 about b 2434 // int b; 2435 // 2436 // the two lines about b form a maximal trail, so there are two sections, the 2437 // first one consisting of the single comment "// line about a" and the 2438 // second one consisting of the next two comments. 2439 if (Comments.empty()) 2440 return; 2441 bool ShouldPushCommentsInCurrentLine = true; 2442 bool HasTrailAlignedWithNextToken = false; 2443 unsigned StartOfTrailAlignedWithNextToken = 0; 2444 if (NextTok) { 2445 // We are skipping the first element intentionally. 2446 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2447 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2448 HasTrailAlignedWithNextToken = true; 2449 StartOfTrailAlignedWithNextToken = i; 2450 } 2451 } 2452 } 2453 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2454 FormatToken *FormatTok = Comments[i]; 2455 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 2456 FormatTok->ContinuesLineCommentSection = false; 2457 } else { 2458 FormatTok->ContinuesLineCommentSection = 2459 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2460 } 2461 if (!FormatTok->ContinuesLineCommentSection && 2462 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2463 ShouldPushCommentsInCurrentLine = false; 2464 } 2465 if (ShouldPushCommentsInCurrentLine) { 2466 pushToken(FormatTok); 2467 } else { 2468 CommentsBeforeNextToken.push_back(FormatTok); 2469 } 2470 } 2471 } 2472 2473 void UnwrappedLineParser::readToken(int LevelDifference) { 2474 SmallVector<FormatToken *, 1> Comments; 2475 do { 2476 FormatTok = Tokens->getNextToken(); 2477 assert(FormatTok); 2478 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2479 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2480 distributeComments(Comments, FormatTok); 2481 Comments.clear(); 2482 // If there is an unfinished unwrapped line, we flush the preprocessor 2483 // directives only after that unwrapped line was finished later. 2484 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2485 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2486 assert((LevelDifference >= 0 || 2487 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 2488 "LevelDifference makes Line->Level negative"); 2489 Line->Level += LevelDifference; 2490 // Comments stored before the preprocessor directive need to be output 2491 // before the preprocessor directive, at the same level as the 2492 // preprocessor directive, as we consider them to apply to the directive. 2493 flushComments(isOnNewLine(*FormatTok)); 2494 parsePPDirective(); 2495 } 2496 while (FormatTok->Type == TT_ConflictStart || 2497 FormatTok->Type == TT_ConflictEnd || 2498 FormatTok->Type == TT_ConflictAlternative) { 2499 if (FormatTok->Type == TT_ConflictStart) { 2500 conditionalCompilationStart(/*Unreachable=*/false); 2501 } else if (FormatTok->Type == TT_ConflictAlternative) { 2502 conditionalCompilationAlternative(); 2503 } else if (FormatTok->Type == TT_ConflictEnd) { 2504 conditionalCompilationEnd(); 2505 } 2506 FormatTok = Tokens->getNextToken(); 2507 FormatTok->MustBreakBefore = true; 2508 } 2509 2510 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 2511 !Line->InPPDirective) { 2512 continue; 2513 } 2514 2515 if (!FormatTok->Tok.is(tok::comment)) { 2516 distributeComments(Comments, FormatTok); 2517 Comments.clear(); 2518 return; 2519 } 2520 2521 Comments.push_back(FormatTok); 2522 } while (!eof()); 2523 2524 distributeComments(Comments, nullptr); 2525 Comments.clear(); 2526 } 2527 2528 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2529 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2530 if (MustBreakBeforeNextToken) { 2531 Line->Tokens.back().Tok->MustBreakBefore = true; 2532 MustBreakBeforeNextToken = false; 2533 } 2534 } 2535 2536 } // end namespace format 2537 } // end namespace clang 2538