1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "UnwrappedLineParser.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/Support/Debug.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #include <algorithm> 22 23 #define DEBUG_TYPE "format-parser" 24 25 namespace clang { 26 namespace format { 27 28 class FormatTokenSource { 29 public: 30 virtual ~FormatTokenSource() {} 31 virtual FormatToken *getNextToken() = 0; 32 33 virtual unsigned getPosition() = 0; 34 virtual FormatToken *setPosition(unsigned Position) = 0; 35 }; 36 37 namespace { 38 39 class ScopedDeclarationState { 40 public: 41 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 42 bool MustBeDeclaration) 43 : Line(Line), Stack(Stack) { 44 Line.MustBeDeclaration = MustBeDeclaration; 45 Stack.push_back(MustBeDeclaration); 46 } 47 ~ScopedDeclarationState() { 48 Stack.pop_back(); 49 if (!Stack.empty()) 50 Line.MustBeDeclaration = Stack.back(); 51 else 52 Line.MustBeDeclaration = true; 53 } 54 55 private: 56 UnwrappedLine &Line; 57 std::vector<bool> &Stack; 58 }; 59 60 static bool isLineComment(const FormatToken &FormatTok) { 61 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 62 } 63 64 // Checks if \p FormatTok is a line comment that continues the line comment 65 // \p Previous. 
The original column of \p MinColumnToken is used to determine 66 // whether \p FormatTok is indented enough to the right to continue \p Previous. 67 static bool continuesLineComment(const FormatToken &FormatTok, 68 const FormatToken *Previous, 69 const FormatToken *MinColumnToken) { 70 if (!Previous || !MinColumnToken) 71 return false; 72 unsigned MinContinueColumn = 73 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 74 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 75 isLineComment(*Previous) && 76 FormatTok.OriginalColumn >= MinContinueColumn; 77 } 78 79 class ScopedMacroState : public FormatTokenSource { 80 public: 81 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 82 FormatToken *&ResetToken) 83 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 84 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 85 Token(nullptr), PreviousToken(nullptr) { 86 TokenSource = this; 87 Line.Level = 0; 88 Line.InPPDirective = true; 89 } 90 91 ~ScopedMacroState() override { 92 TokenSource = PreviousTokenSource; 93 ResetToken = Token; 94 Line.InPPDirective = false; 95 Line.Level = PreviousLineLevel; 96 } 97 98 FormatToken *getNextToken() override { 99 // The \c UnwrappedLineParser guards against this by never calling 100 // \c getNextToken() after it has encountered the first eof token. 
101 assert(!eof()); 102 PreviousToken = Token; 103 Token = PreviousTokenSource->getNextToken(); 104 if (eof()) 105 return getFakeEOF(); 106 return Token; 107 } 108 109 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 110 111 FormatToken *setPosition(unsigned Position) override { 112 PreviousToken = nullptr; 113 Token = PreviousTokenSource->setPosition(Position); 114 return Token; 115 } 116 117 private: 118 bool eof() { 119 return Token && Token->HasUnescapedNewline && 120 !continuesLineComment(*Token, PreviousToken, 121 /*MinColumnToken=*/PreviousToken); 122 } 123 124 FormatToken *getFakeEOF() { 125 static bool EOFInitialized = false; 126 static FormatToken FormatTok; 127 if (!EOFInitialized) { 128 FormatTok.Tok.startToken(); 129 FormatTok.Tok.setKind(tok::eof); 130 EOFInitialized = true; 131 } 132 return &FormatTok; 133 } 134 135 UnwrappedLine &Line; 136 FormatTokenSource *&TokenSource; 137 FormatToken *&ResetToken; 138 unsigned PreviousLineLevel; 139 FormatTokenSource *PreviousTokenSource; 140 141 FormatToken *Token; 142 FormatToken *PreviousToken; 143 }; 144 145 } // end anonymous namespace 146 147 class ScopedLineState { 148 public: 149 ScopedLineState(UnwrappedLineParser &Parser, 150 bool SwitchToPreprocessorLines = false) 151 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 152 if (SwitchToPreprocessorLines) 153 Parser.CurrentLines = &Parser.PreprocessorDirectives; 154 else if (!Parser.Line->Tokens.empty()) 155 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 156 PreBlockLine = std::move(Parser.Line); 157 Parser.Line = llvm::make_unique<UnwrappedLine>(); 158 Parser.Line->Level = PreBlockLine->Level; 159 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 160 } 161 162 ~ScopedLineState() { 163 if (!Parser.Line->Tokens.empty()) { 164 Parser.addUnwrappedLine(); 165 } 166 assert(Parser.Line->Tokens.empty()); 167 Parser.Line = std::move(PreBlockLine); 168 if (Parser.CurrentLines == 
&Parser.PreprocessorDirectives) 169 Parser.MustBreakBeforeNextToken = true; 170 Parser.CurrentLines = OriginalLines; 171 } 172 173 private: 174 UnwrappedLineParser &Parser; 175 176 std::unique_ptr<UnwrappedLine> PreBlockLine; 177 SmallVectorImpl<UnwrappedLine> *OriginalLines; 178 }; 179 180 class CompoundStatementIndenter { 181 public: 182 CompoundStatementIndenter(UnwrappedLineParser *Parser, 183 const FormatStyle &Style, unsigned &LineLevel) 184 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 185 if (Style.BraceWrapping.AfterControlStatement) 186 Parser->addUnwrappedLine(); 187 if (Style.BraceWrapping.IndentBraces) 188 ++LineLevel; 189 } 190 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 191 192 private: 193 unsigned &LineLevel; 194 unsigned OldLineLevel; 195 }; 196 197 namespace { 198 199 class IndexedTokenSource : public FormatTokenSource { 200 public: 201 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 202 : Tokens(Tokens), Position(-1) {} 203 204 FormatToken *getNextToken() override { 205 ++Position; 206 return Tokens[Position]; 207 } 208 209 unsigned getPosition() override { 210 assert(Position >= 0); 211 return Position; 212 } 213 214 FormatToken *setPosition(unsigned P) override { 215 Position = P; 216 return Tokens[Position]; 217 } 218 219 void reset() { Position = -1; } 220 221 private: 222 ArrayRef<FormatToken *> Tokens; 223 int Position; 224 }; 225 226 } // end anonymous namespace 227 228 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 229 const AdditionalKeywords &Keywords, 230 unsigned FirstStartColumn, 231 ArrayRef<FormatToken *> Tokens, 232 UnwrappedLineConsumer &Callback) 233 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 234 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 235 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 236 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 237 IfNdefCondition(nullptr), FoundIncludeGuardStart(false), 238 IncludeGuardRejected(false), 
FirstStartColumn(FirstStartColumn) {} 239 240 void UnwrappedLineParser::reset() { 241 PPBranchLevel = -1; 242 IfNdefCondition = nullptr; 243 FoundIncludeGuardStart = false; 244 IncludeGuardRejected = false; 245 Line.reset(new UnwrappedLine); 246 CommentsBeforeNextToken.clear(); 247 FormatTok = nullptr; 248 MustBreakBeforeNextToken = false; 249 PreprocessorDirectives.clear(); 250 CurrentLines = &Lines; 251 DeclarationScopeStack.clear(); 252 PPStack.clear(); 253 Line->FirstStartColumn = FirstStartColumn; 254 } 255 256 void UnwrappedLineParser::parse() { 257 IndexedTokenSource TokenSource(AllTokens); 258 Line->FirstStartColumn = FirstStartColumn; 259 do { 260 DEBUG(llvm::dbgs() << "----\n"); 261 reset(); 262 Tokens = &TokenSource; 263 TokenSource.reset(); 264 265 readToken(); 266 parseFile(); 267 // Create line with eof token. 268 pushToken(FormatTok); 269 addUnwrappedLine(); 270 271 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 272 E = Lines.end(); 273 I != E; ++I) { 274 Callback.consumeUnwrappedLine(*I); 275 } 276 Callback.finishRun(); 277 Lines.clear(); 278 while (!PPLevelBranchIndex.empty() && 279 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 280 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 281 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 282 } 283 if (!PPLevelBranchIndex.empty()) { 284 ++PPLevelBranchIndex.back(); 285 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 286 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 287 } 288 } while (!PPLevelBranchIndex.empty()); 289 } 290 291 void UnwrappedLineParser::parseFile() { 292 // The top-level context in a file always has declarations, except for pre- 293 // processor directives and JavaScript files. 
294 bool MustBeDeclaration = 295 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 296 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 297 MustBeDeclaration); 298 if (Style.Language == FormatStyle::LK_TextProto) 299 parseBracedList(); 300 else 301 parseLevel(/*HasOpeningBrace=*/false); 302 // Make sure to format the remaining tokens. 303 flushComments(true); 304 addUnwrappedLine(); 305 } 306 307 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 308 bool SwitchLabelEncountered = false; 309 do { 310 tok::TokenKind kind = FormatTok->Tok.getKind(); 311 if (FormatTok->Type == TT_MacroBlockBegin) { 312 kind = tok::l_brace; 313 } else if (FormatTok->Type == TT_MacroBlockEnd) { 314 kind = tok::r_brace; 315 } 316 317 switch (kind) { 318 case tok::comment: 319 nextToken(); 320 addUnwrappedLine(); 321 break; 322 case tok::l_brace: 323 // FIXME: Add parameter whether this can happen - if this happens, we must 324 // be in a non-declaration context. 325 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 326 continue; 327 parseBlock(/*MustBeDeclaration=*/false); 328 addUnwrappedLine(); 329 break; 330 case tok::r_brace: 331 if (HasOpeningBrace) 332 return; 333 nextToken(); 334 addUnwrappedLine(); 335 break; 336 case tok::kw_default: { 337 unsigned StoredPosition = Tokens->getPosition(); 338 FormatToken *Next = Tokens->getNextToken(); 339 FormatTok = Tokens->setPosition(StoredPosition); 340 if (Next && Next->isNot(tok::colon)) { 341 // default not followed by ':' is not a case label; treat it like 342 // an identifier. 343 parseStructuralElement(); 344 break; 345 } 346 // Else, if it is 'default:', fall through to the case handling. 347 LLVM_FALLTHROUGH; 348 } 349 case tok::kw_case: 350 if (Style.Language == FormatStyle::LK_JavaScript && 351 Line->MustBeDeclaration) { 352 // A 'case: string' style field declaration. 
353 parseStructuralElement(); 354 break; 355 } 356 if (!SwitchLabelEncountered && 357 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 358 ++Line->Level; 359 SwitchLabelEncountered = true; 360 parseStructuralElement(); 361 break; 362 default: 363 parseStructuralElement(); 364 break; 365 } 366 } while (!eof()); 367 } 368 369 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 370 // We'll parse forward through the tokens until we hit 371 // a closing brace or eof - note that getNextToken() will 372 // parse macros, so this will magically work inside macro 373 // definitions, too. 374 unsigned StoredPosition = Tokens->getPosition(); 375 FormatToken *Tok = FormatTok; 376 const FormatToken *PrevTok = Tok->Previous; 377 // Keep a stack of positions of lbrace tokens. We will 378 // update information about whether an lbrace starts a 379 // braced init list or a different block during the loop. 380 SmallVector<FormatToken *, 8> LBraceStack; 381 assert(Tok->Tok.is(tok::l_brace)); 382 do { 383 // Get next non-comment token. 384 FormatToken *NextTok; 385 unsigned ReadTokens = 0; 386 do { 387 NextTok = Tokens->getNextToken(); 388 ++ReadTokens; 389 } while (NextTok->is(tok::comment)); 390 391 switch (Tok->Tok.getKind()) { 392 case tok::l_brace: 393 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 394 if (PrevTok->isOneOf(tok::colon, tok::less)) 395 // A ':' indicates this code is in a type, or a braced list 396 // following a label in an object literal ({a: {b: 1}}). 397 // A '<' could be an object used in a comparison, but that is nonsense 398 // code (can never return true), so more likely it is a generic type 399 // argument (`X<{a: string; b: number}>`). 400 // The code below could be confused by semicolons between the 401 // individual members in a type member list, which would normally 402 // trigger BK_Block. In both cases, this must be parsed as an inline 403 // braced init. 
404 Tok->BlockKind = BK_BracedInit; 405 else if (PrevTok->is(tok::r_paren)) 406 // `) { }` can only occur in function or method declarations in JS. 407 Tok->BlockKind = BK_Block; 408 } else { 409 Tok->BlockKind = BK_Unknown; 410 } 411 LBraceStack.push_back(Tok); 412 break; 413 case tok::r_brace: 414 if (LBraceStack.empty()) 415 break; 416 if (LBraceStack.back()->BlockKind == BK_Unknown) { 417 bool ProbablyBracedList = false; 418 if (Style.Language == FormatStyle::LK_Proto) { 419 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 420 } else { 421 // Using OriginalColumn to distinguish between ObjC methods and 422 // binary operators is a bit hacky. 423 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 424 NextTok->OriginalColumn == 0; 425 426 // If there is a comma, semicolon or right paren after the closing 427 // brace, we assume this is a braced initializer list. Note that 428 // regardless how we mark inner braces here, we will overwrite the 429 // BlockKind later if we parse a braced list (where all blocks 430 // inside are by default braced lists), or when we explicitly detect 431 // blocks (for example while parsing lambdas). 432 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 433 // braced list in JS. 
434 ProbablyBracedList = 435 (Style.Language == FormatStyle::LK_JavaScript && 436 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 437 Keywords.kw_as)) || 438 (Style.isCpp() && NextTok->is(tok::l_paren)) || 439 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 440 tok::r_paren, tok::r_square, tok::l_brace, 441 tok::l_square, tok::ellipsis) || 442 (NextTok->is(tok::identifier) && 443 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 444 (NextTok->is(tok::semi) && 445 (!ExpectClassBody || LBraceStack.size() != 1)) || 446 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 447 } 448 if (ProbablyBracedList) { 449 Tok->BlockKind = BK_BracedInit; 450 LBraceStack.back()->BlockKind = BK_BracedInit; 451 } else { 452 Tok->BlockKind = BK_Block; 453 LBraceStack.back()->BlockKind = BK_Block; 454 } 455 } 456 LBraceStack.pop_back(); 457 break; 458 case tok::at: 459 case tok::semi: 460 case tok::kw_if: 461 case tok::kw_while: 462 case tok::kw_for: 463 case tok::kw_switch: 464 case tok::kw_try: 465 case tok::kw___try: 466 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 467 LBraceStack.back()->BlockKind = BK_Block; 468 break; 469 default: 470 break; 471 } 472 PrevTok = Tok; 473 Tok = NextTok; 474 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 475 476 // Assume other blocks for all unclosed opening braces. 
477 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 478 if (LBraceStack[i]->BlockKind == BK_Unknown) 479 LBraceStack[i]->BlockKind = BK_Block; 480 } 481 482 FormatTok = Tokens->setPosition(StoredPosition); 483 } 484 485 template <class T> 486 static inline void hash_combine(std::size_t &seed, const T &v) { 487 std::hash<T> hasher; 488 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 489 } 490 491 size_t UnwrappedLineParser::computePPHash() const { 492 size_t h = 0; 493 for (const auto &i : PPStack) { 494 hash_combine(h, size_t(i.Kind)); 495 hash_combine(h, i.Line); 496 } 497 return h; 498 } 499 500 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 501 bool MunchSemi) { 502 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 503 "'{' or macro block token expected"); 504 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 505 FormatTok->BlockKind = BK_Block; 506 507 size_t PPStartHash = computePPHash(); 508 509 unsigned InitialLevel = Line->Level; 510 nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); 511 512 if (MacroBlock && FormatTok->is(tok::l_paren)) 513 parseParens(); 514 515 size_t NbPreprocessorDirectives = 516 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 517 addUnwrappedLine(); 518 size_t OpeningLineIndex = 519 CurrentLines->empty() 520 ? (UnwrappedLine::kInvalidIndex) 521 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 522 523 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 524 MustBeDeclaration); 525 if (AddLevel) 526 ++Line->Level; 527 parseLevel(/*HasOpeningBrace=*/true); 528 529 if (eof()) 530 return; 531 532 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 533 : !FormatTok->is(tok::r_brace)) { 534 Line->Level = InitialLevel; 535 FormatTok->BlockKind = BK_Block; 536 return; 537 } 538 539 size_t PPEndHash = computePPHash(); 540 541 // Munch the closing brace. 542 nextToken(/*LevelDifference=*/AddLevel ? 
-1 : 0); 543 544 if (MacroBlock && FormatTok->is(tok::l_paren)) 545 parseParens(); 546 547 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 548 nextToken(); 549 Line->Level = InitialLevel; 550 551 if (PPStartHash == PPEndHash) { 552 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 553 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 554 // Update the opening line to add the forward reference as well 555 (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex = 556 CurrentLines->size() - 1; 557 } 558 } 559 } 560 561 static bool isGoogScope(const UnwrappedLine &Line) { 562 // FIXME: Closure-library specific stuff should not be hard-coded but be 563 // configurable. 564 if (Line.Tokens.size() < 4) 565 return false; 566 auto I = Line.Tokens.begin(); 567 if (I->Tok->TokenText != "goog") 568 return false; 569 ++I; 570 if (I->Tok->isNot(tok::period)) 571 return false; 572 ++I; 573 if (I->Tok->TokenText != "scope") 574 return false; 575 ++I; 576 return I->Tok->is(tok::l_paren); 577 } 578 579 static bool isIIFE(const UnwrappedLine &Line, 580 const AdditionalKeywords &Keywords) { 581 // Look for the start of an immediately invoked anonymous function. 582 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 583 // This is commonly done in JavaScript to create a new, anonymous scope. 584 // Example: (function() { ... 
})() 585 if (Line.Tokens.size() < 3) 586 return false; 587 auto I = Line.Tokens.begin(); 588 if (I->Tok->isNot(tok::l_paren)) 589 return false; 590 ++I; 591 if (I->Tok->isNot(Keywords.kw_function)) 592 return false; 593 ++I; 594 return I->Tok->is(tok::l_paren); 595 } 596 597 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 598 const FormatToken &InitialToken) { 599 if (InitialToken.is(tok::kw_namespace)) 600 return Style.BraceWrapping.AfterNamespace; 601 if (InitialToken.is(tok::kw_class)) 602 return Style.BraceWrapping.AfterClass; 603 if (InitialToken.is(tok::kw_union)) 604 return Style.BraceWrapping.AfterUnion; 605 if (InitialToken.is(tok::kw_struct)) 606 return Style.BraceWrapping.AfterStruct; 607 return false; 608 } 609 610 void UnwrappedLineParser::parseChildBlock() { 611 FormatTok->BlockKind = BK_Block; 612 nextToken(); 613 { 614 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && 615 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 616 ScopedLineState LineState(*this); 617 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 618 /*MustBeDeclaration=*/false); 619 Line->Level += SkipIndent ? 0 : 1; 620 parseLevel(/*HasOpeningBrace=*/true); 621 flushComments(isOnNewLine(*FormatTok)); 622 Line->Level -= SkipIndent ? 
0 : 1; 623 } 624 nextToken(); 625 } 626 627 void UnwrappedLineParser::parsePPDirective() { 628 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 629 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 630 nextToken(); 631 632 if (!FormatTok->Tok.getIdentifierInfo()) { 633 parsePPUnknown(); 634 return; 635 } 636 637 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 638 case tok::pp_define: 639 parsePPDefine(); 640 return; 641 case tok::pp_if: 642 parsePPIf(/*IfDef=*/false); 643 break; 644 case tok::pp_ifdef: 645 case tok::pp_ifndef: 646 parsePPIf(/*IfDef=*/true); 647 break; 648 case tok::pp_else: 649 parsePPElse(); 650 break; 651 case tok::pp_elif: 652 parsePPElIf(); 653 break; 654 case tok::pp_endif: 655 parsePPEndIf(); 656 break; 657 default: 658 parsePPUnknown(); 659 break; 660 } 661 } 662 663 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 664 size_t Line = CurrentLines->size(); 665 if (CurrentLines == &PreprocessorDirectives) 666 Line += Lines.size(); 667 668 if (Unreachable || 669 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 670 PPStack.push_back({PP_Unreachable, Line}); 671 else 672 PPStack.push_back({PP_Conditional, Line}); 673 } 674 675 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 676 ++PPBranchLevel; 677 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 678 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 679 PPLevelBranchIndex.push_back(0); 680 PPLevelBranchCount.push_back(0); 681 } 682 PPChainBranchIndex.push(0); 683 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 684 conditionalCompilationCondition(Unreachable || Skip); 685 } 686 687 void UnwrappedLineParser::conditionalCompilationAlternative() { 688 if (!PPStack.empty()) 689 PPStack.pop_back(); 690 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 691 if (!PPChainBranchIndex.empty()) 692 ++PPChainBranchIndex.top(); 693 conditionalCompilationCondition( 694 
PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 695 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 696 } 697 698 void UnwrappedLineParser::conditionalCompilationEnd() { 699 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 700 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 701 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 702 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 703 } 704 } 705 // Guard against #endif's without #if. 706 if (PPBranchLevel > -1) 707 --PPBranchLevel; 708 if (!PPChainBranchIndex.empty()) 709 PPChainBranchIndex.pop(); 710 if (!PPStack.empty()) 711 PPStack.pop_back(); 712 } 713 714 void UnwrappedLineParser::parsePPIf(bool IfDef) { 715 bool IfNDef = FormatTok->is(tok::pp_ifndef); 716 nextToken(); 717 bool Unreachable = false; 718 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 719 Unreachable = true; 720 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 721 Unreachable = true; 722 conditionalCompilationStart(Unreachable); 723 FormatToken *IfCondition = FormatTok; 724 // If there's a #ifndef on the first line, and the only lines before it are 725 // comments, it could be an include guard. 726 bool MaybeIncludeGuard = IfNDef; 727 if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) { 728 for (auto &Line : Lines) { 729 if (!Line.Tokens.front().Tok->is(tok::comment)) { 730 MaybeIncludeGuard = false; 731 IncludeGuardRejected = true; 732 break; 733 } 734 } 735 } 736 --PPBranchLevel; 737 parsePPUnknown(); 738 ++PPBranchLevel; 739 if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) 740 IfNdefCondition = IfCondition; 741 } 742 743 void UnwrappedLineParser::parsePPElse() { 744 // If a potential include guard has an #else, it's not an include guard. 
745 if (FoundIncludeGuardStart && PPBranchLevel == 0) 746 FoundIncludeGuardStart = false; 747 conditionalCompilationAlternative(); 748 if (PPBranchLevel > -1) 749 --PPBranchLevel; 750 parsePPUnknown(); 751 ++PPBranchLevel; 752 } 753 754 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 755 756 void UnwrappedLineParser::parsePPEndIf() { 757 conditionalCompilationEnd(); 758 parsePPUnknown(); 759 // If the #endif of a potential include guard is the last thing in the file, 760 // then we count it as a real include guard and subtract one from every 761 // preprocessor indent. 762 unsigned TokenPosition = Tokens->getPosition(); 763 FormatToken *PeekNext = AllTokens[TokenPosition]; 764 if (FoundIncludeGuardStart && PPBranchLevel == -1 && PeekNext->is(tok::eof) && 765 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 766 for (auto &Line : Lines) 767 if (Line.InPPDirective && Line.Level > 0) 768 --Line.Level; 769 } 770 771 void UnwrappedLineParser::parsePPDefine() { 772 nextToken(); 773 774 if (FormatTok->Tok.getKind() != tok::identifier) { 775 parsePPUnknown(); 776 return; 777 } 778 if (IfNdefCondition && IfNdefCondition->TokenText == FormatTok->TokenText) { 779 FoundIncludeGuardStart = true; 780 for (auto &Line : Lines) { 781 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 782 FoundIncludeGuardStart = false; 783 break; 784 } 785 } 786 } 787 IfNdefCondition = nullptr; 788 nextToken(); 789 if (FormatTok->Tok.getKind() == tok::l_paren && 790 FormatTok->WhitespaceRange.getBegin() == 791 FormatTok->WhitespaceRange.getEnd()) { 792 parseParens(); 793 } 794 if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) 795 Line->Level += PPBranchLevel + 1; 796 addUnwrappedLine(); 797 ++Line->Level; 798 799 // Errors during a preprocessor directive can only affect the layout of the 800 // preprocessor directive, and thus we ignore them. 
An alternative approach 801 // would be to use the same approach we use on the file level (no 802 // re-indentation if there was a structural error) within the macro 803 // definition. 804 parseFile(); 805 } 806 807 void UnwrappedLineParser::parsePPUnknown() { 808 do { 809 nextToken(); 810 } while (!eof()); 811 if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) 812 Line->Level += PPBranchLevel + 1; 813 addUnwrappedLine(); 814 IfNdefCondition = nullptr; 815 } 816 817 // Here we blacklist certain tokens that are not usually the first token in an 818 // unwrapped line. This is used in attempt to distinguish macro calls without 819 // trailing semicolons from other constructs split to several lines. 820 static bool tokenCanStartNewLine(const clang::Token &Tok) { 821 // Semicolon can be a null-statement, l_square can be a start of a macro or 822 // a C++11 attribute, but this doesn't seem to be common. 823 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 824 Tok.isNot(tok::l_square) && 825 // Tokens that can only be used as binary operators and a part of 826 // overloaded operator names. 827 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 828 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 829 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 830 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 831 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 832 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 833 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 834 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 835 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 836 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 837 Tok.isNot(tok::lesslessequal) && 838 // Colon is used in labels, base class lists, initializer lists, 839 // range-based for loops, ternary operator, but should never be the 840 // first token in an unwrapped line. 
841 Tok.isNot(tok::colon) && 842 // 'noexcept' is a trailing annotation. 843 Tok.isNot(tok::kw_noexcept); 844 } 845 846 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 847 const FormatToken *FormatTok) { 848 // FIXME: This returns true for C/C++ keywords like 'struct'. 849 return FormatTok->is(tok::identifier) && 850 (FormatTok->Tok.getIdentifierInfo() == nullptr || 851 !FormatTok->isOneOf( 852 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 853 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 854 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 855 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 856 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 857 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, 858 Keywords.kw_from)); 859 } 860 861 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 862 const FormatToken *FormatTok) { 863 return FormatTok->Tok.isLiteral() || 864 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 865 mustBeJSIdent(Keywords, FormatTok); 866 } 867 868 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 869 // when encountered after a value (see mustBeJSIdentOrValue). 
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
                           const FormatToken *FormatTok) {
  return FormatTok->isOneOf(
      tok::kw_return, Keywords.kw_yield,
      // conditionals
      tok::kw_if, tok::kw_else,
      // loops
      tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
      // switch/case
      tok::kw_switch, tok::kw_case,
      // exceptions
      tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
      // declaration
      tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
      Keywords.kw_async, Keywords.kw_function,
      // import/export
      Keywords.kw_import, tok::kw_export);
}

// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// return true in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // Nothing to do unless a newline separates the previous token from the next
  // token (or from the first comment pending before it).
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
    // If the line contains an '@' sign, the previous token might be an
    // annotation, which can precede another identifier/value.
    bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
                              [](UnwrappedLineNode &LineNode) {
                                return LineNode.Tok->is(tok::at);
                              }) != Line->Tokens.end();
    if (HasAt)
      return;
  }
  // A value followed by '!' on the next line: end the current line here.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // Two values in a row (not separated by a template expression boundary), or
  // a value after ']', ')', '++' or '--': end the current line.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus)))
    return addUnwrappedLine();
  // A value or ')' followed by a token that starts a declaration or
  // statement: end the current line.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next))
    return addUnwrappedLine();
}

// Parses one structural element (statement, declaration, label, ...) starting
// at the current token, which must not be '{'. The first switch dispatches
// constructs that are recognized by their leading token; whatever is not
// fully handled there continues into the token loop that follows.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: 'include "file"' forms a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Mark every token up to the closing brace as finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java and JavaScript these are modifiers, not access specifier labels.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript)
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern "..." {': the block content is only indented if the brace is
    // wrapped onto its own line (BraceWrapping.AfterExternBlock).
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: 'import [public] "file";' forms a line of its own.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // 'signals:' / 'slots:' style labels (and their Q_ variants) in C++.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      // '@{' is followed by a braced list.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterObjCDeclaration)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above). But be explicit.
1127 parseTryCatch(); 1128 return; 1129 default: 1130 break; 1131 } 1132 break; 1133 case tok::kw_enum: 1134 // Ignore if this is part of "template <enum ...". 1135 if (Previous && Previous->is(tok::less)) { 1136 nextToken(); 1137 break; 1138 } 1139 1140 // parseEnum falls through and does not yet add an unwrapped line as an 1141 // enum definition can start a structural element. 1142 if (!parseEnum()) 1143 break; 1144 // This only applies for C++. 1145 if (!Style.isCpp()) { 1146 addUnwrappedLine(); 1147 return; 1148 } 1149 break; 1150 case tok::kw_typedef: 1151 nextToken(); 1152 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1153 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 1154 parseEnum(); 1155 break; 1156 case tok::kw_struct: 1157 case tok::kw_union: 1158 case tok::kw_class: 1159 // parseRecord falls through and does not yet add an unwrapped line as a 1160 // record declaration or definition can start a structural element. 1161 parseRecord(); 1162 // This does not apply for Java and JavaScript. 1163 if (Style.Language == FormatStyle::LK_Java || 1164 Style.Language == FormatStyle::LK_JavaScript) { 1165 if (FormatTok->is(tok::semi)) 1166 nextToken(); 1167 addUnwrappedLine(); 1168 return; 1169 } 1170 break; 1171 case tok::period: 1172 nextToken(); 1173 // In Java, classes have an implicit static member "class". 1174 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1175 FormatTok->is(tok::kw_class)) 1176 nextToken(); 1177 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 1178 FormatTok->Tok.getIdentifierInfo()) 1179 // JavaScript only has pseudo keywords, all keywords are allowed to 1180 // appear in "IdentifierName" positions. 
See http://es5.github.io/#x7.6 1181 nextToken(); 1182 break; 1183 case tok::semi: 1184 nextToken(); 1185 addUnwrappedLine(); 1186 return; 1187 case tok::r_brace: 1188 addUnwrappedLine(); 1189 return; 1190 case tok::l_paren: 1191 parseParens(); 1192 break; 1193 case tok::kw_operator: 1194 nextToken(); 1195 if (FormatTok->isBinaryOperator()) 1196 nextToken(); 1197 break; 1198 case tok::caret: 1199 nextToken(); 1200 if (FormatTok->Tok.isAnyIdentifier() || 1201 FormatTok->isSimpleTypeSpecifier()) 1202 nextToken(); 1203 if (FormatTok->is(tok::l_paren)) 1204 parseParens(); 1205 if (FormatTok->is(tok::l_brace)) 1206 parseChildBlock(); 1207 break; 1208 case tok::l_brace: 1209 if (!tryToParseBracedList()) { 1210 // A block outside of parentheses must be the last part of a 1211 // structural element. 1212 // FIXME: Figure out cases where this is not true, and add projections 1213 // for them (the one we know is missing are lambdas). 1214 if (Style.BraceWrapping.AfterFunction) 1215 addUnwrappedLine(); 1216 FormatTok->Type = TT_FunctionLBrace; 1217 parseBlock(/*MustBeDeclaration=*/false); 1218 addUnwrappedLine(); 1219 return; 1220 } 1221 // Otherwise this was a braced init list, and the structural 1222 // element continues. 1223 break; 1224 case tok::kw_try: 1225 // We arrive here when parsing function-try blocks. 1226 parseTryCatch(); 1227 return; 1228 case tok::identifier: { 1229 if (FormatTok->is(TT_MacroBlockEnd)) { 1230 addUnwrappedLine(); 1231 return; 1232 } 1233 1234 // Function declarations (as opposed to function expressions) are parsed 1235 // on their own unwrapped line by continuing this loop. Function 1236 // expressions (functions that are not on their own line) must not create 1237 // a new unwrapped line, so they are special cased below. 
1238 size_t TokenCount = Line->Tokens.size(); 1239 if (Style.Language == FormatStyle::LK_JavaScript && 1240 FormatTok->is(Keywords.kw_function) && 1241 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1242 Keywords.kw_async)))) { 1243 tryToParseJSFunction(); 1244 break; 1245 } 1246 if ((Style.Language == FormatStyle::LK_JavaScript || 1247 Style.Language == FormatStyle::LK_Java) && 1248 FormatTok->is(Keywords.kw_interface)) { 1249 if (Style.Language == FormatStyle::LK_JavaScript) { 1250 // In JavaScript/TypeScript, "interface" can be used as a standalone 1251 // identifier, e.g. in `var interface = 1;`. If "interface" is 1252 // followed by another identifier, it is very like to be an actual 1253 // interface declaration. 1254 unsigned StoredPosition = Tokens->getPosition(); 1255 FormatToken *Next = Tokens->getNextToken(); 1256 FormatTok = Tokens->setPosition(StoredPosition); 1257 if (Next && !mustBeJSIdent(Keywords, Next)) { 1258 nextToken(); 1259 break; 1260 } 1261 } 1262 parseRecord(); 1263 addUnwrappedLine(); 1264 return; 1265 } 1266 1267 // See if the following token should start a new unwrapped line. 1268 StringRef Text = FormatTok->TokenText; 1269 nextToken(); 1270 if (Line->Tokens.size() == 1 && 1271 // JS doesn't have macros, and within classes colons indicate fields, 1272 // not labels. 1273 Style.Language != FormatStyle::LK_JavaScript) { 1274 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1275 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1276 parseLabel(); 1277 return; 1278 } 1279 // Recognize function-like macro usages without trailing semicolon as 1280 // well as free-standing macros like Q_OBJECT. 1281 bool FunctionLike = FormatTok->is(tok::l_paren); 1282 if (FunctionLike) 1283 parseParens(); 1284 1285 bool FollowedByNewline = 1286 CommentsBeforeNextToken.empty() 1287 ? 
FormatTok->NewlinesBefore > 0 1288 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1289 1290 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1291 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 1292 addUnwrappedLine(); 1293 return; 1294 } 1295 } 1296 break; 1297 } 1298 case tok::equal: 1299 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 1300 // TT_JsFatArrow. The always start an expression or a child block if 1301 // followed by a curly. 1302 if (FormatTok->is(TT_JsFatArrow)) { 1303 nextToken(); 1304 if (FormatTok->is(tok::l_brace)) 1305 parseChildBlock(); 1306 break; 1307 } 1308 1309 nextToken(); 1310 if (FormatTok->Tok.is(tok::l_brace)) { 1311 nextToken(); 1312 parseBracedList(); 1313 } else if (Style.Language == FormatStyle::LK_Proto && 1314 FormatTok->Tok.is(tok::less)) { 1315 nextToken(); 1316 parseBracedList(/*ContinueOnSemicolons=*/false, 1317 /*ClosingBraceKind=*/tok::greater); 1318 } 1319 break; 1320 case tok::l_square: 1321 parseSquare(); 1322 break; 1323 case tok::kw_new: 1324 parseNew(); 1325 break; 1326 default: 1327 nextToken(); 1328 break; 1329 } 1330 } while (!eof()); 1331 } 1332 1333 bool UnwrappedLineParser::tryToParseLambda() { 1334 if (!Style.isCpp()) { 1335 nextToken(); 1336 return false; 1337 } 1338 assert(FormatTok->is(tok::l_square)); 1339 FormatToken &LSquare = *FormatTok; 1340 if (!tryToParseLambdaIntroducer()) 1341 return false; 1342 1343 while (FormatTok->isNot(tok::l_brace)) { 1344 if (FormatTok->isSimpleTypeSpecifier()) { 1345 nextToken(); 1346 continue; 1347 } 1348 switch (FormatTok->Tok.getKind()) { 1349 case tok::l_brace: 1350 break; 1351 case tok::l_paren: 1352 parseParens(); 1353 break; 1354 case tok::amp: 1355 case tok::star: 1356 case tok::kw_const: 1357 case tok::comma: 1358 case tok::less: 1359 case tok::greater: 1360 case tok::identifier: 1361 case tok::numeric_constant: 1362 case tok::coloncolon: 1363 case tok::kw_mutable: 1364 nextToken(); 1365 break; 1366 case 
tok::arrow: 1367 FormatTok->Type = TT_LambdaArrow; 1368 nextToken(); 1369 break; 1370 default: 1371 return true; 1372 } 1373 } 1374 LSquare.Type = TT_LambdaLSquare; 1375 parseChildBlock(); 1376 return true; 1377 } 1378 1379 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1380 const FormatToken *Previous = FormatTok->Previous; 1381 if (Previous && 1382 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1383 tok::kw_delete) || 1384 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1385 Previous->isSimpleTypeSpecifier())) { 1386 nextToken(); 1387 return false; 1388 } 1389 nextToken(); 1390 parseSquare(/*LambdaIntroducer=*/true); 1391 return true; 1392 } 1393 1394 void UnwrappedLineParser::tryToParseJSFunction() { 1395 assert(FormatTok->is(Keywords.kw_function) || 1396 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1397 if (FormatTok->is(Keywords.kw_async)) 1398 nextToken(); 1399 // Consume "function". 1400 nextToken(); 1401 1402 // Consume * (generator function). Treat it like C++'s overloaded operators. 1403 if (FormatTok->is(tok::star)) { 1404 FormatTok->Type = TT_OverloadedOperator; 1405 nextToken(); 1406 } 1407 1408 // Consume function name. 1409 if (FormatTok->is(tok::identifier)) 1410 nextToken(); 1411 1412 if (FormatTok->isNot(tok::l_paren)) 1413 return; 1414 1415 // Parse formal parameter list. 1416 parseParens(); 1417 1418 if (FormatTok->is(tok::colon)) { 1419 // Parse a type definition. 1420 nextToken(); 1421 1422 // Eat the type declaration. For braced inline object types, balance braces, 1423 // otherwise just parse until finding an l_brace for the function body. 
1424 if (FormatTok->is(tok::l_brace)) 1425 tryToParseBracedList(); 1426 else 1427 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1428 nextToken(); 1429 } 1430 1431 if (FormatTok->is(tok::semi)) 1432 return; 1433 1434 parseChildBlock(); 1435 } 1436 1437 bool UnwrappedLineParser::tryToParseBracedList() { 1438 if (FormatTok->BlockKind == BK_Unknown) 1439 calculateBraceTypes(); 1440 assert(FormatTok->BlockKind != BK_Unknown); 1441 if (FormatTok->BlockKind == BK_Block) 1442 return false; 1443 nextToken(); 1444 parseBracedList(); 1445 return true; 1446 } 1447 1448 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1449 tok::TokenKind ClosingBraceKind) { 1450 bool HasError = false; 1451 1452 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1453 // replace this by using parseAssigmentExpression() inside. 1454 do { 1455 if (Style.Language == FormatStyle::LK_JavaScript) { 1456 if (FormatTok->is(Keywords.kw_function) || 1457 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1458 tryToParseJSFunction(); 1459 continue; 1460 } 1461 if (FormatTok->is(TT_JsFatArrow)) { 1462 nextToken(); 1463 // Fat arrows can be followed by simple expressions or by child blocks 1464 // in curly braces. 1465 if (FormatTok->is(tok::l_brace)) { 1466 parseChildBlock(); 1467 continue; 1468 } 1469 } 1470 if (FormatTok->is(tok::l_brace)) { 1471 // Could be a method inside of a braced list `{a() { return 1; }}`. 
1472 if (tryToParseBracedList()) 1473 continue; 1474 parseChildBlock(); 1475 } 1476 } 1477 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1478 nextToken(); 1479 return !HasError; 1480 } 1481 switch (FormatTok->Tok.getKind()) { 1482 case tok::caret: 1483 nextToken(); 1484 if (FormatTok->is(tok::l_brace)) { 1485 parseChildBlock(); 1486 } 1487 break; 1488 case tok::l_square: 1489 tryToParseLambda(); 1490 break; 1491 case tok::l_paren: 1492 parseParens(); 1493 // JavaScript can just have free standing methods and getters/setters in 1494 // object literals. Detect them by a "{" following ")". 1495 if (Style.Language == FormatStyle::LK_JavaScript) { 1496 if (FormatTok->is(tok::l_brace)) 1497 parseChildBlock(); 1498 break; 1499 } 1500 break; 1501 case tok::l_brace: 1502 // Assume there are no blocks inside a braced init list apart 1503 // from the ones we explicitly parse out (like lambdas). 1504 FormatTok->BlockKind = BK_BracedInit; 1505 nextToken(); 1506 parseBracedList(); 1507 break; 1508 case tok::less: 1509 if (Style.Language == FormatStyle::LK_Proto) { 1510 nextToken(); 1511 parseBracedList(/*ContinueOnSemicolons=*/false, 1512 /*ClosingBraceKind=*/tok::greater); 1513 } else { 1514 nextToken(); 1515 } 1516 break; 1517 case tok::semi: 1518 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1519 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1520 // used for error recovery if we have otherwise determined that this is 1521 // a braced list. 
1522 if (Style.Language == FormatStyle::LK_JavaScript) { 1523 nextToken(); 1524 break; 1525 } 1526 HasError = true; 1527 if (!ContinueOnSemicolons) 1528 return !HasError; 1529 nextToken(); 1530 break; 1531 case tok::comma: 1532 nextToken(); 1533 break; 1534 default: 1535 nextToken(); 1536 break; 1537 } 1538 } while (!eof()); 1539 return false; 1540 } 1541 1542 void UnwrappedLineParser::parseParens() { 1543 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1544 nextToken(); 1545 do { 1546 switch (FormatTok->Tok.getKind()) { 1547 case tok::l_paren: 1548 parseParens(); 1549 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1550 parseChildBlock(); 1551 break; 1552 case tok::r_paren: 1553 nextToken(); 1554 return; 1555 case tok::r_brace: 1556 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1557 return; 1558 case tok::l_square: 1559 tryToParseLambda(); 1560 break; 1561 case tok::l_brace: 1562 if (!tryToParseBracedList()) 1563 parseChildBlock(); 1564 break; 1565 case tok::at: 1566 nextToken(); 1567 if (FormatTok->Tok.is(tok::l_brace)) { 1568 nextToken(); 1569 parseBracedList(); 1570 } 1571 break; 1572 case tok::kw_class: 1573 if (Style.Language == FormatStyle::LK_JavaScript) 1574 parseRecord(/*ParseAsExpr=*/true); 1575 else 1576 nextToken(); 1577 break; 1578 case tok::identifier: 1579 if (Style.Language == FormatStyle::LK_JavaScript && 1580 (FormatTok->is(Keywords.kw_function) || 1581 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1582 tryToParseJSFunction(); 1583 else 1584 nextToken(); 1585 break; 1586 default: 1587 nextToken(); 1588 break; 1589 } 1590 } while (!eof()); 1591 } 1592 1593 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 1594 if (!LambdaIntroducer) { 1595 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1596 if (tryToParseLambda()) 1597 return; 1598 } 1599 do { 1600 switch (FormatTok->Tok.getKind()) { 1601 case tok::l_paren: 1602 parseParens(); 1603 
break; 1604 case tok::r_square: 1605 nextToken(); 1606 return; 1607 case tok::r_brace: 1608 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1609 return; 1610 case tok::l_square: 1611 parseSquare(); 1612 break; 1613 case tok::l_brace: { 1614 if (!tryToParseBracedList()) 1615 parseChildBlock(); 1616 break; 1617 } 1618 case tok::at: 1619 nextToken(); 1620 if (FormatTok->Tok.is(tok::l_brace)) { 1621 nextToken(); 1622 parseBracedList(); 1623 } 1624 break; 1625 default: 1626 nextToken(); 1627 break; 1628 } 1629 } while (!eof()); 1630 } 1631 1632 void UnwrappedLineParser::parseIfThenElse() { 1633 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1634 nextToken(); 1635 if (FormatTok->Tok.is(tok::kw_constexpr)) 1636 nextToken(); 1637 if (FormatTok->Tok.is(tok::l_paren)) 1638 parseParens(); 1639 bool NeedsUnwrappedLine = false; 1640 if (FormatTok->Tok.is(tok::l_brace)) { 1641 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1642 parseBlock(/*MustBeDeclaration=*/false); 1643 if (Style.BraceWrapping.BeforeElse) 1644 addUnwrappedLine(); 1645 else 1646 NeedsUnwrappedLine = true; 1647 } else { 1648 addUnwrappedLine(); 1649 ++Line->Level; 1650 parseStructuralElement(); 1651 --Line->Level; 1652 } 1653 if (FormatTok->Tok.is(tok::kw_else)) { 1654 nextToken(); 1655 if (FormatTok->Tok.is(tok::l_brace)) { 1656 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1657 parseBlock(/*MustBeDeclaration=*/false); 1658 addUnwrappedLine(); 1659 } else if (FormatTok->Tok.is(tok::kw_if)) { 1660 parseIfThenElse(); 1661 } else { 1662 addUnwrappedLine(); 1663 ++Line->Level; 1664 parseStructuralElement(); 1665 if (FormatTok->is(tok::eof)) 1666 addUnwrappedLine(); 1667 --Line->Level; 1668 } 1669 } else if (NeedsUnwrappedLine) { 1670 addUnwrappedLine(); 1671 } 1672 } 1673 1674 void UnwrappedLineParser::parseTryCatch() { 1675 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1676 nextToken(); 1677 bool NeedsUnwrappedLine = 
false; 1678 if (FormatTok->is(tok::colon)) { 1679 // We are in a function try block, what comes is an initializer list. 1680 nextToken(); 1681 while (FormatTok->is(tok::identifier)) { 1682 nextToken(); 1683 if (FormatTok->is(tok::l_paren)) 1684 parseParens(); 1685 if (FormatTok->is(tok::comma)) 1686 nextToken(); 1687 } 1688 } 1689 // Parse try with resource. 1690 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1691 parseParens(); 1692 } 1693 if (FormatTok->is(tok::l_brace)) { 1694 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1695 parseBlock(/*MustBeDeclaration=*/false); 1696 if (Style.BraceWrapping.BeforeCatch) { 1697 addUnwrappedLine(); 1698 } else { 1699 NeedsUnwrappedLine = true; 1700 } 1701 } else if (!FormatTok->is(tok::kw_catch)) { 1702 // The C++ standard requires a compound-statement after a try. 1703 // If there's none, we try to assume there's a structuralElement 1704 // and try to continue. 1705 addUnwrappedLine(); 1706 ++Line->Level; 1707 parseStructuralElement(); 1708 --Line->Level; 1709 } 1710 while (1) { 1711 if (FormatTok->is(tok::at)) 1712 nextToken(); 1713 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1714 tok::kw___finally) || 1715 ((Style.Language == FormatStyle::LK_Java || 1716 Style.Language == FormatStyle::LK_JavaScript) && 1717 FormatTok->is(Keywords.kw_finally)) || 1718 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1719 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1720 break; 1721 nextToken(); 1722 while (FormatTok->isNot(tok::l_brace)) { 1723 if (FormatTok->is(tok::l_paren)) { 1724 parseParens(); 1725 continue; 1726 } 1727 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1728 return; 1729 nextToken(); 1730 } 1731 NeedsUnwrappedLine = false; 1732 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1733 parseBlock(/*MustBeDeclaration=*/false); 1734 if (Style.BraceWrapping.BeforeCatch) 1735 addUnwrappedLine(); 1736 else 1737 NeedsUnwrappedLine = 
true; 1738 } 1739 if (NeedsUnwrappedLine) 1740 addUnwrappedLine(); 1741 } 1742 1743 void UnwrappedLineParser::parseNamespace() { 1744 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1745 1746 const FormatToken &InitialToken = *FormatTok; 1747 nextToken(); 1748 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1749 nextToken(); 1750 if (FormatTok->Tok.is(tok::l_brace)) { 1751 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1752 addUnwrappedLine(); 1753 1754 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1755 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1756 DeclarationScopeStack.size() > 1); 1757 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1758 // Munch the semicolon after a namespace. This is more common than one would 1759 // think. Puttin the semicolon into its own line is very ugly. 1760 if (FormatTok->Tok.is(tok::semi)) 1761 nextToken(); 1762 addUnwrappedLine(); 1763 } 1764 // FIXME: Add error handling. 1765 } 1766 1767 void UnwrappedLineParser::parseNew() { 1768 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1769 nextToken(); 1770 if (Style.Language != FormatStyle::LK_Java) 1771 return; 1772 1773 // In Java, we can parse everything up to the parens, which aren't optional. 1774 do { 1775 // There should not be a ;, { or } before the new's open paren. 1776 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1777 return; 1778 1779 // Consume the parens. 1780 if (FormatTok->is(tok::l_paren)) { 1781 parseParens(); 1782 1783 // If there is a class body of an anonymous class, consume that as child. 1784 if (FormatTok->is(tok::l_brace)) 1785 parseChildBlock(); 1786 return; 1787 } 1788 nextToken(); 1789 } while (!eof()); 1790 } 1791 1792 void UnwrappedLineParser::parseForOrWhileLoop() { 1793 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1794 "'for', 'while' or foreach macro expected"); 1795 nextToken(); 1796 // JS' for await ( ... 
1797 if (Style.Language == FormatStyle::LK_JavaScript && 1798 FormatTok->is(Keywords.kw_await)) 1799 nextToken(); 1800 if (FormatTok->Tok.is(tok::l_paren)) 1801 parseParens(); 1802 if (FormatTok->Tok.is(tok::l_brace)) { 1803 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1804 parseBlock(/*MustBeDeclaration=*/false); 1805 addUnwrappedLine(); 1806 } else { 1807 addUnwrappedLine(); 1808 ++Line->Level; 1809 parseStructuralElement(); 1810 --Line->Level; 1811 } 1812 } 1813 1814 void UnwrappedLineParser::parseDoWhile() { 1815 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1816 nextToken(); 1817 if (FormatTok->Tok.is(tok::l_brace)) { 1818 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1819 parseBlock(/*MustBeDeclaration=*/false); 1820 if (Style.BraceWrapping.IndentBraces) 1821 addUnwrappedLine(); 1822 } else { 1823 addUnwrappedLine(); 1824 ++Line->Level; 1825 parseStructuralElement(); 1826 --Line->Level; 1827 } 1828 1829 // FIXME: Add error handling. 1830 if (!FormatTok->Tok.is(tok::kw_while)) { 1831 addUnwrappedLine(); 1832 return; 1833 } 1834 1835 nextToken(); 1836 parseStructuralElement(); 1837 } 1838 1839 void UnwrappedLineParser::parseLabel() { 1840 nextToken(); 1841 unsigned OldLineLevel = Line->Level; 1842 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1843 --Line->Level; 1844 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1845 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1846 parseBlock(/*MustBeDeclaration=*/false); 1847 if (FormatTok->Tok.is(tok::kw_break)) { 1848 if (Style.BraceWrapping.AfterControlStatement) 1849 addUnwrappedLine(); 1850 parseStructuralElement(); 1851 } 1852 addUnwrappedLine(); 1853 } else { 1854 if (FormatTok->is(tok::semi)) 1855 nextToken(); 1856 addUnwrappedLine(); 1857 } 1858 Line->Level = OldLineLevel; 1859 if (FormatTok->isNot(tok::l_brace)) { 1860 parseStructuralElement(); 1861 addUnwrappedLine(); 1862 } 1863 } 1864 1865 void 
UnwrappedLineParser::parseCaseLabel() { 1866 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1867 // FIXME: fix handling of complex expressions here. 1868 do { 1869 nextToken(); 1870 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1871 parseLabel(); 1872 } 1873 1874 void UnwrappedLineParser::parseSwitch() { 1875 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1876 nextToken(); 1877 if (FormatTok->Tok.is(tok::l_paren)) 1878 parseParens(); 1879 if (FormatTok->Tok.is(tok::l_brace)) { 1880 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1881 parseBlock(/*MustBeDeclaration=*/false); 1882 addUnwrappedLine(); 1883 } else { 1884 addUnwrappedLine(); 1885 ++Line->Level; 1886 parseStructuralElement(); 1887 --Line->Level; 1888 } 1889 } 1890 1891 void UnwrappedLineParser::parseAccessSpecifier() { 1892 nextToken(); 1893 // Understand Qt's slots. 1894 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 1895 nextToken(); 1896 // Otherwise, we don't know what it is, and we'd better keep the next token. 1897 if (FormatTok->Tok.is(tok::colon)) 1898 nextToken(); 1899 addUnwrappedLine(); 1900 } 1901 1902 bool UnwrappedLineParser::parseEnum() { 1903 // Won't be 'enum' for NS_ENUMs. 1904 if (FormatTok->Tok.is(tok::kw_enum)) 1905 nextToken(); 1906 1907 // In TypeScript, "enum" can also be used as property name, e.g. in interface 1908 // declarations. An "enum" keyword followed by a colon would be a syntax 1909 // error and thus assume it is just an identifier. 1910 if (Style.Language == FormatStyle::LK_JavaScript && 1911 FormatTok->isOneOf(tok::colon, tok::question)) 1912 return false; 1913 1914 // Eat up enum class ... 
1915 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 1916 nextToken(); 1917 1918 while (FormatTok->Tok.getIdentifierInfo() || 1919 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 1920 tok::greater, tok::comma, tok::question)) { 1921 nextToken(); 1922 // We can have macros or attributes in between 'enum' and the enum name. 1923 if (FormatTok->is(tok::l_paren)) 1924 parseParens(); 1925 if (FormatTok->is(tok::identifier)) { 1926 nextToken(); 1927 // If there are two identifiers in a row, this is likely an elaborate 1928 // return type. In Java, this can be "implements", etc. 1929 if (Style.isCpp() && FormatTok->is(tok::identifier)) 1930 return false; 1931 } 1932 } 1933 1934 // Just a declaration or something is wrong. 1935 if (FormatTok->isNot(tok::l_brace)) 1936 return true; 1937 FormatTok->BlockKind = BK_Block; 1938 1939 if (Style.Language == FormatStyle::LK_Java) { 1940 // Java enums are different. 1941 parseJavaEnumBody(); 1942 return true; 1943 } 1944 if (Style.Language == FormatStyle::LK_Proto) { 1945 parseBlock(/*MustBeDeclaration=*/true); 1946 return true; 1947 } 1948 1949 // Parse enum body. 1950 nextToken(); 1951 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 1952 if (HasError) { 1953 if (FormatTok->is(tok::semi)) 1954 nextToken(); 1955 addUnwrappedLine(); 1956 } 1957 return true; 1958 1959 // There is no addUnwrappedLine() here so that we fall through to parsing a 1960 // structural element afterwards. Thus, in "enum A {} n, m;", 1961 // "} n, m;" will end up in one unwrapped line. 1962 } 1963 1964 void UnwrappedLineParser::parseJavaEnumBody() { 1965 // Determine whether the enum is simple, i.e. does not have a semicolon or 1966 // constants with class bodies. Simple enums can be formatted like braced 1967 // lists, contracted to a single line, etc. 
1968 unsigned StoredPosition = Tokens->getPosition(); 1969 bool IsSimple = true; 1970 FormatToken *Tok = Tokens->getNextToken(); 1971 while (Tok) { 1972 if (Tok->is(tok::r_brace)) 1973 break; 1974 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 1975 IsSimple = false; 1976 break; 1977 } 1978 // FIXME: This will also mark enums with braces in the arguments to enum 1979 // constants as "not simple". This is probably fine in practice, though. 1980 Tok = Tokens->getNextToken(); 1981 } 1982 FormatTok = Tokens->setPosition(StoredPosition); 1983 1984 if (IsSimple) { 1985 nextToken(); 1986 parseBracedList(); 1987 addUnwrappedLine(); 1988 return; 1989 } 1990 1991 // Parse the body of a more complex enum. 1992 // First add a line for everything up to the "{". 1993 nextToken(); 1994 addUnwrappedLine(); 1995 ++Line->Level; 1996 1997 // Parse the enum constants. 1998 while (FormatTok) { 1999 if (FormatTok->is(tok::l_brace)) { 2000 // Parse the constant's class body. 2001 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2002 /*MunchSemi=*/false); 2003 } else if (FormatTok->is(tok::l_paren)) { 2004 parseParens(); 2005 } else if (FormatTok->is(tok::comma)) { 2006 nextToken(); 2007 addUnwrappedLine(); 2008 } else if (FormatTok->is(tok::semi)) { 2009 nextToken(); 2010 addUnwrappedLine(); 2011 break; 2012 } else if (FormatTok->is(tok::r_brace)) { 2013 addUnwrappedLine(); 2014 break; 2015 } else { 2016 nextToken(); 2017 } 2018 } 2019 2020 // Parse the class body after the enum's ";" if any. 2021 parseLevel(/*HasOpeningBrace=*/true); 2022 nextToken(); 2023 --Line->Level; 2024 addUnwrappedLine(); 2025 } 2026 2027 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 2028 const FormatToken &InitialToken = *FormatTok; 2029 nextToken(); 2030 2031 // The actual identifier can be a nested name specifier, and in macros 2032 // it is often token-pasted. 
2033 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 2034 tok::kw___attribute, tok::kw___declspec, 2035 tok::kw_alignas) || 2036 ((Style.Language == FormatStyle::LK_Java || 2037 Style.Language == FormatStyle::LK_JavaScript) && 2038 FormatTok->isOneOf(tok::period, tok::comma))) { 2039 if (Style.Language == FormatStyle::LK_JavaScript && 2040 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 2041 // JavaScript/TypeScript supports inline object types in 2042 // extends/implements positions: 2043 // class Foo implements {bar: number} { } 2044 nextToken(); 2045 if (FormatTok->is(tok::l_brace)) { 2046 tryToParseBracedList(); 2047 continue; 2048 } 2049 } 2050 bool IsNonMacroIdentifier = 2051 FormatTok->is(tok::identifier) && 2052 FormatTok->TokenText != FormatTok->TokenText.upper(); 2053 nextToken(); 2054 // We can have macros or attributes in between 'class' and the class name. 2055 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 2056 parseParens(); 2057 } 2058 2059 // Note that parsing away template declarations here leads to incorrectly 2060 // accepting function declarations as record declarations. 2061 // In general, we cannot solve this problem. Consider: 2062 // class A<int> B() {} 2063 // which can be a function definition or a class definition when B() is a 2064 // macro. If we find enough real-world cases where this is a problem, we 2065 // can parse for the 'template' keyword in the beginning of the statement, 2066 // and thus rule out the record production in case there is no template 2067 // (this would still leave us with an ambiguity between template function 2068 // and class declarations). 
2069 if (FormatTok->isOneOf(tok::colon, tok::less)) { 2070 while (!eof()) { 2071 if (FormatTok->is(tok::l_brace)) { 2072 calculateBraceTypes(/*ExpectClassBody=*/true); 2073 if (!tryToParseBracedList()) 2074 break; 2075 } 2076 if (FormatTok->Tok.is(tok::semi)) 2077 return; 2078 nextToken(); 2079 } 2080 } 2081 if (FormatTok->Tok.is(tok::l_brace)) { 2082 if (ParseAsExpr) { 2083 parseChildBlock(); 2084 } else { 2085 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2086 addUnwrappedLine(); 2087 2088 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2089 /*MunchSemi=*/false); 2090 } 2091 } 2092 // There is no addUnwrappedLine() here so that we fall through to parsing a 2093 // structural element afterwards. Thus, in "class A {} n, m;", 2094 // "} n, m;" will end up in one unwrapped line. 2095 } 2096 2097 void UnwrappedLineParser::parseObjCProtocolList() { 2098 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 2099 do 2100 nextToken(); 2101 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 2102 nextToken(); // Skip '>'. 2103 } 2104 2105 void UnwrappedLineParser::parseObjCUntilAtEnd() { 2106 do { 2107 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2108 nextToken(); 2109 addUnwrappedLine(); 2110 break; 2111 } 2112 if (FormatTok->is(tok::l_brace)) { 2113 parseBlock(/*MustBeDeclaration=*/false); 2114 // In ObjC interfaces, nothing should be following the "}". 2115 addUnwrappedLine(); 2116 } else if (FormatTok->is(tok::r_brace)) { 2117 // Ignore stray "}". parseStructuralElement doesn't consume them. 
2118 nextToken(); 2119 addUnwrappedLine(); 2120 } else { 2121 parseStructuralElement(); 2122 } 2123 } while (!eof()); 2124 } 2125 2126 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2127 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 2128 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 2129 nextToken(); 2130 nextToken(); // interface name 2131 2132 // @interface can be followed by either a base class, or a category. 2133 if (FormatTok->Tok.is(tok::colon)) { 2134 nextToken(); 2135 nextToken(); // base class name 2136 } else if (FormatTok->Tok.is(tok::l_paren)) 2137 // Skip category, if present. 2138 parseParens(); 2139 2140 if (FormatTok->Tok.is(tok::less)) 2141 parseObjCProtocolList(); 2142 2143 if (FormatTok->Tok.is(tok::l_brace)) { 2144 if (Style.BraceWrapping.AfterObjCDeclaration) 2145 addUnwrappedLine(); 2146 parseBlock(/*MustBeDeclaration=*/true); 2147 } 2148 2149 // With instance variables, this puts '}' on its own line. Without instance 2150 // variables, this ends the @interface line. 2151 addUnwrappedLine(); 2152 2153 parseObjCUntilAtEnd(); 2154 } 2155 2156 // Returns true for the declaration/definition form of @protocol, 2157 // false for the expression form. 2158 bool UnwrappedLineParser::parseObjCProtocol() { 2159 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 2160 nextToken(); 2161 2162 if (FormatTok->is(tok::l_paren)) 2163 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 2164 return false; 2165 2166 // The definition/declaration form, 2167 // @protocol Foo 2168 // - (int)someMethod; 2169 // @end 2170 2171 nextToken(); // protocol name 2172 2173 if (FormatTok->Tok.is(tok::less)) 2174 parseObjCProtocolList(); 2175 2176 // Check for protocol declaration. 
2177 if (FormatTok->Tok.is(tok::semi)) { 2178 nextToken(); 2179 addUnwrappedLine(); 2180 return true; 2181 } 2182 2183 addUnwrappedLine(); 2184 parseObjCUntilAtEnd(); 2185 return true; 2186 } 2187 2188 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2189 bool IsImport = FormatTok->is(Keywords.kw_import); 2190 assert(IsImport || FormatTok->is(tok::kw_export)); 2191 nextToken(); 2192 2193 // Consume the "default" in "export default class/function". 2194 if (FormatTok->is(tok::kw_default)) 2195 nextToken(); 2196 2197 // Consume "async function", "function" and "default function", so that these 2198 // get parsed as free-standing JS functions, i.e. do not require a trailing 2199 // semicolon. 2200 if (FormatTok->is(Keywords.kw_async)) 2201 nextToken(); 2202 if (FormatTok->is(Keywords.kw_function)) { 2203 nextToken(); 2204 return; 2205 } 2206 2207 // For imports, `export *`, `export {...}`, consume the rest of the line up 2208 // to the terminating `;`. For everything else, just return and continue 2209 // parsing the structural element, i.e. the declaration or expression for 2210 // `export default`. 2211 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2212 !FormatTok->isStringLiteral()) 2213 return; 2214 2215 while (!eof()) { 2216 if (FormatTok->is(tok::semi)) 2217 return; 2218 if (Line->Tokens.empty()) { 2219 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2220 // import statement should terminate. 2221 return; 2222 } 2223 if (FormatTok->is(tok::l_brace)) { 2224 FormatTok->BlockKind = BK_Block; 2225 nextToken(); 2226 parseBracedList(); 2227 } else { 2228 nextToken(); 2229 } 2230 } 2231 } 2232 2233 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2234 StringRef Prefix = "") { 2235 llvm::dbgs() << Prefix << "Line(" << Line.Level 2236 << ", FSC=" << Line.FirstStartColumn << ")" 2237 << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 2238 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2239 E = Line.Tokens.end(); 2240 I != E; ++I) { 2241 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2242 << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn 2243 << "] "; 2244 } 2245 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2246 E = Line.Tokens.end(); 2247 I != E; ++I) { 2248 const UnwrappedLineNode &Node = *I; 2249 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2250 I = Node.Children.begin(), 2251 E = Node.Children.end(); 2252 I != E; ++I) { 2253 printDebugInfo(*I, "\nChild: "); 2254 } 2255 } 2256 llvm::dbgs() << "\n"; 2257 } 2258 2259 void UnwrappedLineParser::addUnwrappedLine() { 2260 if (Line->Tokens.empty()) 2261 return; 2262 DEBUG({ 2263 if (CurrentLines == &Lines) 2264 printDebugInfo(*Line); 2265 }); 2266 CurrentLines->push_back(std::move(*Line)); 2267 Line->Tokens.clear(); 2268 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2269 Line->FirstStartColumn = 0; 2270 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2271 CurrentLines->append( 2272 std::make_move_iterator(PreprocessorDirectives.begin()), 2273 std::make_move_iterator(PreprocessorDirectives.end())); 2274 PreprocessorDirectives.clear(); 2275 } 2276 // Disconnect the current token from the last token on the previous line. 2277 FormatTok->Previous = nullptr; 2278 } 2279 2280 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2281 2282 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2283 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2284 FormatTok.NewlinesBefore > 0; 2285 } 2286 2287 // Checks if \p FormatTok is a line comment that continues the line comment 2288 // section on \p Line. 
2289 static bool continuesLineCommentSection(const FormatToken &FormatTok, 2290 const UnwrappedLine &Line, 2291 llvm::Regex &CommentPragmasRegex) { 2292 if (Line.Tokens.empty()) 2293 return false; 2294 2295 StringRef IndentContent = FormatTok.TokenText; 2296 if (FormatTok.TokenText.startswith("//") || 2297 FormatTok.TokenText.startswith("/*")) 2298 IndentContent = FormatTok.TokenText.substr(2); 2299 if (CommentPragmasRegex.match(IndentContent)) 2300 return false; 2301 2302 // If Line starts with a line comment, then FormatTok continues the comment 2303 // section if its original column is greater or equal to the original start 2304 // column of the line. 2305 // 2306 // Define the min column token of a line as follows: if a line ends in '{' or 2307 // contains a '{' followed by a line comment, then the min column token is 2308 // that '{'. Otherwise, the min column token of the line is the first token of 2309 // the line. 2310 // 2311 // If Line starts with a token other than a line comment, then FormatTok 2312 // continues the comment section if its original column is greater than the 2313 // original start column of the min column token of the line. 
2314 // 2315 // For example, the second line comment continues the first in these cases: 2316 // 2317 // // first line 2318 // // second line 2319 // 2320 // and: 2321 // 2322 // // first line 2323 // // second line 2324 // 2325 // and: 2326 // 2327 // int i; // first line 2328 // // second line 2329 // 2330 // and: 2331 // 2332 // do { // first line 2333 // // second line 2334 // int i; 2335 // } while (true); 2336 // 2337 // and: 2338 // 2339 // enum { 2340 // a, // first line 2341 // // second line 2342 // b 2343 // }; 2344 // 2345 // The second line comment doesn't continue the first in these cases: 2346 // 2347 // // first line 2348 // // second line 2349 // 2350 // and: 2351 // 2352 // int i; // first line 2353 // // second line 2354 // 2355 // and: 2356 // 2357 // do { // first line 2358 // // second line 2359 // int i; 2360 // } while (true); 2361 // 2362 // and: 2363 // 2364 // enum { 2365 // a, // first line 2366 // // second line 2367 // }; 2368 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2369 2370 // Scan for '{//'. If found, use the column of '{' as a min column for line 2371 // comment section continuation. 2372 const FormatToken *PreviousToken = nullptr; 2373 for (const UnwrappedLineNode &Node : Line.Tokens) { 2374 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2375 isLineComment(*Node.Tok)) { 2376 MinColumnToken = PreviousToken; 2377 break; 2378 } 2379 PreviousToken = Node.Tok; 2380 2381 // Grab the last newline preceding a token in this unwrapped line. 
2382 if (Node.Tok->NewlinesBefore > 0) { 2383 MinColumnToken = Node.Tok; 2384 } 2385 } 2386 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2387 MinColumnToken = PreviousToken; 2388 } 2389 2390 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2391 MinColumnToken); 2392 } 2393 2394 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2395 bool JustComments = Line->Tokens.empty(); 2396 for (SmallVectorImpl<FormatToken *>::const_iterator 2397 I = CommentsBeforeNextToken.begin(), 2398 E = CommentsBeforeNextToken.end(); 2399 I != E; ++I) { 2400 // Line comments that belong to the same line comment section are put on the 2401 // same line since later we might want to reflow content between them. 2402 // Additional fine-grained breaking of line comment sections is controlled 2403 // by the class BreakableLineCommentSection in case it is desirable to keep 2404 // several line comment sections in the same unwrapped line. 2405 // 2406 // FIXME: Consider putting separate line comment sections as children to the 2407 // unwrapped line instead. 
2408 (*I)->ContinuesLineCommentSection = 2409 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2410 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2411 addUnwrappedLine(); 2412 pushToken(*I); 2413 } 2414 if (NewlineBeforeNext && JustComments) 2415 addUnwrappedLine(); 2416 CommentsBeforeNextToken.clear(); 2417 } 2418 2419 void UnwrappedLineParser::nextToken(int LevelDifference) { 2420 if (eof()) 2421 return; 2422 flushComments(isOnNewLine(*FormatTok)); 2423 pushToken(FormatTok); 2424 FormatToken *Previous = FormatTok; 2425 if (Style.Language != FormatStyle::LK_JavaScript) 2426 readToken(LevelDifference); 2427 else 2428 readTokenWithJavaScriptASI(); 2429 FormatTok->Previous = Previous; 2430 } 2431 2432 void UnwrappedLineParser::distributeComments( 2433 const SmallVectorImpl<FormatToken *> &Comments, 2434 const FormatToken *NextTok) { 2435 // Whether or not a line comment token continues a line is controlled by 2436 // the method continuesLineCommentSection, with the following caveat: 2437 // 2438 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2439 // that each comment line from the trail is aligned with the next token, if 2440 // the next token exists. If a trail exists, the beginning of the maximal 2441 // trail is marked as a start of a new comment section. 2442 // 2443 // For example in this code: 2444 // 2445 // int a; // line about a 2446 // // line 1 about b 2447 // // line 2 about b 2448 // int b; 2449 // 2450 // the two lines about b form a maximal trail, so there are two sections, the 2451 // first one consisting of the single comment "// line about a" and the 2452 // second one consisting of the next two comments. 2453 if (Comments.empty()) 2454 return; 2455 bool ShouldPushCommentsInCurrentLine = true; 2456 bool HasTrailAlignedWithNextToken = false; 2457 unsigned StartOfTrailAlignedWithNextToken = 0; 2458 if (NextTok) { 2459 // We are skipping the first element intentionally. 
2460 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2461 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2462 HasTrailAlignedWithNextToken = true; 2463 StartOfTrailAlignedWithNextToken = i; 2464 } 2465 } 2466 } 2467 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2468 FormatToken *FormatTok = Comments[i]; 2469 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 2470 FormatTok->ContinuesLineCommentSection = false; 2471 } else { 2472 FormatTok->ContinuesLineCommentSection = 2473 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2474 } 2475 if (!FormatTok->ContinuesLineCommentSection && 2476 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2477 ShouldPushCommentsInCurrentLine = false; 2478 } 2479 if (ShouldPushCommentsInCurrentLine) { 2480 pushToken(FormatTok); 2481 } else { 2482 CommentsBeforeNextToken.push_back(FormatTok); 2483 } 2484 } 2485 } 2486 2487 void UnwrappedLineParser::readToken(int LevelDifference) { 2488 SmallVector<FormatToken *, 1> Comments; 2489 do { 2490 FormatTok = Tokens->getNextToken(); 2491 assert(FormatTok); 2492 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2493 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2494 distributeComments(Comments, FormatTok); 2495 Comments.clear(); 2496 // If there is an unfinished unwrapped line, we flush the preprocessor 2497 // directives only after that unwrapped line was finished later. 
2498 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2499 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2500 assert((LevelDifference >= 0 || 2501 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 2502 "LevelDifference makes Line->Level negative"); 2503 Line->Level += LevelDifference; 2504 // Comments stored before the preprocessor directive need to be output 2505 // before the preprocessor directive, at the same level as the 2506 // preprocessor directive, as we consider them to apply to the directive. 2507 flushComments(isOnNewLine(*FormatTok)); 2508 parsePPDirective(); 2509 } 2510 while (FormatTok->Type == TT_ConflictStart || 2511 FormatTok->Type == TT_ConflictEnd || 2512 FormatTok->Type == TT_ConflictAlternative) { 2513 if (FormatTok->Type == TT_ConflictStart) { 2514 conditionalCompilationStart(/*Unreachable=*/false); 2515 } else if (FormatTok->Type == TT_ConflictAlternative) { 2516 conditionalCompilationAlternative(); 2517 } else if (FormatTok->Type == TT_ConflictEnd) { 2518 conditionalCompilationEnd(); 2519 } 2520 FormatTok = Tokens->getNextToken(); 2521 FormatTok->MustBreakBefore = true; 2522 } 2523 2524 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 2525 !Line->InPPDirective) { 2526 continue; 2527 } 2528 2529 if (!FormatTok->Tok.is(tok::comment)) { 2530 distributeComments(Comments, FormatTok); 2531 Comments.clear(); 2532 return; 2533 } 2534 2535 Comments.push_back(FormatTok); 2536 } while (!eof()); 2537 2538 distributeComments(Comments, nullptr); 2539 Comments.clear(); 2540 } 2541 2542 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2543 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2544 if (MustBreakBeforeNextToken) { 2545 Line->Tokens.back().Tok->MustBreakBefore = true; 2546 MustBreakBeforeNextToken = false; 2547 } 2548 } 2549 2550 } // end namespace format 2551 } // end namespace clang 2552