//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file contains the implementation of the UnwrappedLineParser,
/// which turns a stream of tokens into UnwrappedLines.
///
//===----------------------------------------------------------------------===//

#include "UnwrappedLineParser.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "format-parser"

namespace clang {
namespace format {

/// \brief Abstract interface for a stream of \c FormatToken pointers.
///
/// Besides handing out tokens one at a time, a source exposes a position
/// that can be queried and later handed back to \c setPosition().
class FormatTokenSource {
public:
  virtual ~FormatTokenSource() {}
  /// \brief Returns the next token of the stream.
  virtual FormatToken *getNextToken() = 0;

  /// \brief Returns the current position, suitable for \c setPosition().
  virtual unsigned getPosition() = 0;
  /// \brief Moves the source to \p Position and returns a token for it.
  virtual FormatToken *setPosition(unsigned Position) = 0;
};

namespace {

/// \brief RAII helper that pushes a "must be declaration" flag for a scope.
///
/// On construction the flag is stored in \c Line.MustBeDeclaration and pushed
/// onto \p Stack. On destruction the entry is popped again and the line's
/// flag is restored from the new top of the stack, or set to \c true when the
/// stack has become empty.
class ScopedDeclarationState {
public:
  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
                         bool MustBeDeclaration)
      : Line(Line), Stack(Stack) {
    Line.MustBeDeclaration = MustBeDeclaration;
    Stack.push_back(MustBeDeclaration);
  }
  ~ScopedDeclarationState() {
    Stack.pop_back();
    if (!Stack.empty())
      Line.MustBeDeclaration = Stack.back();
    else
      Line.MustBeDeclaration = true;
  }

private:
  UnwrappedLine &Line;
  std::vector<bool> &Stack;
};

/// \brief Returns true if \p FormatTok is a comment token whose text does not
/// start with "/*", i.e. it is not a block comment.
static bool isLineComment(const FormatToken &FormatTok) {
  return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
}

// Checks if \p FormatTok is a line comment that continues the line comment
// \p Previous. The original column of \p MinColumnToken is used to determine
// whether \p FormatTok is indented enough to the right to continue \p Previous.
65 static bool continuesLineComment(const FormatToken &FormatTok, 66 const FormatToken *Previous, 67 const FormatToken *MinColumnToken) { 68 if (!Previous || !MinColumnToken) 69 return false; 70 unsigned MinContinueColumn = 71 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 72 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 73 isLineComment(*Previous) && 74 FormatTok.OriginalColumn >= MinContinueColumn; 75 } 76 77 class ScopedMacroState : public FormatTokenSource { 78 public: 79 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 80 FormatToken *&ResetToken) 81 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 82 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 83 Token(nullptr), PreviousToken(nullptr) { 84 TokenSource = this; 85 Line.Level = 0; 86 Line.InPPDirective = true; 87 } 88 89 ~ScopedMacroState() override { 90 TokenSource = PreviousTokenSource; 91 ResetToken = Token; 92 Line.InPPDirective = false; 93 Line.Level = PreviousLineLevel; 94 } 95 96 FormatToken *getNextToken() override { 97 // The \c UnwrappedLineParser guards against this by never calling 98 // \c getNextToken() after it has encountered the first eof token. 
99 assert(!eof()); 100 PreviousToken = Token; 101 Token = PreviousTokenSource->getNextToken(); 102 if (eof()) 103 return getFakeEOF(); 104 return Token; 105 } 106 107 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 108 109 FormatToken *setPosition(unsigned Position) override { 110 PreviousToken = nullptr; 111 Token = PreviousTokenSource->setPosition(Position); 112 return Token; 113 } 114 115 private: 116 bool eof() { 117 return Token && Token->HasUnescapedNewline && 118 !continuesLineComment(*Token, PreviousToken, 119 /*MinColumnToken=*/PreviousToken); 120 } 121 122 FormatToken *getFakeEOF() { 123 static bool EOFInitialized = false; 124 static FormatToken FormatTok; 125 if (!EOFInitialized) { 126 FormatTok.Tok.startToken(); 127 FormatTok.Tok.setKind(tok::eof); 128 EOFInitialized = true; 129 } 130 return &FormatTok; 131 } 132 133 UnwrappedLine &Line; 134 FormatTokenSource *&TokenSource; 135 FormatToken *&ResetToken; 136 unsigned PreviousLineLevel; 137 FormatTokenSource *PreviousTokenSource; 138 139 FormatToken *Token; 140 FormatToken *PreviousToken; 141 }; 142 143 } // end anonymous namespace 144 145 class ScopedLineState { 146 public: 147 ScopedLineState(UnwrappedLineParser &Parser, 148 bool SwitchToPreprocessorLines = false) 149 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 150 if (SwitchToPreprocessorLines) 151 Parser.CurrentLines = &Parser.PreprocessorDirectives; 152 else if (!Parser.Line->Tokens.empty()) 153 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 154 PreBlockLine = std::move(Parser.Line); 155 Parser.Line = llvm::make_unique<UnwrappedLine>(); 156 Parser.Line->Level = PreBlockLine->Level; 157 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 158 } 159 160 ~ScopedLineState() { 161 if (!Parser.Line->Tokens.empty()) { 162 Parser.addUnwrappedLine(); 163 } 164 assert(Parser.Line->Tokens.empty()); 165 Parser.Line = std::move(PreBlockLine); 166 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 
167 Parser.MustBreakBeforeNextToken = true; 168 Parser.CurrentLines = OriginalLines; 169 } 170 171 private: 172 UnwrappedLineParser &Parser; 173 174 std::unique_ptr<UnwrappedLine> PreBlockLine; 175 SmallVectorImpl<UnwrappedLine> *OriginalLines; 176 }; 177 178 class CompoundStatementIndenter { 179 public: 180 CompoundStatementIndenter(UnwrappedLineParser *Parser, 181 const FormatStyle &Style, unsigned &LineLevel) 182 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 183 if (Style.BraceWrapping.AfterControlStatement) 184 Parser->addUnwrappedLine(); 185 if (Style.BraceWrapping.IndentBraces) 186 ++LineLevel; 187 } 188 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 189 190 private: 191 unsigned &LineLevel; 192 unsigned OldLineLevel; 193 }; 194 195 namespace { 196 197 class IndexedTokenSource : public FormatTokenSource { 198 public: 199 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 200 : Tokens(Tokens), Position(-1) {} 201 202 FormatToken *getNextToken() override { 203 ++Position; 204 return Tokens[Position]; 205 } 206 207 unsigned getPosition() override { 208 assert(Position >= 0); 209 return Position; 210 } 211 212 FormatToken *setPosition(unsigned P) override { 213 Position = P; 214 return Tokens[Position]; 215 } 216 217 void reset() { Position = -1; } 218 219 private: 220 ArrayRef<FormatToken *> Tokens; 221 int Position; 222 }; 223 224 } // end anonymous namespace 225 226 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 227 const AdditionalKeywords &Keywords, 228 unsigned FirstStartColumn, 229 ArrayRef<FormatToken *> Tokens, 230 UnwrappedLineConsumer &Callback) 231 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 232 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 233 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 234 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 235 IfNdefCondition(nullptr), FoundIncludeGuardStart(false), 236 IncludeGuardRejected(false), FirstStartColumn(FirstStartColumn) 
{} 237 238 void UnwrappedLineParser::reset() { 239 PPBranchLevel = -1; 240 IfNdefCondition = nullptr; 241 FoundIncludeGuardStart = false; 242 IncludeGuardRejected = false; 243 Line.reset(new UnwrappedLine); 244 CommentsBeforeNextToken.clear(); 245 FormatTok = nullptr; 246 MustBreakBeforeNextToken = false; 247 PreprocessorDirectives.clear(); 248 CurrentLines = &Lines; 249 DeclarationScopeStack.clear(); 250 PPStack.clear(); 251 Line->FirstStartColumn = FirstStartColumn; 252 } 253 254 void UnwrappedLineParser::parse() { 255 IndexedTokenSource TokenSource(AllTokens); 256 Line->FirstStartColumn = FirstStartColumn; 257 do { 258 DEBUG(llvm::dbgs() << "----\n"); 259 reset(); 260 Tokens = &TokenSource; 261 TokenSource.reset(); 262 263 readToken(); 264 parseFile(); 265 // Create line with eof token. 266 pushToken(FormatTok); 267 addUnwrappedLine(); 268 269 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 270 E = Lines.end(); 271 I != E; ++I) { 272 Callback.consumeUnwrappedLine(*I); 273 } 274 Callback.finishRun(); 275 Lines.clear(); 276 while (!PPLevelBranchIndex.empty() && 277 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 278 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 279 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 280 } 281 if (!PPLevelBranchIndex.empty()) { 282 ++PPLevelBranchIndex.back(); 283 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 284 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 285 } 286 } while (!PPLevelBranchIndex.empty()); 287 } 288 289 void UnwrappedLineParser::parseFile() { 290 // The top-level context in a file always has declarations, except for pre- 291 // processor directives and JavaScript files. 
292 bool MustBeDeclaration = 293 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 294 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 295 MustBeDeclaration); 296 if (Style.Language == FormatStyle::LK_TextProto) 297 parseBracedList(); 298 else 299 parseLevel(/*HasOpeningBrace=*/false); 300 // Make sure to format the remaining tokens. 301 flushComments(true); 302 addUnwrappedLine(); 303 } 304 305 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 306 bool SwitchLabelEncountered = false; 307 do { 308 tok::TokenKind kind = FormatTok->Tok.getKind(); 309 if (FormatTok->Type == TT_MacroBlockBegin) { 310 kind = tok::l_brace; 311 } else if (FormatTok->Type == TT_MacroBlockEnd) { 312 kind = tok::r_brace; 313 } 314 315 switch (kind) { 316 case tok::comment: 317 nextToken(); 318 addUnwrappedLine(); 319 break; 320 case tok::l_brace: 321 // FIXME: Add parameter whether this can happen - if this happens, we must 322 // be in a non-declaration context. 323 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 324 continue; 325 parseBlock(/*MustBeDeclaration=*/false); 326 addUnwrappedLine(); 327 break; 328 case tok::r_brace: 329 if (HasOpeningBrace) 330 return; 331 nextToken(); 332 addUnwrappedLine(); 333 break; 334 case tok::kw_default: 335 case tok::kw_case: 336 if (Style.Language == FormatStyle::LK_JavaScript && 337 Line->MustBeDeclaration) { 338 // A 'case: string' style field declaration. 
339 parseStructuralElement(); 340 break; 341 } 342 if (!SwitchLabelEncountered && 343 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 344 ++Line->Level; 345 SwitchLabelEncountered = true; 346 parseStructuralElement(); 347 break; 348 default: 349 parseStructuralElement(); 350 break; 351 } 352 } while (!eof()); 353 } 354 355 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 356 // We'll parse forward through the tokens until we hit 357 // a closing brace or eof - note that getNextToken() will 358 // parse macros, so this will magically work inside macro 359 // definitions, too. 360 unsigned StoredPosition = Tokens->getPosition(); 361 FormatToken *Tok = FormatTok; 362 const FormatToken *PrevTok = Tok->Previous; 363 // Keep a stack of positions of lbrace tokens. We will 364 // update information about whether an lbrace starts a 365 // braced init list or a different block during the loop. 366 SmallVector<FormatToken *, 8> LBraceStack; 367 assert(Tok->Tok.is(tok::l_brace)); 368 do { 369 // Get next non-comment token. 370 FormatToken *NextTok; 371 unsigned ReadTokens = 0; 372 do { 373 NextTok = Tokens->getNextToken(); 374 ++ReadTokens; 375 } while (NextTok->is(tok::comment)); 376 377 switch (Tok->Tok.getKind()) { 378 case tok::l_brace: 379 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 380 if (PrevTok->is(tok::colon)) 381 // A colon indicates this code is in a type, or a braced list 382 // following a label in an object literal ({a: {b: 1}}). The code 383 // below could be confused by semicolons between the individual 384 // members in a type member list, which would normally trigger 385 // BK_Block. In both cases, this must be parsed as an inline braced 386 // init. 387 Tok->BlockKind = BK_BracedInit; 388 else if (PrevTok->is(tok::r_paren)) 389 // `) { }` can only occur in function or method declarations in JS. 
390 Tok->BlockKind = BK_Block; 391 } else { 392 Tok->BlockKind = BK_Unknown; 393 } 394 LBraceStack.push_back(Tok); 395 break; 396 case tok::r_brace: 397 if (LBraceStack.empty()) 398 break; 399 if (LBraceStack.back()->BlockKind == BK_Unknown) { 400 bool ProbablyBracedList = false; 401 if (Style.Language == FormatStyle::LK_Proto) { 402 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 403 } else { 404 // Using OriginalColumn to distinguish between ObjC methods and 405 // binary operators is a bit hacky. 406 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 407 NextTok->OriginalColumn == 0; 408 409 // If there is a comma, semicolon or right paren after the closing 410 // brace, we assume this is a braced initializer list. Note that 411 // regardless how we mark inner braces here, we will overwrite the 412 // BlockKind later if we parse a braced list (where all blocks 413 // inside are by default braced lists), or when we explicitly detect 414 // blocks (for example while parsing lambdas). 415 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 416 // braced list in JS. 
417 ProbablyBracedList = 418 (Style.Language == FormatStyle::LK_JavaScript && 419 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 420 Keywords.kw_as)) || 421 (Style.isCpp() && NextTok->is(tok::l_paren)) || 422 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 423 tok::r_paren, tok::r_square, tok::l_brace, 424 tok::l_square, tok::ellipsis) || 425 (NextTok->is(tok::identifier) && 426 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 427 (NextTok->is(tok::semi) && 428 (!ExpectClassBody || LBraceStack.size() != 1)) || 429 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 430 } 431 if (ProbablyBracedList) { 432 Tok->BlockKind = BK_BracedInit; 433 LBraceStack.back()->BlockKind = BK_BracedInit; 434 } else { 435 Tok->BlockKind = BK_Block; 436 LBraceStack.back()->BlockKind = BK_Block; 437 } 438 } 439 LBraceStack.pop_back(); 440 break; 441 case tok::at: 442 case tok::semi: 443 case tok::kw_if: 444 case tok::kw_while: 445 case tok::kw_for: 446 case tok::kw_switch: 447 case tok::kw_try: 448 case tok::kw___try: 449 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 450 LBraceStack.back()->BlockKind = BK_Block; 451 break; 452 default: 453 break; 454 } 455 PrevTok = Tok; 456 Tok = NextTok; 457 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 458 459 // Assume other blocks for all unclosed opening braces. 
460 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 461 if (LBraceStack[i]->BlockKind == BK_Unknown) 462 LBraceStack[i]->BlockKind = BK_Block; 463 } 464 465 FormatTok = Tokens->setPosition(StoredPosition); 466 } 467 468 template <class T> 469 static inline void hash_combine(std::size_t &seed, const T &v) { 470 std::hash<T> hasher; 471 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 472 } 473 474 size_t UnwrappedLineParser::computePPHash() const { 475 size_t h = 0; 476 for (const auto &i : PPStack) { 477 hash_combine(h, size_t(i.Kind)); 478 hash_combine(h, i.Line); 479 } 480 return h; 481 } 482 483 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 484 bool MunchSemi) { 485 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 486 "'{' or macro block token expected"); 487 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 488 FormatTok->BlockKind = BK_Block; 489 490 size_t PPStartHash = computePPHash(); 491 492 unsigned InitialLevel = Line->Level; 493 nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); 494 495 if (MacroBlock && FormatTok->is(tok::l_paren)) 496 parseParens(); 497 498 size_t NbPreprocessorDirectives = 499 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 500 addUnwrappedLine(); 501 size_t OpeningLineIndex = 502 CurrentLines->empty() 503 ? (UnwrappedLine::kInvalidIndex) 504 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 505 506 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 507 MustBeDeclaration); 508 if (AddLevel) 509 ++Line->Level; 510 parseLevel(/*HasOpeningBrace=*/true); 511 512 if (eof()) 513 return; 514 515 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 516 : !FormatTok->is(tok::r_brace)) { 517 Line->Level = InitialLevel; 518 FormatTok->BlockKind = BK_Block; 519 return; 520 } 521 522 size_t PPEndHash = computePPHash(); 523 524 // Munch the closing brace. 525 nextToken(/*LevelDifference=*/AddLevel ? 
-1 : 0); 526 527 if (MacroBlock && FormatTok->is(tok::l_paren)) 528 parseParens(); 529 530 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 531 nextToken(); 532 Line->Level = InitialLevel; 533 534 if (PPStartHash == PPEndHash) { 535 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 536 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 537 // Update the opening line to add the forward reference as well 538 (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex = 539 CurrentLines->size() - 1; 540 } 541 } 542 } 543 544 static bool isGoogScope(const UnwrappedLine &Line) { 545 // FIXME: Closure-library specific stuff should not be hard-coded but be 546 // configurable. 547 if (Line.Tokens.size() < 4) 548 return false; 549 auto I = Line.Tokens.begin(); 550 if (I->Tok->TokenText != "goog") 551 return false; 552 ++I; 553 if (I->Tok->isNot(tok::period)) 554 return false; 555 ++I; 556 if (I->Tok->TokenText != "scope") 557 return false; 558 ++I; 559 return I->Tok->is(tok::l_paren); 560 } 561 562 static bool isIIFE(const UnwrappedLine &Line, 563 const AdditionalKeywords &Keywords) { 564 // Look for the start of an immediately invoked anonymous function. 565 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 566 // This is commonly done in JavaScript to create a new, anonymous scope. 567 // Example: (function() { ... 
})() 568 if (Line.Tokens.size() < 3) 569 return false; 570 auto I = Line.Tokens.begin(); 571 if (I->Tok->isNot(tok::l_paren)) 572 return false; 573 ++I; 574 if (I->Tok->isNot(Keywords.kw_function)) 575 return false; 576 ++I; 577 return I->Tok->is(tok::l_paren); 578 } 579 580 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 581 const FormatToken &InitialToken) { 582 if (InitialToken.is(tok::kw_namespace)) 583 return Style.BraceWrapping.AfterNamespace; 584 if (InitialToken.is(tok::kw_class)) 585 return Style.BraceWrapping.AfterClass; 586 if (InitialToken.is(tok::kw_union)) 587 return Style.BraceWrapping.AfterUnion; 588 if (InitialToken.is(tok::kw_struct)) 589 return Style.BraceWrapping.AfterStruct; 590 return false; 591 } 592 593 void UnwrappedLineParser::parseChildBlock() { 594 FormatTok->BlockKind = BK_Block; 595 nextToken(); 596 { 597 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && 598 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 599 ScopedLineState LineState(*this); 600 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 601 /*MustBeDeclaration=*/false); 602 Line->Level += SkipIndent ? 0 : 1; 603 parseLevel(/*HasOpeningBrace=*/true); 604 flushComments(isOnNewLine(*FormatTok)); 605 Line->Level -= SkipIndent ? 
0 : 1; 606 } 607 nextToken(); 608 } 609 610 void UnwrappedLineParser::parsePPDirective() { 611 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 612 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 613 nextToken(); 614 615 if (!FormatTok->Tok.getIdentifierInfo()) { 616 parsePPUnknown(); 617 return; 618 } 619 620 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 621 case tok::pp_define: 622 parsePPDefine(); 623 return; 624 case tok::pp_if: 625 parsePPIf(/*IfDef=*/false); 626 break; 627 case tok::pp_ifdef: 628 case tok::pp_ifndef: 629 parsePPIf(/*IfDef=*/true); 630 break; 631 case tok::pp_else: 632 parsePPElse(); 633 break; 634 case tok::pp_elif: 635 parsePPElIf(); 636 break; 637 case tok::pp_endif: 638 parsePPEndIf(); 639 break; 640 default: 641 parsePPUnknown(); 642 break; 643 } 644 } 645 646 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 647 size_t Line = CurrentLines->size(); 648 if (CurrentLines == &PreprocessorDirectives) 649 Line += Lines.size(); 650 651 if (Unreachable || 652 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 653 PPStack.push_back({PP_Unreachable, Line}); 654 else 655 PPStack.push_back({PP_Conditional, Line}); 656 } 657 658 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 659 ++PPBranchLevel; 660 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 661 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 662 PPLevelBranchIndex.push_back(0); 663 PPLevelBranchCount.push_back(0); 664 } 665 PPChainBranchIndex.push(0); 666 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 667 conditionalCompilationCondition(Unreachable || Skip); 668 } 669 670 void UnwrappedLineParser::conditionalCompilationAlternative() { 671 if (!PPStack.empty()) 672 PPStack.pop_back(); 673 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 674 if (!PPChainBranchIndex.empty()) 675 ++PPChainBranchIndex.top(); 676 conditionalCompilationCondition( 677 
PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 678 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 679 } 680 681 void UnwrappedLineParser::conditionalCompilationEnd() { 682 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 683 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 684 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 685 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 686 } 687 } 688 // Guard against #endif's without #if. 689 if (PPBranchLevel > -1) 690 --PPBranchLevel; 691 if (!PPChainBranchIndex.empty()) 692 PPChainBranchIndex.pop(); 693 if (!PPStack.empty()) 694 PPStack.pop_back(); 695 } 696 697 void UnwrappedLineParser::parsePPIf(bool IfDef) { 698 bool IfNDef = FormatTok->is(tok::pp_ifndef); 699 nextToken(); 700 bool Unreachable = false; 701 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 702 Unreachable = true; 703 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 704 Unreachable = true; 705 conditionalCompilationStart(Unreachable); 706 FormatToken *IfCondition = FormatTok; 707 // If there's a #ifndef on the first line, and the only lines before it are 708 // comments, it could be an include guard. 709 bool MaybeIncludeGuard = IfNDef; 710 if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) { 711 for (auto &Line : Lines) { 712 if (!Line.Tokens.front().Tok->is(tok::comment)) { 713 MaybeIncludeGuard = false; 714 IncludeGuardRejected = true; 715 break; 716 } 717 } 718 } 719 --PPBranchLevel; 720 parsePPUnknown(); 721 ++PPBranchLevel; 722 if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) 723 IfNdefCondition = IfCondition; 724 } 725 726 void UnwrappedLineParser::parsePPElse() { 727 // If a potential include guard has an #else, it's not an include guard. 
728 if (FoundIncludeGuardStart && PPBranchLevel == 0) 729 FoundIncludeGuardStart = false; 730 conditionalCompilationAlternative(); 731 if (PPBranchLevel > -1) 732 --PPBranchLevel; 733 parsePPUnknown(); 734 ++PPBranchLevel; 735 } 736 737 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 738 739 void UnwrappedLineParser::parsePPEndIf() { 740 conditionalCompilationEnd(); 741 parsePPUnknown(); 742 // If the #endif of a potential include guard is the last thing in the file, 743 // then we count it as a real include guard and subtract one from every 744 // preprocessor indent. 745 unsigned TokenPosition = Tokens->getPosition(); 746 FormatToken *PeekNext = AllTokens[TokenPosition]; 747 if (FoundIncludeGuardStart && PPBranchLevel == -1 && PeekNext->is(tok::eof) && 748 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 749 for (auto &Line : Lines) 750 if (Line.InPPDirective && Line.Level > 0) 751 --Line.Level; 752 } 753 754 void UnwrappedLineParser::parsePPDefine() { 755 nextToken(); 756 757 if (FormatTok->Tok.getKind() != tok::identifier) { 758 parsePPUnknown(); 759 return; 760 } 761 if (IfNdefCondition && IfNdefCondition->TokenText == FormatTok->TokenText) { 762 FoundIncludeGuardStart = true; 763 for (auto &Line : Lines) { 764 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 765 FoundIncludeGuardStart = false; 766 break; 767 } 768 } 769 } 770 IfNdefCondition = nullptr; 771 nextToken(); 772 if (FormatTok->Tok.getKind() == tok::l_paren && 773 FormatTok->WhitespaceRange.getBegin() == 774 FormatTok->WhitespaceRange.getEnd()) { 775 parseParens(); 776 } 777 if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) 778 Line->Level += PPBranchLevel + 1; 779 addUnwrappedLine(); 780 ++Line->Level; 781 782 // Errors during a preprocessor directive can only affect the layout of the 783 // preprocessor directive, and thus we ignore them. 
An alternative approach 784 // would be to use the same approach we use on the file level (no 785 // re-indentation if there was a structural error) within the macro 786 // definition. 787 parseFile(); 788 } 789 790 void UnwrappedLineParser::parsePPUnknown() { 791 do { 792 nextToken(); 793 } while (!eof()); 794 if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) 795 Line->Level += PPBranchLevel + 1; 796 addUnwrappedLine(); 797 IfNdefCondition = nullptr; 798 } 799 800 // Here we blacklist certain tokens that are not usually the first token in an 801 // unwrapped line. This is used in attempt to distinguish macro calls without 802 // trailing semicolons from other constructs split to several lines. 803 static bool tokenCanStartNewLine(const clang::Token &Tok) { 804 // Semicolon can be a null-statement, l_square can be a start of a macro or 805 // a C++11 attribute, but this doesn't seem to be common. 806 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 807 Tok.isNot(tok::l_square) && 808 // Tokens that can only be used as binary operators and a part of 809 // overloaded operator names. 810 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 811 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 812 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 813 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 814 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 815 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 816 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 817 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 818 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 819 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 820 Tok.isNot(tok::lesslessequal) && 821 // Colon is used in labels, base class lists, initializer lists, 822 // range-based for loops, ternary operator, but should never be the 823 // first token in an unwrapped line. 
824 Tok.isNot(tok::colon) && 825 // 'noexcept' is a trailing annotation. 826 Tok.isNot(tok::kw_noexcept); 827 } 828 829 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 830 const FormatToken *FormatTok) { 831 // FIXME: This returns true for C/C++ keywords like 'struct'. 832 return FormatTok->is(tok::identifier) && 833 (FormatTok->Tok.getIdentifierInfo() == nullptr || 834 !FormatTok->isOneOf( 835 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 836 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 837 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 838 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 839 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 840 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, 841 Keywords.kw_from)); 842 } 843 844 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 845 const FormatToken *FormatTok) { 846 return FormatTok->Tok.isLiteral() || 847 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 848 mustBeJSIdent(Keywords, FormatTok); 849 } 850 851 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 852 // when encountered after a value (see mustBeJSIdentOrValue). 
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
                           const FormatToken *FormatTok) {
  return FormatTok->isOneOf(
      tok::kw_return, Keywords.kw_yield,
      // conditionals
      tok::kw_if, tok::kw_else,
      // loops
      tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
      // switch/case
      tok::kw_switch, tok::kw_case,
      // exceptions
      tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
      // declaration
      tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
      Keywords.kw_async, Keywords.kw_function,
      // import/export
      Keywords.kw_import, tok::kw_export);
}

// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// terminate the line in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // If comments are pending before the next token, the first of them decides
  // whether a line break follows the previous token.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
    // If the token before the previous one is an '@', the previous token is an
    // annotation and can precede another identifier/value.
    const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
    if (PrePrevious->is(tok::at))
      return;
  }
  // A value followed by '!' on a new line: end the line before the '!'.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // Two values in a row (or a value after ')', ']', '++', '--') on separate
  // lines, unless either token belongs to a template string expression.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus)))
    return addUnwrappedLine();
  // A value or ')' followed by a token that starts a declaration/statement.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next))
    return addUnwrappedLine();
}

void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::at:
    nextToken();
    if (FormatTok->Tok.is(tok::l_brace)) {
      nextToken();
      parseBracedList();
      break;
    }
    switch (FormatTok->Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
947 case tok::objc_optional: 948 case tok::objc_required: 949 nextToken(); 950 addUnwrappedLine(); 951 return; 952 case tok::objc_autoreleasepool: 953 nextToken(); 954 if (FormatTok->Tok.is(tok::l_brace)) { 955 if (Style.BraceWrapping.AfterObjCDeclaration) 956 addUnwrappedLine(); 957 parseBlock(/*MustBeDeclaration=*/false); 958 } 959 addUnwrappedLine(); 960 return; 961 case tok::objc_try: 962 // This branch isn't strictly necessary (the kw_try case below would 963 // do this too after the tok::at is parsed above). But be explicit. 964 parseTryCatch(); 965 return; 966 default: 967 break; 968 } 969 break; 970 case tok::kw_asm: 971 nextToken(); 972 if (FormatTok->is(tok::l_brace)) { 973 FormatTok->Type = TT_InlineASMBrace; 974 nextToken(); 975 while (FormatTok && FormatTok->isNot(tok::eof)) { 976 if (FormatTok->is(tok::r_brace)) { 977 FormatTok->Type = TT_InlineASMBrace; 978 nextToken(); 979 addUnwrappedLine(); 980 break; 981 } 982 FormatTok->Finalized = true; 983 nextToken(); 984 } 985 } 986 break; 987 case tok::kw_namespace: 988 parseNamespace(); 989 return; 990 case tok::kw_inline: 991 nextToken(); 992 if (FormatTok->Tok.is(tok::kw_namespace)) { 993 parseNamespace(); 994 return; 995 } 996 break; 997 case tok::kw_public: 998 case tok::kw_protected: 999 case tok::kw_private: 1000 if (Style.Language == FormatStyle::LK_Java || 1001 Style.Language == FormatStyle::LK_JavaScript) 1002 nextToken(); 1003 else 1004 parseAccessSpecifier(); 1005 return; 1006 case tok::kw_if: 1007 parseIfThenElse(); 1008 return; 1009 case tok::kw_for: 1010 case tok::kw_while: 1011 parseForOrWhileLoop(); 1012 return; 1013 case tok::kw_do: 1014 parseDoWhile(); 1015 return; 1016 case tok::kw_switch: 1017 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1018 // 'switch: string' field declaration. 
1019 break; 1020 parseSwitch(); 1021 return; 1022 case tok::kw_default: 1023 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1024 // 'default: string' field declaration. 1025 break; 1026 nextToken(); 1027 parseLabel(); 1028 return; 1029 case tok::kw_case: 1030 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1031 // 'case: string' field declaration. 1032 break; 1033 parseCaseLabel(); 1034 return; 1035 case tok::kw_try: 1036 case tok::kw___try: 1037 parseTryCatch(); 1038 return; 1039 case tok::kw_extern: 1040 nextToken(); 1041 if (FormatTok->Tok.is(tok::string_literal)) { 1042 nextToken(); 1043 if (FormatTok->Tok.is(tok::l_brace)) { 1044 if (Style.BraceWrapping.AfterExternBlock) { 1045 addUnwrappedLine(); 1046 parseBlock(/*MustBeDeclaration=*/true); 1047 } else { 1048 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 1049 } 1050 addUnwrappedLine(); 1051 return; 1052 } 1053 } 1054 break; 1055 case tok::kw_export: 1056 if (Style.Language == FormatStyle::LK_JavaScript) { 1057 parseJavaScriptEs6ImportExport(); 1058 return; 1059 } 1060 break; 1061 case tok::identifier: 1062 if (FormatTok->is(TT_ForEachMacro)) { 1063 parseForOrWhileLoop(); 1064 return; 1065 } 1066 if (FormatTok->is(TT_MacroBlockBegin)) { 1067 parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, 1068 /*MunchSemi=*/false); 1069 return; 1070 } 1071 if (FormatTok->is(Keywords.kw_import)) { 1072 if (Style.Language == FormatStyle::LK_JavaScript) { 1073 parseJavaScriptEs6ImportExport(); 1074 return; 1075 } 1076 if (Style.Language == FormatStyle::LK_Proto) { 1077 nextToken(); 1078 if (FormatTok->is(tok::kw_public)) 1079 nextToken(); 1080 if (!FormatTok->is(tok::string_literal)) 1081 return; 1082 nextToken(); 1083 if (FormatTok->is(tok::semi)) 1084 nextToken(); 1085 addUnwrappedLine(); 1086 return; 1087 } 1088 } 1089 if (Style.isCpp() && 1090 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1091 Keywords.kw_slots, 
Keywords.kw_qslots)) { 1092 nextToken(); 1093 if (FormatTok->is(tok::colon)) { 1094 nextToken(); 1095 addUnwrappedLine(); 1096 return; 1097 } 1098 } 1099 // In all other cases, parse the declaration. 1100 break; 1101 default: 1102 break; 1103 } 1104 do { 1105 const FormatToken *Previous = FormatTok->Previous; 1106 switch (FormatTok->Tok.getKind()) { 1107 case tok::at: 1108 nextToken(); 1109 if (FormatTok->Tok.is(tok::l_brace)) { 1110 nextToken(); 1111 parseBracedList(); 1112 } 1113 break; 1114 case tok::kw_enum: 1115 // Ignore if this is part of "template <enum ...". 1116 if (Previous && Previous->is(tok::less)) { 1117 nextToken(); 1118 break; 1119 } 1120 1121 // parseEnum falls through and does not yet add an unwrapped line as an 1122 // enum definition can start a structural element. 1123 if (!parseEnum()) 1124 break; 1125 // This only applies for C++. 1126 if (!Style.isCpp()) { 1127 addUnwrappedLine(); 1128 return; 1129 } 1130 break; 1131 case tok::kw_typedef: 1132 nextToken(); 1133 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1134 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 1135 parseEnum(); 1136 break; 1137 case tok::kw_struct: 1138 case tok::kw_union: 1139 case tok::kw_class: 1140 // parseRecord falls through and does not yet add an unwrapped line as a 1141 // record declaration or definition can start a structural element. 1142 parseRecord(); 1143 // This does not apply for Java and JavaScript. 1144 if (Style.Language == FormatStyle::LK_Java || 1145 Style.Language == FormatStyle::LK_JavaScript) { 1146 if (FormatTok->is(tok::semi)) 1147 nextToken(); 1148 addUnwrappedLine(); 1149 return; 1150 } 1151 break; 1152 case tok::period: 1153 nextToken(); 1154 // In Java, classes have an implicit static member "class". 
1155 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1156 FormatTok->is(tok::kw_class)) 1157 nextToken(); 1158 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 1159 FormatTok->Tok.getIdentifierInfo()) 1160 // JavaScript only has pseudo keywords, all keywords are allowed to 1161 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1162 nextToken(); 1163 break; 1164 case tok::semi: 1165 nextToken(); 1166 addUnwrappedLine(); 1167 return; 1168 case tok::r_brace: 1169 addUnwrappedLine(); 1170 return; 1171 case tok::l_paren: 1172 parseParens(); 1173 break; 1174 case tok::kw_operator: 1175 nextToken(); 1176 if (FormatTok->isBinaryOperator()) 1177 nextToken(); 1178 break; 1179 case tok::caret: 1180 nextToken(); 1181 if (FormatTok->Tok.isAnyIdentifier() || 1182 FormatTok->isSimpleTypeSpecifier()) 1183 nextToken(); 1184 if (FormatTok->is(tok::l_paren)) 1185 parseParens(); 1186 if (FormatTok->is(tok::l_brace)) 1187 parseChildBlock(); 1188 break; 1189 case tok::l_brace: 1190 if (!tryToParseBracedList()) { 1191 // A block outside of parentheses must be the last part of a 1192 // structural element. 1193 // FIXME: Figure out cases where this is not true, and add projections 1194 // for them (the one we know is missing are lambdas). 1195 if (Style.BraceWrapping.AfterFunction) 1196 addUnwrappedLine(); 1197 FormatTok->Type = TT_FunctionLBrace; 1198 parseBlock(/*MustBeDeclaration=*/false); 1199 addUnwrappedLine(); 1200 return; 1201 } 1202 // Otherwise this was a braced init list, and the structural 1203 // element continues. 1204 break; 1205 case tok::kw_try: 1206 // We arrive here when parsing function-try blocks. 1207 parseTryCatch(); 1208 return; 1209 case tok::identifier: { 1210 if (FormatTok->is(TT_MacroBlockEnd)) { 1211 addUnwrappedLine(); 1212 return; 1213 } 1214 1215 // Function declarations (as opposed to function expressions) are parsed 1216 // on their own unwrapped line by continuing this loop. 
Function 1217 // expressions (functions that are not on their own line) must not create 1218 // a new unwrapped line, so they are special cased below. 1219 size_t TokenCount = Line->Tokens.size(); 1220 if (Style.Language == FormatStyle::LK_JavaScript && 1221 FormatTok->is(Keywords.kw_function) && 1222 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1223 Keywords.kw_async)))) { 1224 tryToParseJSFunction(); 1225 break; 1226 } 1227 if ((Style.Language == FormatStyle::LK_JavaScript || 1228 Style.Language == FormatStyle::LK_Java) && 1229 FormatTok->is(Keywords.kw_interface)) { 1230 if (Style.Language == FormatStyle::LK_JavaScript) { 1231 // In JavaScript/TypeScript, "interface" can be used as a standalone 1232 // identifier, e.g. in `var interface = 1;`. If "interface" is 1233 // followed by another identifier, it is very like to be an actual 1234 // interface declaration. 1235 unsigned StoredPosition = Tokens->getPosition(); 1236 FormatToken *Next = Tokens->getNextToken(); 1237 FormatTok = Tokens->setPosition(StoredPosition); 1238 if (Next && !mustBeJSIdent(Keywords, Next)) { 1239 nextToken(); 1240 break; 1241 } 1242 } 1243 parseRecord(); 1244 addUnwrappedLine(); 1245 return; 1246 } 1247 1248 // See if the following token should start a new unwrapped line. 1249 StringRef Text = FormatTok->TokenText; 1250 nextToken(); 1251 if (Line->Tokens.size() == 1 && 1252 // JS doesn't have macros, and within classes colons indicate fields, 1253 // not labels. 1254 Style.Language != FormatStyle::LK_JavaScript) { 1255 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1256 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1257 parseLabel(); 1258 return; 1259 } 1260 // Recognize function-like macro usages without trailing semicolon as 1261 // well as free-standing macros like Q_OBJECT. 
1262 bool FunctionLike = FormatTok->is(tok::l_paren); 1263 if (FunctionLike) 1264 parseParens(); 1265 1266 bool FollowedByNewline = 1267 CommentsBeforeNextToken.empty() 1268 ? FormatTok->NewlinesBefore > 0 1269 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1270 1271 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1272 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 1273 addUnwrappedLine(); 1274 return; 1275 } 1276 } 1277 break; 1278 } 1279 case tok::equal: 1280 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 1281 // TT_JsFatArrow. The always start an expression or a child block if 1282 // followed by a curly. 1283 if (FormatTok->is(TT_JsFatArrow)) { 1284 nextToken(); 1285 if (FormatTok->is(tok::l_brace)) 1286 parseChildBlock(); 1287 break; 1288 } 1289 1290 nextToken(); 1291 if (FormatTok->Tok.is(tok::l_brace)) { 1292 nextToken(); 1293 parseBracedList(); 1294 } else if (Style.Language == FormatStyle::LK_Proto && 1295 FormatTok->Tok.is(tok::less)) { 1296 nextToken(); 1297 parseBracedList(/*ContinueOnSemicolons=*/false, 1298 /*ClosingBraceKind=*/tok::greater); 1299 } 1300 break; 1301 case tok::l_square: 1302 parseSquare(); 1303 break; 1304 case tok::kw_new: 1305 parseNew(); 1306 break; 1307 default: 1308 nextToken(); 1309 break; 1310 } 1311 } while (!eof()); 1312 } 1313 1314 bool UnwrappedLineParser::tryToParseLambda() { 1315 if (!Style.isCpp()) { 1316 nextToken(); 1317 return false; 1318 } 1319 assert(FormatTok->is(tok::l_square)); 1320 FormatToken &LSquare = *FormatTok; 1321 if (!tryToParseLambdaIntroducer()) 1322 return false; 1323 1324 while (FormatTok->isNot(tok::l_brace)) { 1325 if (FormatTok->isSimpleTypeSpecifier()) { 1326 nextToken(); 1327 continue; 1328 } 1329 switch (FormatTok->Tok.getKind()) { 1330 case tok::l_brace: 1331 break; 1332 case tok::l_paren: 1333 parseParens(); 1334 break; 1335 case tok::amp: 1336 case tok::star: 1337 case tok::kw_const: 1338 case tok::comma: 1339 case tok::less: 1340 case 
tok::greater: 1341 case tok::identifier: 1342 case tok::numeric_constant: 1343 case tok::coloncolon: 1344 case tok::kw_mutable: 1345 nextToken(); 1346 break; 1347 case tok::arrow: 1348 FormatTok->Type = TT_LambdaArrow; 1349 nextToken(); 1350 break; 1351 default: 1352 return true; 1353 } 1354 } 1355 LSquare.Type = TT_LambdaLSquare; 1356 parseChildBlock(); 1357 return true; 1358 } 1359 1360 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1361 const FormatToken *Previous = FormatTok->Previous; 1362 if (Previous && 1363 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1364 tok::kw_delete) || 1365 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1366 Previous->isSimpleTypeSpecifier())) { 1367 nextToken(); 1368 return false; 1369 } 1370 nextToken(); 1371 parseSquare(/*LambdaIntroducer=*/true); 1372 return true; 1373 } 1374 1375 void UnwrappedLineParser::tryToParseJSFunction() { 1376 assert(FormatTok->is(Keywords.kw_function) || 1377 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1378 if (FormatTok->is(Keywords.kw_async)) 1379 nextToken(); 1380 // Consume "function". 1381 nextToken(); 1382 1383 // Consume * (generator function). Treat it like C++'s overloaded operators. 1384 if (FormatTok->is(tok::star)) { 1385 FormatTok->Type = TT_OverloadedOperator; 1386 nextToken(); 1387 } 1388 1389 // Consume function name. 1390 if (FormatTok->is(tok::identifier)) 1391 nextToken(); 1392 1393 if (FormatTok->isNot(tok::l_paren)) 1394 return; 1395 1396 // Parse formal parameter list. 1397 parseParens(); 1398 1399 if (FormatTok->is(tok::colon)) { 1400 // Parse a type definition. 1401 nextToken(); 1402 1403 // Eat the type declaration. For braced inline object types, balance braces, 1404 // otherwise just parse until finding an l_brace for the function body. 
1405 if (FormatTok->is(tok::l_brace)) 1406 tryToParseBracedList(); 1407 else 1408 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1409 nextToken(); 1410 } 1411 1412 if (FormatTok->is(tok::semi)) 1413 return; 1414 1415 parseChildBlock(); 1416 } 1417 1418 bool UnwrappedLineParser::tryToParseBracedList() { 1419 if (FormatTok->BlockKind == BK_Unknown) 1420 calculateBraceTypes(); 1421 assert(FormatTok->BlockKind != BK_Unknown); 1422 if (FormatTok->BlockKind == BK_Block) 1423 return false; 1424 nextToken(); 1425 parseBracedList(); 1426 return true; 1427 } 1428 1429 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1430 tok::TokenKind ClosingBraceKind) { 1431 bool HasError = false; 1432 1433 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1434 // replace this by using parseAssigmentExpression() inside. 1435 do { 1436 if (Style.Language == FormatStyle::LK_JavaScript) { 1437 if (FormatTok->is(Keywords.kw_function) || 1438 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1439 tryToParseJSFunction(); 1440 continue; 1441 } 1442 if (FormatTok->is(TT_JsFatArrow)) { 1443 nextToken(); 1444 // Fat arrows can be followed by simple expressions or by child blocks 1445 // in curly braces. 1446 if (FormatTok->is(tok::l_brace)) { 1447 parseChildBlock(); 1448 continue; 1449 } 1450 } 1451 if (FormatTok->is(tok::l_brace)) { 1452 // Could be a method inside of a braced list `{a() { return 1; }}`. 
1453 if (tryToParseBracedList()) 1454 continue; 1455 parseChildBlock(); 1456 } 1457 } 1458 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1459 nextToken(); 1460 return !HasError; 1461 } 1462 switch (FormatTok->Tok.getKind()) { 1463 case tok::caret: 1464 nextToken(); 1465 if (FormatTok->is(tok::l_brace)) { 1466 parseChildBlock(); 1467 } 1468 break; 1469 case tok::l_square: 1470 tryToParseLambda(); 1471 break; 1472 case tok::l_paren: 1473 parseParens(); 1474 // JavaScript can just have free standing methods and getters/setters in 1475 // object literals. Detect them by a "{" following ")". 1476 if (Style.Language == FormatStyle::LK_JavaScript) { 1477 if (FormatTok->is(tok::l_brace)) 1478 parseChildBlock(); 1479 break; 1480 } 1481 break; 1482 case tok::l_brace: 1483 // Assume there are no blocks inside a braced init list apart 1484 // from the ones we explicitly parse out (like lambdas). 1485 FormatTok->BlockKind = BK_BracedInit; 1486 nextToken(); 1487 parseBracedList(); 1488 break; 1489 case tok::less: 1490 if (Style.Language == FormatStyle::LK_Proto) { 1491 nextToken(); 1492 parseBracedList(/*ContinueOnSemicolons=*/false, 1493 /*ClosingBraceKind=*/tok::greater); 1494 } else { 1495 nextToken(); 1496 } 1497 break; 1498 case tok::semi: 1499 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1500 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1501 // used for error recovery if we have otherwise determined that this is 1502 // a braced list. 
1503 if (Style.Language == FormatStyle::LK_JavaScript) { 1504 nextToken(); 1505 break; 1506 } 1507 HasError = true; 1508 if (!ContinueOnSemicolons) 1509 return !HasError; 1510 nextToken(); 1511 break; 1512 case tok::comma: 1513 nextToken(); 1514 break; 1515 default: 1516 nextToken(); 1517 break; 1518 } 1519 } while (!eof()); 1520 return false; 1521 } 1522 1523 void UnwrappedLineParser::parseParens() { 1524 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1525 nextToken(); 1526 do { 1527 switch (FormatTok->Tok.getKind()) { 1528 case tok::l_paren: 1529 parseParens(); 1530 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1531 parseChildBlock(); 1532 break; 1533 case tok::r_paren: 1534 nextToken(); 1535 return; 1536 case tok::r_brace: 1537 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1538 return; 1539 case tok::l_square: 1540 tryToParseLambda(); 1541 break; 1542 case tok::l_brace: 1543 if (!tryToParseBracedList()) 1544 parseChildBlock(); 1545 break; 1546 case tok::at: 1547 nextToken(); 1548 if (FormatTok->Tok.is(tok::l_brace)) { 1549 nextToken(); 1550 parseBracedList(); 1551 } 1552 break; 1553 case tok::kw_class: 1554 if (Style.Language == FormatStyle::LK_JavaScript) 1555 parseRecord(/*ParseAsExpr=*/true); 1556 else 1557 nextToken(); 1558 break; 1559 case tok::identifier: 1560 if (Style.Language == FormatStyle::LK_JavaScript && 1561 (FormatTok->is(Keywords.kw_function) || 1562 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1563 tryToParseJSFunction(); 1564 else 1565 nextToken(); 1566 break; 1567 default: 1568 nextToken(); 1569 break; 1570 } 1571 } while (!eof()); 1572 } 1573 1574 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 1575 if (!LambdaIntroducer) { 1576 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1577 if (tryToParseLambda()) 1578 return; 1579 } 1580 do { 1581 switch (FormatTok->Tok.getKind()) { 1582 case tok::l_paren: 1583 parseParens(); 1584 
break; 1585 case tok::r_square: 1586 nextToken(); 1587 return; 1588 case tok::r_brace: 1589 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1590 return; 1591 case tok::l_square: 1592 parseSquare(); 1593 break; 1594 case tok::l_brace: { 1595 if (!tryToParseBracedList()) 1596 parseChildBlock(); 1597 break; 1598 } 1599 case tok::at: 1600 nextToken(); 1601 if (FormatTok->Tok.is(tok::l_brace)) { 1602 nextToken(); 1603 parseBracedList(); 1604 } 1605 break; 1606 default: 1607 nextToken(); 1608 break; 1609 } 1610 } while (!eof()); 1611 } 1612 1613 void UnwrappedLineParser::parseIfThenElse() { 1614 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1615 nextToken(); 1616 if (FormatTok->Tok.is(tok::kw_constexpr)) 1617 nextToken(); 1618 if (FormatTok->Tok.is(tok::l_paren)) 1619 parseParens(); 1620 bool NeedsUnwrappedLine = false; 1621 if (FormatTok->Tok.is(tok::l_brace)) { 1622 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1623 parseBlock(/*MustBeDeclaration=*/false); 1624 if (Style.BraceWrapping.BeforeElse) 1625 addUnwrappedLine(); 1626 else 1627 NeedsUnwrappedLine = true; 1628 } else { 1629 addUnwrappedLine(); 1630 ++Line->Level; 1631 parseStructuralElement(); 1632 --Line->Level; 1633 } 1634 if (FormatTok->Tok.is(tok::kw_else)) { 1635 nextToken(); 1636 if (FormatTok->Tok.is(tok::l_brace)) { 1637 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1638 parseBlock(/*MustBeDeclaration=*/false); 1639 addUnwrappedLine(); 1640 } else if (FormatTok->Tok.is(tok::kw_if)) { 1641 parseIfThenElse(); 1642 } else { 1643 addUnwrappedLine(); 1644 ++Line->Level; 1645 parseStructuralElement(); 1646 if (FormatTok->is(tok::eof)) 1647 addUnwrappedLine(); 1648 --Line->Level; 1649 } 1650 } else if (NeedsUnwrappedLine) { 1651 addUnwrappedLine(); 1652 } 1653 } 1654 1655 void UnwrappedLineParser::parseTryCatch() { 1656 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1657 nextToken(); 1658 bool NeedsUnwrappedLine = 
false; 1659 if (FormatTok->is(tok::colon)) { 1660 // We are in a function try block, what comes is an initializer list. 1661 nextToken(); 1662 while (FormatTok->is(tok::identifier)) { 1663 nextToken(); 1664 if (FormatTok->is(tok::l_paren)) 1665 parseParens(); 1666 if (FormatTok->is(tok::comma)) 1667 nextToken(); 1668 } 1669 } 1670 // Parse try with resource. 1671 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1672 parseParens(); 1673 } 1674 if (FormatTok->is(tok::l_brace)) { 1675 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1676 parseBlock(/*MustBeDeclaration=*/false); 1677 if (Style.BraceWrapping.BeforeCatch) { 1678 addUnwrappedLine(); 1679 } else { 1680 NeedsUnwrappedLine = true; 1681 } 1682 } else if (!FormatTok->is(tok::kw_catch)) { 1683 // The C++ standard requires a compound-statement after a try. 1684 // If there's none, we try to assume there's a structuralElement 1685 // and try to continue. 1686 addUnwrappedLine(); 1687 ++Line->Level; 1688 parseStructuralElement(); 1689 --Line->Level; 1690 } 1691 while (1) { 1692 if (FormatTok->is(tok::at)) 1693 nextToken(); 1694 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1695 tok::kw___finally) || 1696 ((Style.Language == FormatStyle::LK_Java || 1697 Style.Language == FormatStyle::LK_JavaScript) && 1698 FormatTok->is(Keywords.kw_finally)) || 1699 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1700 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1701 break; 1702 nextToken(); 1703 while (FormatTok->isNot(tok::l_brace)) { 1704 if (FormatTok->is(tok::l_paren)) { 1705 parseParens(); 1706 continue; 1707 } 1708 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1709 return; 1710 nextToken(); 1711 } 1712 NeedsUnwrappedLine = false; 1713 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1714 parseBlock(/*MustBeDeclaration=*/false); 1715 if (Style.BraceWrapping.BeforeCatch) 1716 addUnwrappedLine(); 1717 else 1718 NeedsUnwrappedLine = 
true; 1719 } 1720 if (NeedsUnwrappedLine) 1721 addUnwrappedLine(); 1722 } 1723 1724 void UnwrappedLineParser::parseNamespace() { 1725 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1726 1727 const FormatToken &InitialToken = *FormatTok; 1728 nextToken(); 1729 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1730 nextToken(); 1731 if (FormatTok->Tok.is(tok::l_brace)) { 1732 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1733 addUnwrappedLine(); 1734 1735 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1736 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1737 DeclarationScopeStack.size() > 1); 1738 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1739 // Munch the semicolon after a namespace. This is more common than one would 1740 // think. Puttin the semicolon into its own line is very ugly. 1741 if (FormatTok->Tok.is(tok::semi)) 1742 nextToken(); 1743 addUnwrappedLine(); 1744 } 1745 // FIXME: Add error handling. 1746 } 1747 1748 void UnwrappedLineParser::parseNew() { 1749 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1750 nextToken(); 1751 if (Style.Language != FormatStyle::LK_Java) 1752 return; 1753 1754 // In Java, we can parse everything up to the parens, which aren't optional. 1755 do { 1756 // There should not be a ;, { or } before the new's open paren. 1757 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1758 return; 1759 1760 // Consume the parens. 1761 if (FormatTok->is(tok::l_paren)) { 1762 parseParens(); 1763 1764 // If there is a class body of an anonymous class, consume that as child. 1765 if (FormatTok->is(tok::l_brace)) 1766 parseChildBlock(); 1767 return; 1768 } 1769 nextToken(); 1770 } while (!eof()); 1771 } 1772 1773 void UnwrappedLineParser::parseForOrWhileLoop() { 1774 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1775 "'for', 'while' or foreach macro expected"); 1776 nextToken(); 1777 // JS' for await ( ... 
1778 if (Style.Language == FormatStyle::LK_JavaScript && 1779 FormatTok->is(Keywords.kw_await)) 1780 nextToken(); 1781 if (FormatTok->Tok.is(tok::l_paren)) 1782 parseParens(); 1783 if (FormatTok->Tok.is(tok::l_brace)) { 1784 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1785 parseBlock(/*MustBeDeclaration=*/false); 1786 addUnwrappedLine(); 1787 } else { 1788 addUnwrappedLine(); 1789 ++Line->Level; 1790 parseStructuralElement(); 1791 --Line->Level; 1792 } 1793 } 1794 1795 void UnwrappedLineParser::parseDoWhile() { 1796 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1797 nextToken(); 1798 if (FormatTok->Tok.is(tok::l_brace)) { 1799 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1800 parseBlock(/*MustBeDeclaration=*/false); 1801 if (Style.BraceWrapping.IndentBraces) 1802 addUnwrappedLine(); 1803 } else { 1804 addUnwrappedLine(); 1805 ++Line->Level; 1806 parseStructuralElement(); 1807 --Line->Level; 1808 } 1809 1810 // FIXME: Add error handling. 1811 if (!FormatTok->Tok.is(tok::kw_while)) { 1812 addUnwrappedLine(); 1813 return; 1814 } 1815 1816 nextToken(); 1817 parseStructuralElement(); 1818 } 1819 1820 void UnwrappedLineParser::parseLabel() { 1821 nextToken(); 1822 unsigned OldLineLevel = Line->Level; 1823 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1824 --Line->Level; 1825 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1826 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1827 parseBlock(/*MustBeDeclaration=*/false); 1828 if (FormatTok->Tok.is(tok::kw_break)) { 1829 if (Style.BraceWrapping.AfterControlStatement) 1830 addUnwrappedLine(); 1831 parseStructuralElement(); 1832 } 1833 addUnwrappedLine(); 1834 } else { 1835 if (FormatTok->is(tok::semi)) 1836 nextToken(); 1837 addUnwrappedLine(); 1838 } 1839 Line->Level = OldLineLevel; 1840 if (FormatTok->isNot(tok::l_brace)) { 1841 parseStructuralElement(); 1842 addUnwrappedLine(); 1843 } 1844 } 1845 1846 void 
UnwrappedLineParser::parseCaseLabel() { 1847 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1848 // FIXME: fix handling of complex expressions here. 1849 do { 1850 nextToken(); 1851 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1852 parseLabel(); 1853 } 1854 1855 void UnwrappedLineParser::parseSwitch() { 1856 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1857 nextToken(); 1858 if (FormatTok->Tok.is(tok::l_paren)) 1859 parseParens(); 1860 if (FormatTok->Tok.is(tok::l_brace)) { 1861 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1862 parseBlock(/*MustBeDeclaration=*/false); 1863 addUnwrappedLine(); 1864 } else { 1865 addUnwrappedLine(); 1866 ++Line->Level; 1867 parseStructuralElement(); 1868 --Line->Level; 1869 } 1870 } 1871 1872 void UnwrappedLineParser::parseAccessSpecifier() { 1873 nextToken(); 1874 // Understand Qt's slots. 1875 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 1876 nextToken(); 1877 // Otherwise, we don't know what it is, and we'd better keep the next token. 1878 if (FormatTok->Tok.is(tok::colon)) 1879 nextToken(); 1880 addUnwrappedLine(); 1881 } 1882 1883 bool UnwrappedLineParser::parseEnum() { 1884 // Won't be 'enum' for NS_ENUMs. 1885 if (FormatTok->Tok.is(tok::kw_enum)) 1886 nextToken(); 1887 1888 // In TypeScript, "enum" can also be used as property name, e.g. in interface 1889 // declarations. An "enum" keyword followed by a colon would be a syntax 1890 // error and thus assume it is just an identifier. 1891 if (Style.Language == FormatStyle::LK_JavaScript && 1892 FormatTok->isOneOf(tok::colon, tok::question)) 1893 return false; 1894 1895 // Eat up enum class ... 
1896 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 1897 nextToken(); 1898 1899 while (FormatTok->Tok.getIdentifierInfo() || 1900 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 1901 tok::greater, tok::comma, tok::question)) { 1902 nextToken(); 1903 // We can have macros or attributes in between 'enum' and the enum name. 1904 if (FormatTok->is(tok::l_paren)) 1905 parseParens(); 1906 if (FormatTok->is(tok::identifier)) { 1907 nextToken(); 1908 // If there are two identifiers in a row, this is likely an elaborate 1909 // return type. In Java, this can be "implements", etc. 1910 if (Style.isCpp() && FormatTok->is(tok::identifier)) 1911 return false; 1912 } 1913 } 1914 1915 // Just a declaration or something is wrong. 1916 if (FormatTok->isNot(tok::l_brace)) 1917 return true; 1918 FormatTok->BlockKind = BK_Block; 1919 1920 if (Style.Language == FormatStyle::LK_Java) { 1921 // Java enums are different. 1922 parseJavaEnumBody(); 1923 return true; 1924 } 1925 if (Style.Language == FormatStyle::LK_Proto) { 1926 parseBlock(/*MustBeDeclaration=*/true); 1927 return true; 1928 } 1929 1930 // Parse enum body. 1931 nextToken(); 1932 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 1933 if (HasError) { 1934 if (FormatTok->is(tok::semi)) 1935 nextToken(); 1936 addUnwrappedLine(); 1937 } 1938 return true; 1939 1940 // There is no addUnwrappedLine() here so that we fall through to parsing a 1941 // structural element afterwards. Thus, in "enum A {} n, m;", 1942 // "} n, m;" will end up in one unwrapped line. 1943 } 1944 1945 void UnwrappedLineParser::parseJavaEnumBody() { 1946 // Determine whether the enum is simple, i.e. does not have a semicolon or 1947 // constants with class bodies. Simple enums can be formatted like braced 1948 // lists, contracted to a single line, etc. 
1949 unsigned StoredPosition = Tokens->getPosition(); 1950 bool IsSimple = true; 1951 FormatToken *Tok = Tokens->getNextToken(); 1952 while (Tok) { 1953 if (Tok->is(tok::r_brace)) 1954 break; 1955 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 1956 IsSimple = false; 1957 break; 1958 } 1959 // FIXME: This will also mark enums with braces in the arguments to enum 1960 // constants as "not simple". This is probably fine in practice, though. 1961 Tok = Tokens->getNextToken(); 1962 } 1963 FormatTok = Tokens->setPosition(StoredPosition); 1964 1965 if (IsSimple) { 1966 nextToken(); 1967 parseBracedList(); 1968 addUnwrappedLine(); 1969 return; 1970 } 1971 1972 // Parse the body of a more complex enum. 1973 // First add a line for everything up to the "{". 1974 nextToken(); 1975 addUnwrappedLine(); 1976 ++Line->Level; 1977 1978 // Parse the enum constants. 1979 while (FormatTok) { 1980 if (FormatTok->is(tok::l_brace)) { 1981 // Parse the constant's class body. 1982 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1983 /*MunchSemi=*/false); 1984 } else if (FormatTok->is(tok::l_paren)) { 1985 parseParens(); 1986 } else if (FormatTok->is(tok::comma)) { 1987 nextToken(); 1988 addUnwrappedLine(); 1989 } else if (FormatTok->is(tok::semi)) { 1990 nextToken(); 1991 addUnwrappedLine(); 1992 break; 1993 } else if (FormatTok->is(tok::r_brace)) { 1994 addUnwrappedLine(); 1995 break; 1996 } else { 1997 nextToken(); 1998 } 1999 } 2000 2001 // Parse the class body after the enum's ";" if any. 2002 parseLevel(/*HasOpeningBrace=*/true); 2003 nextToken(); 2004 --Line->Level; 2005 addUnwrappedLine(); 2006 } 2007 2008 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 2009 const FormatToken &InitialToken = *FormatTok; 2010 nextToken(); 2011 2012 // The actual identifier can be a nested name specifier, and in macros 2013 // it is often token-pasted. 
2014 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 2015 tok::kw___attribute, tok::kw___declspec, 2016 tok::kw_alignas) || 2017 ((Style.Language == FormatStyle::LK_Java || 2018 Style.Language == FormatStyle::LK_JavaScript) && 2019 FormatTok->isOneOf(tok::period, tok::comma))) { 2020 if (Style.Language == FormatStyle::LK_JavaScript && 2021 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 2022 // JavaScript/TypeScript supports inline object types in 2023 // extends/implements positions: 2024 // class Foo implements {bar: number} { } 2025 nextToken(); 2026 if (FormatTok->is(tok::l_brace)) { 2027 tryToParseBracedList(); 2028 continue; 2029 } 2030 } 2031 bool IsNonMacroIdentifier = 2032 FormatTok->is(tok::identifier) && 2033 FormatTok->TokenText != FormatTok->TokenText.upper(); 2034 nextToken(); 2035 // We can have macros or attributes in between 'class' and the class name. 2036 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 2037 parseParens(); 2038 } 2039 2040 // Note that parsing away template declarations here leads to incorrectly 2041 // accepting function declarations as record declarations. 2042 // In general, we cannot solve this problem. Consider: 2043 // class A<int> B() {} 2044 // which can be a function definition or a class definition when B() is a 2045 // macro. If we find enough real-world cases where this is a problem, we 2046 // can parse for the 'template' keyword in the beginning of the statement, 2047 // and thus rule out the record production in case there is no template 2048 // (this would still leave us with an ambiguity between template function 2049 // and class declarations). 
if (FormatTok->isOneOf(tok::colon, tok::less)) {
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->Tok.is(tok::semi))
        return;
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}

// Consumes an Objective-C protocol list "<...>".
//
// The current token must be '<' (asserted). Tokens are consumed up to the
// first '>' or until eof, and then one more token (the '>') is consumed.
// The scan stops at the first '>'; nested '<'/'>' pairs are not tracked.
void UnwrappedLineParser::parseObjCProtocolList() {
  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
  do
    nextToken();
  while (!eof() && FormatTok->Tok.isNot(tok::greater));
  // At eof this call is a no-op, because nextToken() returns early on eof.
  nextToken(); // Skip '>'.
}

// Parses the contents of an Objective-C container until the matching "@end"
// keyword or eof is reached.
//
// "@end" is consumed and finishes the current unwrapped line. Every other
// construct is either parsed as a block (when it starts with '{'), skipped
// (stray '}'), or delegated to parseStructuralElement().
void UnwrappedLineParser::parseObjCUntilAtEnd() {
  do {
    if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
      nextToken();
      addUnwrappedLine();
      break;
    }
    if (FormatTok->is(tok::l_brace)) {
      parseBlock(/*MustBeDeclaration=*/false);
      // In ObjC interfaces, nothing should be following the "}".
      addUnwrappedLine();
    } else if (FormatTok->is(tok::r_brace)) {
      // Ignore stray "}". parseStructuralElement doesn't consume them.
      nextToken();
      addUnwrappedLine();
    } else {
      parseStructuralElement();
    }
  } while (!eof());
}

// Parses the header of an Objective-C @interface/@implementation and then
// the container body up to "@end".
//
// The header may contain, in this order: a ": BaseClass" clause or a
// parenthesized category, a protocol list "<...>", and a braced block of
// instance variables.
void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
  // NOTE(review): presumably this skips the interface/implementation keyword
  // itself; the caller is outside this chunk -- confirm.
  nextToken();
  nextToken(); // interface name

  // @interface can be followed by either a base class, or a category.
  if (FormatTok->Tok.is(tok::colon)) {
    nextToken();
    nextToken(); // base class name
  } else if (FormatTok->Tok.is(tok::l_paren))
    // Skip category, if present.
    parseParens();

  if (FormatTok->Tok.is(tok::less))
    parseObjCProtocolList();

  if (FormatTok->Tok.is(tok::l_brace)) {
    // Start a new unwrapped line before the '{' if the style asks for it.
    if (Style.BraceWrapping.AfterObjCDeclaration)
      addUnwrappedLine();
    parseBlock(/*MustBeDeclaration=*/true);
  }

  // With instance variables, this puts '}' on its own line. Without instance
  // variables, this ends the @interface line.
  addUnwrappedLine();

  parseObjCUntilAtEnd();
}

// Parses an Objective-C @protocol.
//
// Handles both forms: a declaration terminated by ';', which ends up in a
// single unwrapped line, and a definition whose body is parsed up to "@end".
void UnwrappedLineParser::parseObjCProtocol() {
  // NOTE(review): presumably this skips the protocol keyword itself; the
  // caller is outside this chunk -- confirm.
  nextToken();
  nextToken(); // protocol name

  if (FormatTok->Tok.is(tok::less))
    parseObjCProtocolList();

  // Check for protocol declaration.
  if (FormatTok->Tok.is(tok::semi)) {
    nextToken();
    return addUnwrappedLine();
  }

  addUnwrappedLine();
  parseObjCUntilAtEnd();
}

// Parses the beginning of a JavaScript ES6 "import" or "export" statement.
//
// The current token must be "import" or "export" (asserted). Depending on
// what follows, this either consumes only a prefix and returns, so that the
// caller continues parsing the exported declaration or expression, or it
// consumes the rest of the statement up to (but not including) the
// terminating ';'.
void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
  bool IsImport = FormatTok->is(Keywords.kw_import);
  assert(IsImport || FormatTok->is(tok::kw_export));
  nextToken();

  // Consume the "default" in "export default class/function".
  if (FormatTok->is(tok::kw_default))
    nextToken();

  // Consume "async function", "function" and "default function", so that these
  // get parsed as free-standing JS functions, i.e. do not require a trailing
  // semicolon.
  if (FormatTok->is(Keywords.kw_async))
    nextToken();
  if (FormatTok->is(Keywords.kw_function)) {
    nextToken();
    return;
  }

  // For imports, `export *`, `export {...}`, consume the rest of the line up
  // to the terminating `;`. For everything else, just return and continue
  // parsing the structural element, i.e. the declaration or expression for
  // `export default`.
  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
      !FormatTok->isStringLiteral())
    return;

  while (!eof()) {
    if (FormatTok->is(tok::semi))
      return;
    if (Line->Tokens.empty()) {
      // Common issue: Automatic Semicolon Insertion wrapped the line, so the
      // import statement should terminate.
      return;
    }
    if (FormatTok->is(tok::l_brace)) {
      // Mark the '{' as BK_Block before handing its contents to
      // parseBracedList().
      FormatTok->BlockKind = BK_Block;
      nextToken();
      parseBracedList();
    } else {
      nextToken();
    }
  }
}

// Debug helper: writes a textual dump of \p Line to llvm::dbgs().
//
// The dump starts with \p Prefix, followed by the line's level, its first
// start column and a " MACRO" marker if the line is inside a preprocessor
// directive. It then lists, for each token, the token name, its Type and its
// OriginalColumn. Afterwards all child lines of all tokens are dumped
// recursively, each prefixed with "\nChild: ".
//
// Marked LLVM_ATTRIBUTE_UNUSED; in the code visible here it is only called
// from inside the DEBUG(...) block in addUnwrappedLine().
LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
                                                 StringRef Prefix = "") {
  llvm::dbgs() << Prefix << "Line(" << Line.Level
               << ", FSC=" << Line.FirstStartColumn << ")"
               << (Line.InPPDirective ? " MACRO" : "") << ": ";
  // First pass: the tokens of this line.
  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
                                                    E = Line.Tokens.end();
       I != E; ++I) {
    llvm::dbgs() << I->Tok->Tok.getName() << "["
                 << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
                 << "] ";
  }
  // Second pass: the child lines attached to each token. Note that the inner
  // loop's I and E shadow the outer iterators.
  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
                                                    E = Line.Tokens.end();
       I != E; ++I) {
    const UnwrappedLineNode &Node = *I;
    for (SmallVectorImpl<UnwrappedLine>::const_iterator
             I = Node.Children.begin(),
             E = Node.Children.end();
         I != E; ++I) {
      printDebugInfo(*I, "\nChild: ");
    }
  }
  llvm::dbgs() << "\n";
}

// Finishes the unwrapped line that is currently being built.
//
// Does nothing if the current line has no tokens. Otherwise the line is moved
// to the end of CurrentLines and the per-line state of Line is reset. If
// CurrentLines is the top-level Lines container, any lines collected in
// PreprocessorDirectives are appended right after it.
void UnwrappedLineParser::addUnwrappedLine() {
  if (Line->Tokens.empty())
    return;
  DEBUG({
    if (CurrentLines == &Lines)
      printDebugInfo(*Line);
  });
  CurrentLines->push_back(std::move(*Line));
  // *Line has been moved from; explicitly reset the fields that are reused
  // for the next line.
  Line->Tokens.clear();
  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
  Line->FirstStartColumn = 0;
  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
    CurrentLines->append(
        std::make_move_iterator(PreprocessorDirectives.begin()),
        std::make_move_iterator(PreprocessorDirectives.end()));
    PreprocessorDirectives.clear();
  }
  // Disconnect the current token from the last token on the previous line.
  FormatTok->Previous = nullptr;
}

// Returns true if the current token is the end-of-file token.
bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }

// Returns true if \p FormatTok is preceded by at least one newline and,
// additionally, either the current line is inside a preprocessor directive
// or the token has an unescaped newline before it.
// NOTE(review): presumably the InPPDirective case exists because newlines
// inside a directive are escaped -- confirm.
bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
         FormatTok.NewlinesBefore > 0;
}

// Checks if \p FormatTok is a line comment that continues the line comment
// section on \p Line.
//
// Returns false if \p Line has no tokens, or if the text of \p FormatTok
// (without a leading "//" or "/*") matches \p CommentPragmasRegex.
static bool continuesLineCommentSection(const FormatToken &FormatTok,
                                        const UnwrappedLine &Line,
                                        llvm::Regex &CommentPragmasRegex) {
  if (Line.Tokens.empty())
    return false;

  // Strip the comment introducer before matching against the pragma regex.
  StringRef IndentContent = FormatTok.TokenText;
  if (FormatTok.TokenText.startswith("//") ||
      FormatTok.TokenText.startswith("/*"))
    IndentContent = FormatTok.TokenText.substr(2);
  if (CommentPragmasRegex.match(IndentContent))
    return false;

  // If Line starts with a line comment, then FormatTok continues the comment
  // section if its original column is greater or equal to the original start
  // column of the line.
  //
  // Define the min column token of a line as follows: if a line ends in '{' or
  // contains a '{' followed by a line comment, then the min column token is
  // that '{'. Otherwise, the min column token of the line is the first token of
  // the line.
  //
  // If Line starts with a token other than a line comment, then FormatTok
  // continues the comment section if its original column is greater than the
  // original start column of the min column token of the line.
  //
  // For example, the second line comment continues the first in these cases:
  //
  // // first line
  // // second line
  //
  // and:
  //
  // // first line
  //  // second line
  //
  // and:
  //
  // int i; // first line
  //  // second line
  //
  // and:
  //
  // do { // first line
  //      // second line
  //   int i;
  // } while (true);
  //
  // and:
  //
  // enum {
  //   a, // first line
  //    // second line
  //   b
  // };
  //
  // The second line comment doesn't continue the first in these cases:
  //
  //   // first line
  //  // second line
  //
  // and:
  //
  // int i; // first line
  // // second line
  //
  // and:
  //
  // do { // first line
  //   // second line
  //   int i;
  // } while (true);
  //
  // and:
  //
  // enum {
  //   a, // first line
  //   // second line
  // };
  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;

  // Scan for '{//'. If found, use the column of '{' as a min column for line
  // comment section continuation.
  const FormatToken *PreviousToken = nullptr;
  for (const UnwrappedLineNode &Node : Line.Tokens) {
    if (PreviousToken && PreviousToken->is(tok::l_brace) &&
        isLineComment(*Node.Tok)) {
      MinColumnToken = PreviousToken;
      break;
    }
    PreviousToken = Node.Tok;

    // Grab the last newline preceding a token in this unwrapped line.
    if (Node.Tok->NewlinesBefore > 0) {
      MinColumnToken = Node.Tok;
    }
  }
  // A line that ends in '{' also uses that '{' as the min column token.
  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
    MinColumnToken = PreviousToken;
  }

  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
                              MinColumnToken);
}

// Pushes all comments stored in CommentsBeforeNextToken onto the current line
// and clears that list.
//
// If the current line had no tokens on entry, it consists of comments only.
// In that case a new unwrapped line is started before every comment that is
// on a new line and does not continue the current line comment section, and,
// if \p NewlineBeforeNext is true, the line is also finished after the last
// comment.
void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
  bool JustComments = Line->Tokens.empty();
  for (SmallVectorImpl<FormatToken *>::const_iterator
           I = CommentsBeforeNextToken.begin(),
           E = CommentsBeforeNextToken.end();
       I != E; ++I) {
    // Line comments that belong to the same line comment section are put on the
    // same line since later we might want to reflow content between them.
    // Additional fine-grained breaking of line comment sections is controlled
    // by the class BreakableLineCommentSection in case it is desirable to keep
    // several line comment sections in the same unwrapped line.
    //
    // FIXME: Consider putting separate line comment sections as children to the
    // unwrapped line instead.
    (*I)->ContinuesLineCommentSection =
        continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
    if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
      addUnwrappedLine();
    pushToken(*I);
  }
  if (NewlineBeforeNext && JustComments)
    addUnwrappedLine();
  CommentsBeforeNextToken.clear();
}

// Advances the parser by one token.
//
// Does nothing at eof. Otherwise flushes the pending comments, appends the
// current token to the current line, reads the next token and links it to the
// token that was just consumed via its Previous pointer.
//
// \p LevelDifference is forwarded to readToken(); it is not used when the
// language is JavaScript, where readTokenWithJavaScriptASI() is called
// instead.
void UnwrappedLineParser::nextToken(int LevelDifference) {
  if (eof())
    return;
  flushComments(isOnNewLine(*FormatTok));
  pushToken(FormatTok);
  FormatToken *Previous = FormatTok;
  if (Style.Language != FormatStyle::LK_JavaScript)
    readToken(LevelDifference);
  else
    readTokenWithJavaScriptASI();
  FormatTok->Previous = Previous;
}

// Distributes the comment tokens in \p Comments between the current line and
// CommentsBeforeNextToken, and sets ContinuesLineCommentSection on each of
// them.
//
// \p NextTok is the token following the comments; it may be null, in which
// case no trail detection is performed.
void UnwrappedLineParser::distributeComments(
    const SmallVectorImpl<FormatToken *> &Comments,
    const FormatToken *NextTok) {
  // Whether or not a line comment token continues a line is controlled by
  // the method continuesLineCommentSection, with the following caveat:
  //
  // Define a trail of Comments to be a nonempty proper postfix of Comments such
  // that each comment line from the trail is aligned with the next token, if
  // the next token exists. If a trail exists, the beginning of the maximal
  // trail is marked as a start of a new comment section.
  //
  // For example in this code:
  //
  // int a; // line about a
  // // line 1 about b
  // // line 2 about b
  // int b;
  //
  // the two lines about b form a maximal trail, so there are two sections, the
  // first one consisting of the single comment "// line about a" and the
  // second one consisting of the next two comments.
  if (Comments.empty())
    return;
  bool ShouldPushCommentsInCurrentLine = true;
  bool HasTrailAlignedWithNextToken = false;
  unsigned StartOfTrailAlignedWithNextToken = 0;
  if (NextTok) {
    // We are skipping the first element intentionally.
    // NOTE(review): this scan does not stop at the first comment that is not
    // aligned with NextTok, so the recorded start is the lowest index i > 0
    // whose column equals NextTok's column, even if later comments are not
    // aligned -- confirm this matches the "trail" definition above.
    for (unsigned i = Comments.size() - 1; i > 0; --i) {
      if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
        HasTrailAlignedWithNextToken = true;
        StartOfTrailAlignedWithNextToken = i;
      }
    }
  }
  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
    FormatToken *FormatTok = Comments[i];
    if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
      // The start of the trail always begins a new comment section.
      FormatTok->ContinuesLineCommentSection = false;
    } else {
      FormatTok->ContinuesLineCommentSection =
          continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
    }
    // Once a comment starts a new section on its own line (or is the first
    // token), this flag is never set back to true, so that comment and all
    // following ones are deferred to CommentsBeforeNextToken.
    if (!FormatTok->ContinuesLineCommentSection &&
        (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
      ShouldPushCommentsInCurrentLine = false;
    }
    if (ShouldPushCommentsInCurrentLine) {
      pushToken(FormatTok);
    } else {
      CommentsBeforeNextToken.push_back(FormatTok);
    }
  }
}

// Reads tokens from the token source until FormatTok is a non-comment token
// (or eof).
//
// Along the way this:
//  - parses preprocessor directives that start on a new line (or at the very
//    first token) while not already inside a directive,
//  - translates conflict marker tokens into conditional compilation state
//    changes,
//  - skips tokens while the top of PPStack is marked PP_Unreachable, and
//  - collects comment tokens and hands them to distributeComments() once the
//    following token is known.
//
// \p LevelDifference is added to Line->Level before a preprocessor directive
// encountered here is parsed.
void UnwrappedLineParser::readToken(int LevelDifference) {
  SmallVector<FormatToken *, 1> Comments;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      // NOTE(review): ScopedLineState is defined outside this chunk;
      // presumably it saves the current line state and restores it when
      // BlockState goes out of scope -- confirm.
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }
    // Conflict markers are handled like conditional compilation: start,
    // alternative and end. The marker token itself is dropped and the token
    // after it is forced onto a new line.
    while (FormatTok->Type == TT_ConflictStart ||
           FormatTok->Type == TT_ConflictEnd ||
           FormatTok->Type == TT_ConflictAlternative) {
      if (FormatTok->Type == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->Type == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->Type == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // Skip tokens in unreachable preprocessor branches. The 'continue' jumps
    // to the loop condition, so skipping also stops at eof.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    if (!FormatTok->Tok.is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // Reached eof without returning above: distribute the remaining comments
  // without a next token.
  distributeComments(Comments, nullptr);
  Comments.clear();
}

// Appends \p Tok to the current line. If MustBreakBeforeNextToken is set, the
// pushed token is marked MustBreakBefore and the flag is reset.
void UnwrappedLineParser::pushToken(FormatToken *Tok) {
  Line->Tokens.push_back(UnwrappedLineNode(Tok));
  if (MustBreakBeforeNextToken) {
    Line->Tokens.back().Tok->MustBreakBefore = true;
    MustBreakBeforeNextToken = false;
  }
}

} // end namespace format
} // end namespace clang