1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "UnwrappedLineParser.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/Support/Debug.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #define DEBUG_TYPE "format-parser" 22 23 namespace clang { 24 namespace format { 25 26 class FormatTokenSource { 27 public: 28 virtual ~FormatTokenSource() {} 29 virtual FormatToken *getNextToken() = 0; 30 31 virtual unsigned getPosition() = 0; 32 virtual FormatToken *setPosition(unsigned Position) = 0; 33 }; 34 35 namespace { 36 37 class ScopedDeclarationState { 38 public: 39 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 40 bool MustBeDeclaration) 41 : Line(Line), Stack(Stack) { 42 Line.MustBeDeclaration = MustBeDeclaration; 43 Stack.push_back(MustBeDeclaration); 44 } 45 ~ScopedDeclarationState() { 46 Stack.pop_back(); 47 if (!Stack.empty()) 48 Line.MustBeDeclaration = Stack.back(); 49 else 50 Line.MustBeDeclaration = true; 51 } 52 53 private: 54 UnwrappedLine &Line; 55 std::vector<bool> &Stack; 56 }; 57 58 static bool isLineComment(const FormatToken &FormatTok) { 59 return FormatTok.is(tok::comment) && FormatTok.TokenText.startswith("//"); 60 } 61 62 // Checks if \p FormatTok is a line comment that continues the line comment 63 // \p Previous. The original column of \p MinColumnToken is used to determine 64 // whether \p FormatTok is indented enough to the right to continue \p Previous. 65 static bool continuesLineComment(const FormatToken &FormatTok, 66 const FormatToken *Previous, 67 const FormatToken *MinColumnToken) { 68 if (!Previous || !MinColumnToken) 69 return false; 70 unsigned MinContinueColumn = 71 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 72 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 73 isLineComment(*Previous) && 74 FormatTok.OriginalColumn >= MinContinueColumn; 75 } 76 77 class ScopedMacroState : public FormatTokenSource { 78 public: 79 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 80 FormatToken *&ResetToken) 81 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 82 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 83 Token(nullptr), PreviousToken(nullptr) { 84 TokenSource = this; 85 Line.Level = 0; 86 Line.InPPDirective = true; 87 } 88 89 ~ScopedMacroState() override { 90 TokenSource = PreviousTokenSource; 91 ResetToken = Token; 92 Line.InPPDirective = false; 93 Line.Level = PreviousLineLevel; 94 } 95 96 FormatToken *getNextToken() override { 97 // The \c UnwrappedLineParser guards against this by never calling 98 // \c getNextToken() after it has encountered the first eof token. 99 assert(!eof()); 100 PreviousToken = Token; 101 Token = PreviousTokenSource->getNextToken(); 102 if (eof()) 103 return getFakeEOF(); 104 return Token; 105 } 106 107 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 108 109 FormatToken *setPosition(unsigned Position) override { 110 PreviousToken = nullptr; 111 Token = PreviousTokenSource->setPosition(Position); 112 return Token; 113 } 114 115 private: 116 bool eof() { 117 return Token && Token->HasUnescapedNewline && 118 !continuesLineComment(*Token, PreviousToken, 119 /*MinColumnToken=*/PreviousToken); 120 } 121 122 FormatToken *getFakeEOF() { 123 static bool EOFInitialized = false; 124 static FormatToken FormatTok; 125 if (!EOFInitialized) { 126 FormatTok.Tok.startToken(); 127 FormatTok.Tok.setKind(tok::eof); 128 EOFInitialized = true; 129 } 130 return &FormatTok; 131 } 132 133 UnwrappedLine &Line; 134 FormatTokenSource *&TokenSource; 135 FormatToken *&ResetToken; 136 unsigned PreviousLineLevel; 137 FormatTokenSource *PreviousTokenSource; 138 139 FormatToken *Token; 140 FormatToken *PreviousToken; 141 }; 142 143 } // end anonymous namespace 144 145 class ScopedLineState { 146 public: 147 ScopedLineState(UnwrappedLineParser &Parser, 148 bool SwitchToPreprocessorLines = false) 149 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 150 if (SwitchToPreprocessorLines) 151 Parser.CurrentLines = &Parser.PreprocessorDirectives; 152 else if (!Parser.Line->Tokens.empty()) 153 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 154 PreBlockLine = std::move(Parser.Line); 155 Parser.Line = llvm::make_unique<UnwrappedLine>(); 156 Parser.Line->Level = PreBlockLine->Level; 157 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 158 } 159 160 ~ScopedLineState() { 161 if (!Parser.Line->Tokens.empty()) { 162 Parser.addUnwrappedLine(); 163 } 164 assert(Parser.Line->Tokens.empty()); 165 Parser.Line = std::move(PreBlockLine); 166 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 167 Parser.MustBreakBeforeNextToken = true; 168 Parser.CurrentLines = OriginalLines; 169 } 170 171 private: 172 UnwrappedLineParser &Parser; 173 174 std::unique_ptr<UnwrappedLine> PreBlockLine; 175 SmallVectorImpl<UnwrappedLine> *OriginalLines; 176 }; 177 178 class CompoundStatementIndenter { 179 public: 180 CompoundStatementIndenter(UnwrappedLineParser *Parser, 181 const FormatStyle &Style, unsigned &LineLevel) 182 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 183 if (Style.BraceWrapping.AfterControlStatement) 184 Parser->addUnwrappedLine(); 185 if (Style.BraceWrapping.IndentBraces) 186 ++LineLevel; 187 } 188 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 189 190 private: 191 unsigned &LineLevel; 192 unsigned OldLineLevel; 193 }; 194 195 namespace { 196 197 class IndexedTokenSource : public FormatTokenSource { 198 public: 199 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 200 : Tokens(Tokens), Position(-1) {} 201 202 FormatToken *getNextToken() override { 203 ++Position; 204 return Tokens[Position]; 205 } 206 207 unsigned getPosition() override { 208 assert(Position >= 0); 209 return Position; 210 } 211 212 FormatToken *setPosition(unsigned P) override { 213 Position = P; 214 return Tokens[Position]; 215 } 216 217 void reset() { Position = -1; } 218 219 private: 220 ArrayRef<FormatToken *> Tokens; 221 int Position; 222 }; 223 224 } // end anonymous namespace 225 226 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 227 const AdditionalKeywords &Keywords, 228 ArrayRef<FormatToken *> Tokens, 229 UnwrappedLineConsumer &Callback) 230 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 231 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 232 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 233 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 234 IfNdefCondition(nullptr), FoundIncludeGuardStart(false), 235 IncludeGuardRejected(false) {} 236 237 void UnwrappedLineParser::reset() { 238 PPBranchLevel = -1; 239 IfNdefCondition = nullptr; 240 FoundIncludeGuardStart = false; 241 IncludeGuardRejected = false; 242 Line.reset(new UnwrappedLine); 243 CommentsBeforeNextToken.clear(); 244 FormatTok = nullptr; 245 MustBreakBeforeNextToken = false; 246 PreprocessorDirectives.clear(); 247 CurrentLines = &Lines; 248 DeclarationScopeStack.clear(); 249 PPStack.clear(); 250 } 251 252 void UnwrappedLineParser::parse() { 253 IndexedTokenSource TokenSource(AllTokens); 254 do { 255 DEBUG(llvm::dbgs() << "----\n"); 256 reset(); 257 Tokens = &TokenSource; 258 TokenSource.reset(); 259 260 readToken(); 261 parseFile(); 262 // Create line with eof token. 263 pushToken(FormatTok); 264 addUnwrappedLine(); 265 266 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 267 E = Lines.end(); 268 I != E; ++I) { 269 Callback.consumeUnwrappedLine(*I); 270 } 271 Callback.finishRun(); 272 Lines.clear(); 273 while (!PPLevelBranchIndex.empty() && 274 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 275 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 276 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 277 } 278 if (!PPLevelBranchIndex.empty()) { 279 ++PPLevelBranchIndex.back(); 280 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 281 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 282 } 283 } while (!PPLevelBranchIndex.empty()); 284 } 285 286 void UnwrappedLineParser::parseFile() { 287 // The top-level context in a file always has declarations, except for pre- 288 // processor directives and JavaScript files. 289 bool MustBeDeclaration = 290 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 291 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 292 MustBeDeclaration); 293 if (Style.Language == FormatStyle::LK_TextProto) 294 parseBracedList(); 295 else 296 parseLevel(/*HasOpeningBrace=*/false); 297 // Make sure to format the remaining tokens. 298 flushComments(true); 299 addUnwrappedLine(); 300 } 301 302 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 303 bool SwitchLabelEncountered = false; 304 do { 305 tok::TokenKind kind = FormatTok->Tok.getKind(); 306 if (FormatTok->Type == TT_MacroBlockBegin) { 307 kind = tok::l_brace; 308 } else if (FormatTok->Type == TT_MacroBlockEnd) { 309 kind = tok::r_brace; 310 } 311 312 switch (kind) { 313 case tok::comment: 314 nextToken(); 315 addUnwrappedLine(); 316 break; 317 case tok::l_brace: 318 // FIXME: Add parameter whether this can happen - if this happens, we must 319 // be in a non-declaration context. 320 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 321 continue; 322 parseBlock(/*MustBeDeclaration=*/false); 323 addUnwrappedLine(); 324 break; 325 case tok::r_brace: 326 if (HasOpeningBrace) 327 return; 328 nextToken(); 329 addUnwrappedLine(); 330 break; 331 case tok::kw_default: 332 case tok::kw_case: 333 if (Style.Language == FormatStyle::LK_JavaScript && 334 Line->MustBeDeclaration) { 335 // A 'case: string' style field declaration. 336 parseStructuralElement(); 337 break; 338 } 339 if (!SwitchLabelEncountered && 340 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 341 ++Line->Level; 342 SwitchLabelEncountered = true; 343 parseStructuralElement(); 344 break; 345 default: 346 parseStructuralElement(); 347 break; 348 } 349 } while (!eof()); 350 } 351 352 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 353 // We'll parse forward through the tokens until we hit 354 // a closing brace or eof - note that getNextToken() will 355 // parse macros, so this will magically work inside macro 356 // definitions, too. 357 unsigned StoredPosition = Tokens->getPosition(); 358 FormatToken *Tok = FormatTok; 359 const FormatToken *PrevTok = Tok->Previous; 360 // Keep a stack of positions of lbrace tokens. We will 361 // update information about whether an lbrace starts a 362 // braced init list or a different block during the loop. 363 SmallVector<FormatToken *, 8> LBraceStack; 364 assert(Tok->Tok.is(tok::l_brace)); 365 do { 366 // Get next non-comment token. 367 FormatToken *NextTok; 368 unsigned ReadTokens = 0; 369 do { 370 NextTok = Tokens->getNextToken(); 371 ++ReadTokens; 372 } while (NextTok->is(tok::comment)); 373 374 switch (Tok->Tok.getKind()) { 375 case tok::l_brace: 376 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 377 if (PrevTok->is(tok::colon)) 378 // A colon indicates this code is in a type, or a braced list 379 // following a label in an object literal ({a: {b: 1}}). The code 380 // below could be confused by semicolons between the individual 381 // members in a type member list, which would normally trigger 382 // BK_Block. In both cases, this must be parsed as an inline braced 383 // init. 384 Tok->BlockKind = BK_BracedInit; 385 else if (PrevTok->is(tok::r_paren)) 386 // `) { }` can only occur in function or method declarations in JS. 387 Tok->BlockKind = BK_Block; 388 } else { 389 Tok->BlockKind = BK_Unknown; 390 } 391 LBraceStack.push_back(Tok); 392 break; 393 case tok::r_brace: 394 if (LBraceStack.empty()) 395 break; 396 if (LBraceStack.back()->BlockKind == BK_Unknown) { 397 bool ProbablyBracedList = false; 398 if (Style.Language == FormatStyle::LK_Proto) { 399 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 400 } else { 401 // Using OriginalColumn to distinguish between ObjC methods and 402 // binary operators is a bit hacky. 403 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 404 NextTok->OriginalColumn == 0; 405 406 // If there is a comma, semicolon or right paren after the closing 407 // brace, we assume this is a braced initializer list. Note that 408 // regardless how we mark inner braces here, we will overwrite the 409 // BlockKind later if we parse a braced list (where all blocks 410 // inside are by default braced lists), or when we explicitly detect 411 // blocks (for example while parsing lambdas). 412 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 413 // braced list in JS. 414 ProbablyBracedList = 415 (Style.Language == FormatStyle::LK_JavaScript && 416 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 417 Keywords.kw_as)) || 418 (Style.isCpp() && NextTok->is(tok::l_paren)) || 419 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 420 tok::r_paren, tok::r_square, tok::l_brace, 421 tok::l_square, tok::ellipsis) || 422 (NextTok->is(tok::identifier) && 423 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 424 (NextTok->is(tok::semi) && 425 (!ExpectClassBody || LBraceStack.size() != 1)) || 426 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 427 } 428 if (ProbablyBracedList) { 429 Tok->BlockKind = BK_BracedInit; 430 LBraceStack.back()->BlockKind = BK_BracedInit; 431 } else { 432 Tok->BlockKind = BK_Block; 433 LBraceStack.back()->BlockKind = BK_Block; 434 } 435 } 436 LBraceStack.pop_back(); 437 break; 438 case tok::at: 439 case tok::semi: 440 case tok::kw_if: 441 case tok::kw_while: 442 case tok::kw_for: 443 case tok::kw_switch: 444 case tok::kw_try: 445 case tok::kw___try: 446 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 447 LBraceStack.back()->BlockKind = BK_Block; 448 break; 449 default: 450 break; 451 } 452 PrevTok = Tok; 453 Tok = NextTok; 454 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 455 456 // Assume other blocks for all unclosed opening braces. 457 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 458 if (LBraceStack[i]->BlockKind == BK_Unknown) 459 LBraceStack[i]->BlockKind = BK_Block; 460 } 461 462 FormatTok = Tokens->setPosition(StoredPosition); 463 } 464 465 template <class T> 466 static inline void hash_combine(std::size_t &seed, const T &v) { 467 std::hash<T> hasher; 468 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 469 } 470 471 size_t UnwrappedLineParser::computePPHash() const { 472 size_t h = 0; 473 for (const auto &i : PPStack) { 474 hash_combine(h, size_t(i.Kind)); 475 hash_combine(h, i.Line); 476 } 477 return h; 478 } 479 480 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 481 bool MunchSemi) { 482 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 483 "'{' or macro block token expected"); 484 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 485 FormatTok->BlockKind = BK_Block; 486 487 size_t PPStartHash = computePPHash(); 488 489 unsigned InitialLevel = Line->Level; 490 nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); 491 492 if (MacroBlock && FormatTok->is(tok::l_paren)) 493 parseParens(); 494 495 size_t NbPreprocessorDirectives = 496 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 497 addUnwrappedLine(); 498 size_t OpeningLineIndex = 499 CurrentLines->empty() 500 ? (UnwrappedLine::kInvalidIndex) 501 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 502 503 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 504 MustBeDeclaration); 505 if (AddLevel) 506 ++Line->Level; 507 parseLevel(/*HasOpeningBrace=*/true); 508 509 if (eof()) 510 return; 511 512 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 513 : !FormatTok->is(tok::r_brace)) { 514 Line->Level = InitialLevel; 515 FormatTok->BlockKind = BK_Block; 516 return; 517 } 518 519 size_t PPEndHash = computePPHash(); 520 521 // Munch the closing brace. 522 nextToken(/*LevelDifference=*/AddLevel ? -1 : 0); 523 524 if (MacroBlock && FormatTok->is(tok::l_paren)) 525 parseParens(); 526 527 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 528 nextToken(); 529 Line->Level = InitialLevel; 530 531 if (PPStartHash == PPEndHash) { 532 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 533 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 534 // Update the opening line to add the forward reference as well 535 (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex = 536 CurrentLines->size() - 1; 537 } 538 } 539 } 540 541 static bool isGoogScope(const UnwrappedLine &Line) { 542 // FIXME: Closure-library specific stuff should not be hard-coded but be 543 // configurable. 544 if (Line.Tokens.size() < 4) 545 return false; 546 auto I = Line.Tokens.begin(); 547 if (I->Tok->TokenText != "goog") 548 return false; 549 ++I; 550 if (I->Tok->isNot(tok::period)) 551 return false; 552 ++I; 553 if (I->Tok->TokenText != "scope") 554 return false; 555 ++I; 556 return I->Tok->is(tok::l_paren); 557 } 558 559 static bool isIIFE(const UnwrappedLine &Line, 560 const AdditionalKeywords &Keywords) { 561 // Look for the start of an immediately invoked anonymous function. 562 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 563 // This is commonly done in JavaScript to create a new, anonymous scope. 564 // Example: (function() { ... })() 565 if (Line.Tokens.size() < 3) 566 return false; 567 auto I = Line.Tokens.begin(); 568 if (I->Tok->isNot(tok::l_paren)) 569 return false; 570 ++I; 571 if (I->Tok->isNot(Keywords.kw_function)) 572 return false; 573 ++I; 574 return I->Tok->is(tok::l_paren); 575 } 576 577 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 578 const FormatToken &InitialToken) { 579 if (InitialToken.is(tok::kw_namespace)) 580 return Style.BraceWrapping.AfterNamespace; 581 if (InitialToken.is(tok::kw_class)) 582 return Style.BraceWrapping.AfterClass; 583 if (InitialToken.is(tok::kw_union)) 584 return Style.BraceWrapping.AfterUnion; 585 if (InitialToken.is(tok::kw_struct)) 586 return Style.BraceWrapping.AfterStruct; 587 return false; 588 } 589 590 void UnwrappedLineParser::parseChildBlock() { 591 FormatTok->BlockKind = BK_Block; 592 nextToken(); 593 { 594 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && 595 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 596 ScopedLineState LineState(*this); 597 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 598 /*MustBeDeclaration=*/false); 599 Line->Level += SkipIndent ? 0 : 1; 600 parseLevel(/*HasOpeningBrace=*/true); 601 flushComments(isOnNewLine(*FormatTok)); 602 Line->Level -= SkipIndent ? 0 : 1; 603 } 604 nextToken(); 605 } 606 607 void UnwrappedLineParser::parsePPDirective() { 608 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 609 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 610 nextToken(); 611 612 if (!FormatTok->Tok.getIdentifierInfo()) { 613 parsePPUnknown(); 614 return; 615 } 616 617 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 618 case tok::pp_define: 619 parsePPDefine(); 620 return; 621 case tok::pp_if: 622 parsePPIf(/*IfDef=*/false); 623 break; 624 case tok::pp_ifdef: 625 case tok::pp_ifndef: 626 parsePPIf(/*IfDef=*/true); 627 break; 628 case tok::pp_else: 629 parsePPElse(); 630 break; 631 case tok::pp_elif: 632 parsePPElIf(); 633 break; 634 case tok::pp_endif: 635 parsePPEndIf(); 636 break; 637 default: 638 parsePPUnknown(); 639 break; 640 } 641 } 642 643 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 644 size_t Line = CurrentLines->size(); 645 if (CurrentLines == &PreprocessorDirectives) 646 Line += Lines.size(); 647 648 if (Unreachable || 649 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 650 PPStack.push_back({PP_Unreachable, Line}); 651 else 652 PPStack.push_back({PP_Conditional, Line}); 653 } 654 655 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 656 ++PPBranchLevel; 657 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 658 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 659 PPLevelBranchIndex.push_back(0); 660 PPLevelBranchCount.push_back(0); 661 } 662 PPChainBranchIndex.push(0); 663 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 664 conditionalCompilationCondition(Unreachable || Skip); 665 } 666 667 void UnwrappedLineParser::conditionalCompilationAlternative() { 668 if (!PPStack.empty()) 669 PPStack.pop_back(); 670 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 671 if (!PPChainBranchIndex.empty()) 672 ++PPChainBranchIndex.top(); 673 conditionalCompilationCondition( 674 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 675 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 676 } 677 678 void UnwrappedLineParser::conditionalCompilationEnd() { 679 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 680 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 681 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 682 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 683 } 684 } 685 // Guard against #endif's without #if. 686 if (PPBranchLevel > -1) 687 --PPBranchLevel; 688 if (!PPChainBranchIndex.empty()) 689 PPChainBranchIndex.pop(); 690 if (!PPStack.empty()) 691 PPStack.pop_back(); 692 } 693 694 void UnwrappedLineParser::parsePPIf(bool IfDef) { 695 bool IfNDef = FormatTok->is(tok::pp_ifndef); 696 nextToken(); 697 bool Unreachable = false; 698 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 699 Unreachable = true; 700 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 701 Unreachable = true; 702 conditionalCompilationStart(Unreachable); 703 FormatToken *IfCondition = FormatTok; 704 // If there's a #ifndef on the first line, and the only lines before it are 705 // comments, it could be an include guard. 706 bool MaybeIncludeGuard = IfNDef; 707 if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) { 708 for (auto &Line : Lines) { 709 if (!Line.Tokens.front().Tok->is(tok::comment)) { 710 MaybeIncludeGuard = false; 711 IncludeGuardRejected = true; 712 break; 713 } 714 } 715 } 716 --PPBranchLevel; 717 parsePPUnknown(); 718 ++PPBranchLevel; 719 if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) 720 IfNdefCondition = IfCondition; 721 } 722 723 void UnwrappedLineParser::parsePPElse() { 724 // If a potential include guard has an #else, it's not an include guard. 725 if (FoundIncludeGuardStart && PPBranchLevel == 0) 726 FoundIncludeGuardStart = false; 727 conditionalCompilationAlternative(); 728 if (PPBranchLevel > -1) 729 --PPBranchLevel; 730 parsePPUnknown(); 731 ++PPBranchLevel; 732 } 733 734 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 735 736 void UnwrappedLineParser::parsePPEndIf() { 737 conditionalCompilationEnd(); 738 parsePPUnknown(); 739 // If the #endif of a potential include guard is the last thing in the file, 740 // then we count it as a real include guard and subtract one from every 741 // preprocessor indent. 742 unsigned TokenPosition = Tokens->getPosition(); 743 FormatToken *PeekNext = AllTokens[TokenPosition]; 744 if (FoundIncludeGuardStart && PPBranchLevel == -1 && PeekNext->is(tok::eof) && 745 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 746 for (auto &Line : Lines) 747 if (Line.InPPDirective && Line.Level > 0) 748 --Line.Level; 749 } 750 751 void UnwrappedLineParser::parsePPDefine() { 752 nextToken(); 753 754 if (FormatTok->Tok.getKind() != tok::identifier) { 755 parsePPUnknown(); 756 return; 757 } 758 if (IfNdefCondition && IfNdefCondition->TokenText == FormatTok->TokenText) { 759 FoundIncludeGuardStart = true; 760 for (auto &Line : Lines) { 761 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 762 FoundIncludeGuardStart = false; 763 break; 764 } 765 } 766 } 767 IfNdefCondition = nullptr; 768 nextToken(); 769 if (FormatTok->Tok.getKind() == tok::l_paren && 770 FormatTok->WhitespaceRange.getBegin() == 771 FormatTok->WhitespaceRange.getEnd()) { 772 parseParens(); 773 } 774 if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) 775 Line->Level += PPBranchLevel + 1; 776 addUnwrappedLine(); 777 ++Line->Level; 778 779 // Errors during a preprocessor directive can only affect the layout of the 780 // preprocessor directive, and thus we ignore them. An alternative approach 781 // would be to use the same approach we use on the file level (no 782 // re-indentation if there was a structural error) within the macro 783 // definition. 784 parseFile(); 785 } 786 787 void UnwrappedLineParser::parsePPUnknown() { 788 do { 789 nextToken(); 790 } while (!eof()); 791 if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) 792 Line->Level += PPBranchLevel + 1; 793 addUnwrappedLine(); 794 IfNdefCondition = nullptr; 795 } 796 797 // Here we blacklist certain tokens that are not usually the first token in an 798 // unwrapped line. This is used in attempt to distinguish macro calls without 799 // trailing semicolons from other constructs split to several lines. 800 static bool tokenCanStartNewLine(const clang::Token &Tok) { 801 // Semicolon can be a null-statement, l_square can be a start of a macro or 802 // a C++11 attribute, but this doesn't seem to be common. 803 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 804 Tok.isNot(tok::l_square) && 805 // Tokens that can only be used as binary operators and a part of 806 // overloaded operator names. 807 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 808 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 809 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 810 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 811 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 812 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 813 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 814 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 815 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 816 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 817 Tok.isNot(tok::lesslessequal) && 818 // Colon is used in labels, base class lists, initializer lists, 819 // range-based for loops, ternary operator, but should never be the 820 // first token in an unwrapped line. 821 Tok.isNot(tok::colon) && 822 // 'noexcept' is a trailing annotation. 823 Tok.isNot(tok::kw_noexcept); 824 } 825 826 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 827 const FormatToken *FormatTok) { 828 // FIXME: This returns true for C/C++ keywords like 'struct'. 829 return FormatTok->is(tok::identifier) && 830 (FormatTok->Tok.getIdentifierInfo() == nullptr || 831 !FormatTok->isOneOf( 832 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 833 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 834 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 835 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 836 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 837 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, 838 Keywords.kw_from)); 839 } 840 841 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 842 const FormatToken *FormatTok) { 843 return FormatTok->Tok.isLiteral() || 844 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 845 mustBeJSIdent(Keywords, FormatTok); 846 } 847 848 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 849 // when encountered after a value (see mustBeJSIdentOrValue). 850 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 851 const FormatToken *FormatTok) { 852 return FormatTok->isOneOf( 853 tok::kw_return, Keywords.kw_yield, 854 // conditionals 855 tok::kw_if, tok::kw_else, 856 // loops 857 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 858 // switch/case 859 tok::kw_switch, tok::kw_case, 860 // exceptions 861 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 862 // declaration 863 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 864 Keywords.kw_async, Keywords.kw_function, 865 // import/export 866 Keywords.kw_import, tok::kw_export); 867 } 868 869 // readTokenWithJavaScriptASI reads the next token and terminates the current 870 // line if JavaScript Automatic Semicolon Insertion must 871 // happen between the current token and the next token. 872 // 873 // This method is conservative - it cannot cover all edge cases of JavaScript, 874 // but only aims to correctly handle certain well known cases. It *must not* 875 // return true in speculative cases. 876 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 877 FormatToken *Previous = FormatTok; 878 readToken(); 879 FormatToken *Next = FormatTok; 880 881 bool IsOnSameLine = 882 CommentsBeforeNextToken.empty() 883 ? Next->NewlinesBefore == 0 884 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 885 if (IsOnSameLine) 886 return; 887 888 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 889 bool PreviousStartsTemplateExpr = 890 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 891 if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) { 892 // If the token before the previous one is an '@', the previous token is an 893 // annotation and can precede another identifier/value. 894 const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok; 895 if (PrePrevious->is(tok::at)) 896 return; 897 } 898 if (Next->is(tok::exclaim) && PreviousMustBeValue) 899 return addUnwrappedLine(); 900 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 901 bool NextEndsTemplateExpr = 902 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 903 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 904 (PreviousMustBeValue || 905 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 906 tok::minusminus))) 907 return addUnwrappedLine(); 908 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 909 isJSDeclOrStmt(Keywords, Next)) 910 return addUnwrappedLine(); 911 } 912 913 void UnwrappedLineParser::parseStructuralElement() { 914 assert(!FormatTok->is(tok::l_brace)); 915 if (Style.Language == FormatStyle::LK_TableGen && 916 FormatTok->is(tok::pp_include)) { 917 nextToken(); 918 if (FormatTok->is(tok::string_literal)) 919 nextToken(); 920 addUnwrappedLine(); 921 return; 922 } 923 switch (FormatTok->Tok.getKind()) { 924 case tok::at: 925 nextToken(); 926 if (FormatTok->Tok.is(tok::l_brace)) { 927 nextToken(); 928 parseBracedList(); 929 break; 930 } 931 switch (FormatTok->Tok.getObjCKeywordID()) { 932 case tok::objc_public: 933 case tok::objc_protected: 934 case tok::objc_package: 935 case tok::objc_private: 936 return parseAccessSpecifier(); 937 case tok::objc_interface: 938 case tok::objc_implementation: 939 return parseObjCInterfaceOrImplementation(); 940 case tok::objc_protocol: 941 return parseObjCProtocol(); 942 case tok::objc_end: 943 return; // Handled by the caller. 944 case tok::objc_optional: 945 case tok::objc_required: 946 nextToken(); 947 addUnwrappedLine(); 948 return; 949 case tok::objc_autoreleasepool: 950 nextToken(); 951 if (FormatTok->Tok.is(tok::l_brace)) { 952 if (Style.BraceWrapping.AfterObjCDeclaration) 953 addUnwrappedLine(); 954 parseBlock(/*MustBeDeclaration=*/false); 955 } 956 addUnwrappedLine(); 957 return; 958 case tok::objc_try: 959 // This branch isn't strictly necessary (the kw_try case below would 960 // do this too after the tok::at is parsed above). But be explicit. 961 parseTryCatch(); 962 return; 963 default: 964 break; 965 } 966 break; 967 case tok::kw_asm: 968 nextToken(); 969 if (FormatTok->is(tok::l_brace)) { 970 FormatTok->Type = TT_InlineASMBrace; 971 nextToken(); 972 while (FormatTok && FormatTok->isNot(tok::eof)) { 973 if (FormatTok->is(tok::r_brace)) { 974 FormatTok->Type = TT_InlineASMBrace; 975 nextToken(); 976 addUnwrappedLine(); 977 break; 978 } 979 FormatTok->Finalized = true; 980 nextToken(); 981 } 982 } 983 break; 984 case tok::kw_namespace: 985 parseNamespace(); 986 return; 987 case tok::kw_inline: 988 nextToken(); 989 if (FormatTok->Tok.is(tok::kw_namespace)) { 990 parseNamespace(); 991 return; 992 } 993 break; 994 case tok::kw_public: 995 case tok::kw_protected: 996 case tok::kw_private: 997 if (Style.Language == FormatStyle::LK_Java || 998 Style.Language == FormatStyle::LK_JavaScript) 999 nextToken(); 1000 else 1001 parseAccessSpecifier(); 1002 return; 1003 case tok::kw_if: 1004 parseIfThenElse(); 1005 return; 1006 case tok::kw_for: 1007 case tok::kw_while: 1008 parseForOrWhileLoop(); 1009 return; 1010 case tok::kw_do: 1011 parseDoWhile(); 1012 return; 1013 case tok::kw_switch: 1014 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1015 // 'switch: string' field declaration. 1016 break; 1017 parseSwitch(); 1018 return; 1019 case tok::kw_default: 1020 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1021 // 'default: string' field declaration. 1022 break; 1023 nextToken(); 1024 parseLabel(); 1025 return; 1026 case tok::kw_case: 1027 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1028 // 'case: string' field declaration. 1029 break; 1030 parseCaseLabel(); 1031 return; 1032 case tok::kw_try: 1033 case tok::kw___try: 1034 parseTryCatch(); 1035 return; 1036 case tok::kw_extern: 1037 nextToken(); 1038 if (FormatTok->Tok.is(tok::string_literal)) { 1039 nextToken(); 1040 if (FormatTok->Tok.is(tok::l_brace)) { 1041 if (Style.BraceWrapping.AfterExternBlock) { 1042 addUnwrappedLine(); 1043 parseBlock(/*MustBeDeclaration=*/true); 1044 } else { 1045 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 1046 } 1047 addUnwrappedLine(); 1048 return; 1049 } 1050 } 1051 break; 1052 case tok::kw_export: 1053 if (Style.Language == FormatStyle::LK_JavaScript) { 1054 parseJavaScriptEs6ImportExport(); 1055 return; 1056 } 1057 break; 1058 case tok::identifier: 1059 if (FormatTok->is(TT_ForEachMacro)) { 1060 parseForOrWhileLoop(); 1061 return; 1062 } 1063 if (FormatTok->is(TT_MacroBlockBegin)) { 1064 parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, 1065 /*MunchSemi=*/false); 1066 return; 1067 } 1068 if (FormatTok->is(Keywords.kw_import)) { 1069 if (Style.Language == FormatStyle::LK_JavaScript) { 1070 parseJavaScriptEs6ImportExport(); 1071 return; 1072 } 1073 if (Style.Language == FormatStyle::LK_Proto) { 1074 nextToken(); 1075 if (FormatTok->is(tok::kw_public)) 1076 nextToken(); 1077 if (!FormatTok->is(tok::string_literal)) 1078 return; 1079 nextToken(); 1080 if (FormatTok->is(tok::semi)) 1081 nextToken(); 1082 addUnwrappedLine(); 1083 return; 1084 } 1085 } 1086 if (Style.isCpp() && 1087 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1088 Keywords.kw_slots, Keywords.kw_qslots)) { 1089 nextToken(); 1090 if (FormatTok->is(tok::colon)) { 1091 nextToken(); 1092 addUnwrappedLine(); 1093 return; 1094 } 1095 } 1096 // In all other cases, parse the declaration. 1097 break; 1098 default: 1099 break; 1100 } 1101 do { 1102 const FormatToken *Previous = FormatTok->Previous; 1103 switch (FormatTok->Tok.getKind()) { 1104 case tok::at: 1105 nextToken(); 1106 if (FormatTok->Tok.is(tok::l_brace)) { 1107 nextToken(); 1108 parseBracedList(); 1109 } 1110 break; 1111 case tok::kw_enum: 1112 // Ignore if this is part of "template <enum ...". 1113 if (Previous && Previous->is(tok::less)) { 1114 nextToken(); 1115 break; 1116 } 1117 1118 // parseEnum falls through and does not yet add an unwrapped line as an 1119 // enum definition can start a structural element. 1120 if (!parseEnum()) 1121 break; 1122 // This only applies for C++. 1123 if (!Style.isCpp()) { 1124 addUnwrappedLine(); 1125 return; 1126 } 1127 break; 1128 case tok::kw_typedef: 1129 nextToken(); 1130 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1131 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 1132 parseEnum(); 1133 break; 1134 case tok::kw_struct: 1135 case tok::kw_union: 1136 case tok::kw_class: 1137 // parseRecord falls through and does not yet add an unwrapped line as a 1138 // record declaration or definition can start a structural element. 1139 parseRecord(); 1140 // This does not apply for Java and JavaScript. 1141 if (Style.Language == FormatStyle::LK_Java || 1142 Style.Language == FormatStyle::LK_JavaScript) { 1143 if (FormatTok->is(tok::semi)) 1144 nextToken(); 1145 addUnwrappedLine(); 1146 return; 1147 } 1148 break; 1149 case tok::period: 1150 nextToken(); 1151 // In Java, classes have an implicit static member "class". 1152 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1153 FormatTok->is(tok::kw_class)) 1154 nextToken(); 1155 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 1156 FormatTok->Tok.getIdentifierInfo()) 1157 // JavaScript only has pseudo keywords, all keywords are allowed to 1158 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1159 nextToken(); 1160 break; 1161 case tok::semi: 1162 nextToken(); 1163 addUnwrappedLine(); 1164 return; 1165 case tok::r_brace: 1166 addUnwrappedLine(); 1167 return; 1168 case tok::l_paren: 1169 parseParens(); 1170 break; 1171 case tok::kw_operator: 1172 nextToken(); 1173 if (FormatTok->isBinaryOperator()) 1174 nextToken(); 1175 break; 1176 case tok::caret: 1177 nextToken(); 1178 if (FormatTok->Tok.isAnyIdentifier() || 1179 FormatTok->isSimpleTypeSpecifier()) 1180 nextToken(); 1181 if (FormatTok->is(tok::l_paren)) 1182 parseParens(); 1183 if (FormatTok->is(tok::l_brace)) 1184 parseChildBlock(); 1185 break; 1186 case tok::l_brace: 1187 if (!tryToParseBracedList()) { 1188 // A block outside of parentheses must be the last part of a 1189 // structural element. 1190 // FIXME: Figure out cases where this is not true, and add projections 1191 // for them (the one we know is missing are lambdas). 1192 if (Style.BraceWrapping.AfterFunction) 1193 addUnwrappedLine(); 1194 FormatTok->Type = TT_FunctionLBrace; 1195 parseBlock(/*MustBeDeclaration=*/false); 1196 addUnwrappedLine(); 1197 return; 1198 } 1199 // Otherwise this was a braced init list, and the structural 1200 // element continues. 1201 break; 1202 case tok::kw_try: 1203 // We arrive here when parsing function-try blocks. 1204 parseTryCatch(); 1205 return; 1206 case tok::identifier: { 1207 if (FormatTok->is(TT_MacroBlockEnd)) { 1208 addUnwrappedLine(); 1209 return; 1210 } 1211 1212 // Function declarations (as opposed to function expressions) are parsed 1213 // on their own unwrapped line by continuing this loop. Function 1214 // expressions (functions that are not on their own line) must not create 1215 // a new unwrapped line, so they are special cased below. 1216 size_t TokenCount = Line->Tokens.size(); 1217 if (Style.Language == FormatStyle::LK_JavaScript && 1218 FormatTok->is(Keywords.kw_function) && 1219 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1220 Keywords.kw_async)))) { 1221 tryToParseJSFunction(); 1222 break; 1223 } 1224 if ((Style.Language == FormatStyle::LK_JavaScript || 1225 Style.Language == FormatStyle::LK_Java) && 1226 FormatTok->is(Keywords.kw_interface)) { 1227 if (Style.Language == FormatStyle::LK_JavaScript) { 1228 // In JavaScript/TypeScript, "interface" can be used as a standalone 1229 // identifier, e.g. in `var interface = 1;`. If "interface" is 1230 // followed by another identifier, it is very like to be an actual 1231 // interface declaration. 1232 unsigned StoredPosition = Tokens->getPosition(); 1233 FormatToken *Next = Tokens->getNextToken(); 1234 FormatTok = Tokens->setPosition(StoredPosition); 1235 if (Next && !mustBeJSIdent(Keywords, Next)) { 1236 nextToken(); 1237 break; 1238 } 1239 } 1240 parseRecord(); 1241 addUnwrappedLine(); 1242 return; 1243 } 1244 1245 // See if the following token should start a new unwrapped line. 1246 StringRef Text = FormatTok->TokenText; 1247 nextToken(); 1248 if (Line->Tokens.size() == 1 && 1249 // JS doesn't have macros, and within classes colons indicate fields, 1250 // not labels. 1251 Style.Language != FormatStyle::LK_JavaScript) { 1252 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1253 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1254 parseLabel(); 1255 return; 1256 } 1257 // Recognize function-like macro usages without trailing semicolon as 1258 // well as free-standing macros like Q_OBJECT. 1259 bool FunctionLike = FormatTok->is(tok::l_paren); 1260 if (FunctionLike) 1261 parseParens(); 1262 1263 bool FollowedByNewline = 1264 CommentsBeforeNextToken.empty() 1265 ? FormatTok->NewlinesBefore > 0 1266 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1267 1268 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1269 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 1270 addUnwrappedLine(); 1271 return; 1272 } 1273 } 1274 break; 1275 } 1276 case tok::equal: 1277 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 1278 // TT_JsFatArrow. The always start an expression or a child block if 1279 // followed by a curly. 1280 if (FormatTok->is(TT_JsFatArrow)) { 1281 nextToken(); 1282 if (FormatTok->is(tok::l_brace)) 1283 parseChildBlock(); 1284 break; 1285 } 1286 1287 nextToken(); 1288 if (FormatTok->Tok.is(tok::l_brace)) { 1289 nextToken(); 1290 parseBracedList(); 1291 } else if (Style.Language == FormatStyle::LK_Proto && 1292 FormatTok->Tok.is(tok::less)) { 1293 nextToken(); 1294 parseBracedList(/*ContinueOnSemicolons=*/false, 1295 /*ClosingBraceKind=*/tok::greater); 1296 } 1297 break; 1298 case tok::l_square: 1299 parseSquare(); 1300 break; 1301 case tok::kw_new: 1302 parseNew(); 1303 break; 1304 default: 1305 nextToken(); 1306 break; 1307 } 1308 } while (!eof()); 1309 } 1310 1311 bool UnwrappedLineParser::tryToParseLambda() { 1312 if (!Style.isCpp()) { 1313 nextToken(); 1314 return false; 1315 } 1316 assert(FormatTok->is(tok::l_square)); 1317 FormatToken &LSquare = *FormatTok; 1318 if (!tryToParseLambdaIntroducer()) 1319 return false; 1320 1321 while (FormatTok->isNot(tok::l_brace)) { 1322 if (FormatTok->isSimpleTypeSpecifier()) { 1323 nextToken(); 1324 continue; 1325 } 1326 switch (FormatTok->Tok.getKind()) { 1327 case tok::l_brace: 1328 break; 1329 case tok::l_paren: 1330 parseParens(); 1331 break; 1332 case tok::amp: 1333 case tok::star: 1334 case tok::kw_const: 1335 case tok::comma: 1336 case tok::less: 1337 case tok::greater: 1338 case tok::identifier: 1339 case tok::numeric_constant: 1340 case tok::coloncolon: 1341 case tok::kw_mutable: 1342 nextToken(); 1343 break; 1344 case tok::arrow: 1345 FormatTok->Type = TT_LambdaArrow; 1346 nextToken(); 1347 break; 1348 default: 1349 return true; 1350 } 1351 } 1352 LSquare.Type = TT_LambdaLSquare; 1353 parseChildBlock(); 1354 return true; 1355 } 1356 1357 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1358 const FormatToken *Previous = FormatTok->Previous; 1359 if (Previous && 1360 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1361 tok::kw_delete) || 1362 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1363 Previous->isSimpleTypeSpecifier())) { 1364 nextToken(); 1365 return false; 1366 } 1367 nextToken(); 1368 parseSquare(/*LambdaIntroducer=*/true); 1369 return true; 1370 } 1371 1372 void UnwrappedLineParser::tryToParseJSFunction() { 1373 assert(FormatTok->is(Keywords.kw_function) || 1374 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1375 if (FormatTok->is(Keywords.kw_async)) 1376 nextToken(); 1377 // Consume "function". 1378 nextToken(); 1379 1380 // Consume * (generator function). Treat it like C++'s overloaded operators. 1381 if (FormatTok->is(tok::star)) { 1382 FormatTok->Type = TT_OverloadedOperator; 1383 nextToken(); 1384 } 1385 1386 // Consume function name. 1387 if (FormatTok->is(tok::identifier)) 1388 nextToken(); 1389 1390 if (FormatTok->isNot(tok::l_paren)) 1391 return; 1392 1393 // Parse formal parameter list. 1394 parseParens(); 1395 1396 if (FormatTok->is(tok::colon)) { 1397 // Parse a type definition. 1398 nextToken(); 1399 1400 // Eat the type declaration. For braced inline object types, balance braces, 1401 // otherwise just parse until finding an l_brace for the function body. 1402 if (FormatTok->is(tok::l_brace)) 1403 tryToParseBracedList(); 1404 else 1405 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1406 nextToken(); 1407 } 1408 1409 if (FormatTok->is(tok::semi)) 1410 return; 1411 1412 parseChildBlock(); 1413 } 1414 1415 bool UnwrappedLineParser::tryToParseBracedList() { 1416 if (FormatTok->BlockKind == BK_Unknown) 1417 calculateBraceTypes(); 1418 assert(FormatTok->BlockKind != BK_Unknown); 1419 if (FormatTok->BlockKind == BK_Block) 1420 return false; 1421 nextToken(); 1422 parseBracedList(); 1423 return true; 1424 } 1425 1426 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1427 tok::TokenKind ClosingBraceKind) { 1428 bool HasError = false; 1429 1430 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1431 // replace this by using parseAssigmentExpression() inside. 1432 do { 1433 if (Style.Language == FormatStyle::LK_JavaScript) { 1434 if (FormatTok->is(Keywords.kw_function) || 1435 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1436 tryToParseJSFunction(); 1437 continue; 1438 } 1439 if (FormatTok->is(TT_JsFatArrow)) { 1440 nextToken(); 1441 // Fat arrows can be followed by simple expressions or by child blocks 1442 // in curly braces. 1443 if (FormatTok->is(tok::l_brace)) { 1444 parseChildBlock(); 1445 continue; 1446 } 1447 } 1448 if (FormatTok->is(tok::l_brace)) { 1449 // Could be a method inside of a braced list `{a() { return 1; }}`. 1450 if (tryToParseBracedList()) 1451 continue; 1452 parseChildBlock(); 1453 } 1454 } 1455 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1456 nextToken(); 1457 return !HasError; 1458 } 1459 switch (FormatTok->Tok.getKind()) { 1460 case tok::caret: 1461 nextToken(); 1462 if (FormatTok->is(tok::l_brace)) { 1463 parseChildBlock(); 1464 } 1465 break; 1466 case tok::l_square: 1467 tryToParseLambda(); 1468 break; 1469 case tok::l_paren: 1470 parseParens(); 1471 // JavaScript can just have free standing methods and getters/setters in 1472 // object literals. Detect them by a "{" following ")". 1473 if (Style.Language == FormatStyle::LK_JavaScript) { 1474 if (FormatTok->is(tok::l_brace)) 1475 parseChildBlock(); 1476 break; 1477 } 1478 break; 1479 case tok::l_brace: 1480 // Assume there are no blocks inside a braced init list apart 1481 // from the ones we explicitly parse out (like lambdas). 1482 FormatTok->BlockKind = BK_BracedInit; 1483 nextToken(); 1484 parseBracedList(); 1485 break; 1486 case tok::less: 1487 if (Style.Language == FormatStyle::LK_Proto) { 1488 nextToken(); 1489 parseBracedList(/*ContinueOnSemicolons=*/false, 1490 /*ClosingBraceKind=*/tok::greater); 1491 } else { 1492 nextToken(); 1493 } 1494 break; 1495 case tok::semi: 1496 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1497 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1498 // used for error recovery if we have otherwise determined that this is 1499 // a braced list. 1500 if (Style.Language == FormatStyle::LK_JavaScript) { 1501 nextToken(); 1502 break; 1503 } 1504 HasError = true; 1505 if (!ContinueOnSemicolons) 1506 return !HasError; 1507 nextToken(); 1508 break; 1509 case tok::comma: 1510 nextToken(); 1511 break; 1512 default: 1513 nextToken(); 1514 break; 1515 } 1516 } while (!eof()); 1517 return false; 1518 } 1519 1520 void UnwrappedLineParser::parseParens() { 1521 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1522 nextToken(); 1523 do { 1524 switch (FormatTok->Tok.getKind()) { 1525 case tok::l_paren: 1526 parseParens(); 1527 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1528 parseChildBlock(); 1529 break; 1530 case tok::r_paren: 1531 nextToken(); 1532 return; 1533 case tok::r_brace: 1534 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1535 return; 1536 case tok::l_square: 1537 tryToParseLambda(); 1538 break; 1539 case tok::l_brace: 1540 if (!tryToParseBracedList()) 1541 parseChildBlock(); 1542 break; 1543 case tok::at: 1544 nextToken(); 1545 if (FormatTok->Tok.is(tok::l_brace)) { 1546 nextToken(); 1547 parseBracedList(); 1548 } 1549 break; 1550 case tok::kw_class: 1551 if (Style.Language == FormatStyle::LK_JavaScript) 1552 parseRecord(/*ParseAsExpr=*/true); 1553 else 1554 nextToken(); 1555 break; 1556 case tok::identifier: 1557 if (Style.Language == FormatStyle::LK_JavaScript && 1558 (FormatTok->is(Keywords.kw_function) || 1559 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1560 tryToParseJSFunction(); 1561 else 1562 nextToken(); 1563 break; 1564 default: 1565 nextToken(); 1566 break; 1567 } 1568 } while (!eof()); 1569 } 1570 1571 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 1572 if (!LambdaIntroducer) { 1573 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1574 if (tryToParseLambda()) 1575 return; 1576 } 1577 do { 1578 switch (FormatTok->Tok.getKind()) { 1579 case tok::l_paren: 1580 parseParens(); 1581 break; 1582 case tok::r_square: 1583 nextToken(); 1584 return; 1585 case tok::r_brace: 1586 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1587 return; 1588 case tok::l_square: 1589 parseSquare(); 1590 break; 1591 case tok::l_brace: { 1592 if (!tryToParseBracedList()) 1593 parseChildBlock(); 1594 break; 1595 } 1596 case tok::at: 1597 nextToken(); 1598 if (FormatTok->Tok.is(tok::l_brace)) { 1599 nextToken(); 1600 parseBracedList(); 1601 } 1602 break; 1603 default: 1604 nextToken(); 1605 break; 1606 } 1607 } while (!eof()); 1608 } 1609 1610 void UnwrappedLineParser::parseIfThenElse() { 1611 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1612 nextToken(); 1613 if (FormatTok->Tok.is(tok::kw_constexpr)) 1614 nextToken(); 1615 if (FormatTok->Tok.is(tok::l_paren)) 1616 parseParens(); 1617 bool NeedsUnwrappedLine = false; 1618 if (FormatTok->Tok.is(tok::l_brace)) { 1619 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1620 parseBlock(/*MustBeDeclaration=*/false); 1621 if (Style.BraceWrapping.BeforeElse) 1622 addUnwrappedLine(); 1623 else 1624 NeedsUnwrappedLine = true; 1625 } else { 1626 addUnwrappedLine(); 1627 ++Line->Level; 1628 parseStructuralElement(); 1629 --Line->Level; 1630 } 1631 if (FormatTok->Tok.is(tok::kw_else)) { 1632 nextToken(); 1633 if (FormatTok->Tok.is(tok::l_brace)) { 1634 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1635 parseBlock(/*MustBeDeclaration=*/false); 1636 addUnwrappedLine(); 1637 } else if (FormatTok->Tok.is(tok::kw_if)) { 1638 parseIfThenElse(); 1639 } else { 1640 addUnwrappedLine(); 1641 ++Line->Level; 1642 parseStructuralElement(); 1643 if (FormatTok->is(tok::eof)) 1644 addUnwrappedLine(); 1645 --Line->Level; 1646 } 1647 } else if (NeedsUnwrappedLine) { 1648 addUnwrappedLine(); 1649 } 1650 } 1651 1652 void UnwrappedLineParser::parseTryCatch() { 1653 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1654 nextToken(); 1655 bool NeedsUnwrappedLine = false; 1656 if (FormatTok->is(tok::colon)) { 1657 // We are in a function try block, what comes is an initializer list. 1658 nextToken(); 1659 while (FormatTok->is(tok::identifier)) { 1660 nextToken(); 1661 if (FormatTok->is(tok::l_paren)) 1662 parseParens(); 1663 if (FormatTok->is(tok::comma)) 1664 nextToken(); 1665 } 1666 } 1667 // Parse try with resource. 1668 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1669 parseParens(); 1670 } 1671 if (FormatTok->is(tok::l_brace)) { 1672 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1673 parseBlock(/*MustBeDeclaration=*/false); 1674 if (Style.BraceWrapping.BeforeCatch) { 1675 addUnwrappedLine(); 1676 } else { 1677 NeedsUnwrappedLine = true; 1678 } 1679 } else if (!FormatTok->is(tok::kw_catch)) { 1680 // The C++ standard requires a compound-statement after a try. 1681 // If there's none, we try to assume there's a structuralElement 1682 // and try to continue. 1683 addUnwrappedLine(); 1684 ++Line->Level; 1685 parseStructuralElement(); 1686 --Line->Level; 1687 } 1688 while (1) { 1689 if (FormatTok->is(tok::at)) 1690 nextToken(); 1691 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1692 tok::kw___finally) || 1693 ((Style.Language == FormatStyle::LK_Java || 1694 Style.Language == FormatStyle::LK_JavaScript) && 1695 FormatTok->is(Keywords.kw_finally)) || 1696 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1697 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1698 break; 1699 nextToken(); 1700 while (FormatTok->isNot(tok::l_brace)) { 1701 if (FormatTok->is(tok::l_paren)) { 1702 parseParens(); 1703 continue; 1704 } 1705 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1706 return; 1707 nextToken(); 1708 } 1709 NeedsUnwrappedLine = false; 1710 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1711 parseBlock(/*MustBeDeclaration=*/false); 1712 if (Style.BraceWrapping.BeforeCatch) 1713 addUnwrappedLine(); 1714 else 1715 NeedsUnwrappedLine = true; 1716 } 1717 if (NeedsUnwrappedLine) 1718 addUnwrappedLine(); 1719 } 1720 1721 void UnwrappedLineParser::parseNamespace() { 1722 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1723 1724 const FormatToken &InitialToken = *FormatTok; 1725 nextToken(); 1726 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1727 nextToken(); 1728 if (FormatTok->Tok.is(tok::l_brace)) { 1729 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1730 addUnwrappedLine(); 1731 1732 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1733 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1734 DeclarationScopeStack.size() > 1); 1735 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1736 // Munch the semicolon after a namespace. This is more common than one would 1737 // think. Puttin the semicolon into its own line is very ugly. 1738 if (FormatTok->Tok.is(tok::semi)) 1739 nextToken(); 1740 addUnwrappedLine(); 1741 } 1742 // FIXME: Add error handling. 1743 } 1744 1745 void UnwrappedLineParser::parseNew() { 1746 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1747 nextToken(); 1748 if (Style.Language != FormatStyle::LK_Java) 1749 return; 1750 1751 // In Java, we can parse everything up to the parens, which aren't optional. 1752 do { 1753 // There should not be a ;, { or } before the new's open paren. 1754 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1755 return; 1756 1757 // Consume the parens. 1758 if (FormatTok->is(tok::l_paren)) { 1759 parseParens(); 1760 1761 // If there is a class body of an anonymous class, consume that as child. 1762 if (FormatTok->is(tok::l_brace)) 1763 parseChildBlock(); 1764 return; 1765 } 1766 nextToken(); 1767 } while (!eof()); 1768 } 1769 1770 void UnwrappedLineParser::parseForOrWhileLoop() { 1771 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1772 "'for', 'while' or foreach macro expected"); 1773 nextToken(); 1774 // JS' for await ( ... 1775 if (Style.Language == FormatStyle::LK_JavaScript && 1776 FormatTok->is(Keywords.kw_await)) 1777 nextToken(); 1778 if (FormatTok->Tok.is(tok::l_paren)) 1779 parseParens(); 1780 if (FormatTok->Tok.is(tok::l_brace)) { 1781 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1782 parseBlock(/*MustBeDeclaration=*/false); 1783 addUnwrappedLine(); 1784 } else { 1785 addUnwrappedLine(); 1786 ++Line->Level; 1787 parseStructuralElement(); 1788 --Line->Level; 1789 } 1790 } 1791 1792 void UnwrappedLineParser::parseDoWhile() { 1793 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1794 nextToken(); 1795 if (FormatTok->Tok.is(tok::l_brace)) { 1796 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1797 parseBlock(/*MustBeDeclaration=*/false); 1798 if (Style.BraceWrapping.IndentBraces) 1799 addUnwrappedLine(); 1800 } else { 1801 addUnwrappedLine(); 1802 ++Line->Level; 1803 parseStructuralElement(); 1804 --Line->Level; 1805 } 1806 1807 // FIXME: Add error handling. 1808 if (!FormatTok->Tok.is(tok::kw_while)) { 1809 addUnwrappedLine(); 1810 return; 1811 } 1812 1813 nextToken(); 1814 parseStructuralElement(); 1815 } 1816 1817 void UnwrappedLineParser::parseLabel() { 1818 nextToken(); 1819 unsigned OldLineLevel = Line->Level; 1820 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1821 --Line->Level; 1822 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1823 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1824 parseBlock(/*MustBeDeclaration=*/false); 1825 if (FormatTok->Tok.is(tok::kw_break)) { 1826 if (Style.BraceWrapping.AfterControlStatement) 1827 addUnwrappedLine(); 1828 parseStructuralElement(); 1829 } 1830 addUnwrappedLine(); 1831 } else { 1832 if (FormatTok->is(tok::semi)) 1833 nextToken(); 1834 addUnwrappedLine(); 1835 } 1836 Line->Level = OldLineLevel; 1837 if (FormatTok->isNot(tok::l_brace)) { 1838 parseStructuralElement(); 1839 addUnwrappedLine(); 1840 } 1841 } 1842 1843 void UnwrappedLineParser::parseCaseLabel() { 1844 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1845 // FIXME: fix handling of complex expressions here. 1846 do { 1847 nextToken(); 1848 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1849 parseLabel(); 1850 } 1851 1852 void UnwrappedLineParser::parseSwitch() { 1853 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1854 nextToken(); 1855 if (FormatTok->Tok.is(tok::l_paren)) 1856 parseParens(); 1857 if (FormatTok->Tok.is(tok::l_brace)) { 1858 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1859 parseBlock(/*MustBeDeclaration=*/false); 1860 addUnwrappedLine(); 1861 } else { 1862 addUnwrappedLine(); 1863 ++Line->Level; 1864 parseStructuralElement(); 1865 --Line->Level; 1866 } 1867 } 1868 1869 void UnwrappedLineParser::parseAccessSpecifier() { 1870 nextToken(); 1871 // Understand Qt's slots. 1872 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 1873 nextToken(); 1874 // Otherwise, we don't know what it is, and we'd better keep the next token. 1875 if (FormatTok->Tok.is(tok::colon)) 1876 nextToken(); 1877 addUnwrappedLine(); 1878 } 1879 1880 bool UnwrappedLineParser::parseEnum() { 1881 // Won't be 'enum' for NS_ENUMs. 1882 if (FormatTok->Tok.is(tok::kw_enum)) 1883 nextToken(); 1884 1885 // In TypeScript, "enum" can also be used as property name, e.g. in interface 1886 // declarations. An "enum" keyword followed by a colon would be a syntax 1887 // error and thus assume it is just an identifier. 1888 if (Style.Language == FormatStyle::LK_JavaScript && 1889 FormatTok->isOneOf(tok::colon, tok::question)) 1890 return false; 1891 1892 // Eat up enum class ... 1893 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 1894 nextToken(); 1895 1896 while (FormatTok->Tok.getIdentifierInfo() || 1897 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 1898 tok::greater, tok::comma, tok::question)) { 1899 nextToken(); 1900 // We can have macros or attributes in between 'enum' and the enum name. 1901 if (FormatTok->is(tok::l_paren)) 1902 parseParens(); 1903 if (FormatTok->is(tok::identifier)) { 1904 nextToken(); 1905 // If there are two identifiers in a row, this is likely an elaborate 1906 // return type. In Java, this can be "implements", etc. 1907 if (Style.isCpp() && FormatTok->is(tok::identifier)) 1908 return false; 1909 } 1910 } 1911 1912 // Just a declaration or something is wrong. 1913 if (FormatTok->isNot(tok::l_brace)) 1914 return true; 1915 FormatTok->BlockKind = BK_Block; 1916 1917 if (Style.Language == FormatStyle::LK_Java) { 1918 // Java enums are different. 1919 parseJavaEnumBody(); 1920 return true; 1921 } 1922 if (Style.Language == FormatStyle::LK_Proto) { 1923 parseBlock(/*MustBeDeclaration=*/true); 1924 return true; 1925 } 1926 1927 // Parse enum body. 1928 nextToken(); 1929 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 1930 if (HasError) { 1931 if (FormatTok->is(tok::semi)) 1932 nextToken(); 1933 addUnwrappedLine(); 1934 } 1935 return true; 1936 1937 // There is no addUnwrappedLine() here so that we fall through to parsing a 1938 // structural element afterwards. Thus, in "enum A {} n, m;", 1939 // "} n, m;" will end up in one unwrapped line. 1940 } 1941 1942 void UnwrappedLineParser::parseJavaEnumBody() { 1943 // Determine whether the enum is simple, i.e. does not have a semicolon or 1944 // constants with class bodies. Simple enums can be formatted like braced 1945 // lists, contracted to a single line, etc. 1946 unsigned StoredPosition = Tokens->getPosition(); 1947 bool IsSimple = true; 1948 FormatToken *Tok = Tokens->getNextToken(); 1949 while (Tok) { 1950 if (Tok->is(tok::r_brace)) 1951 break; 1952 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 1953 IsSimple = false; 1954 break; 1955 } 1956 // FIXME: This will also mark enums with braces in the arguments to enum 1957 // constants as "not simple". This is probably fine in practice, though. 1958 Tok = Tokens->getNextToken(); 1959 } 1960 FormatTok = Tokens->setPosition(StoredPosition); 1961 1962 if (IsSimple) { 1963 nextToken(); 1964 parseBracedList(); 1965 addUnwrappedLine(); 1966 return; 1967 } 1968 1969 // Parse the body of a more complex enum. 1970 // First add a line for everything up to the "{". 1971 nextToken(); 1972 addUnwrappedLine(); 1973 ++Line->Level; 1974 1975 // Parse the enum constants. 1976 while (FormatTok) { 1977 if (FormatTok->is(tok::l_brace)) { 1978 // Parse the constant's class body. 1979 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1980 /*MunchSemi=*/false); 1981 } else if (FormatTok->is(tok::l_paren)) { 1982 parseParens(); 1983 } else if (FormatTok->is(tok::comma)) { 1984 nextToken(); 1985 addUnwrappedLine(); 1986 } else if (FormatTok->is(tok::semi)) { 1987 nextToken(); 1988 addUnwrappedLine(); 1989 break; 1990 } else if (FormatTok->is(tok::r_brace)) { 1991 addUnwrappedLine(); 1992 break; 1993 } else { 1994 nextToken(); 1995 } 1996 } 1997 1998 // Parse the class body after the enum's ";" if any. 1999 parseLevel(/*HasOpeningBrace=*/true); 2000 nextToken(); 2001 --Line->Level; 2002 addUnwrappedLine(); 2003 } 2004 2005 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 2006 const FormatToken &InitialToken = *FormatTok; 2007 nextToken(); 2008 2009 // The actual identifier can be a nested name specifier, and in macros 2010 // it is often token-pasted. 2011 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 2012 tok::kw___attribute, tok::kw___declspec, 2013 tok::kw_alignas) || 2014 ((Style.Language == FormatStyle::LK_Java || 2015 Style.Language == FormatStyle::LK_JavaScript) && 2016 FormatTok->isOneOf(tok::period, tok::comma))) { 2017 if (Style.Language == FormatStyle::LK_JavaScript && 2018 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 2019 // JavaScript/TypeScript supports inline object types in 2020 // extends/implements positions: 2021 // class Foo implements {bar: number} { } 2022 nextToken(); 2023 if (FormatTok->is(tok::l_brace)) { 2024 tryToParseBracedList(); 2025 continue; 2026 } 2027 } 2028 bool IsNonMacroIdentifier = 2029 FormatTok->is(tok::identifier) && 2030 FormatTok->TokenText != FormatTok->TokenText.upper(); 2031 nextToken(); 2032 // We can have macros or attributes in between 'class' and the class name. 2033 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 2034 parseParens(); 2035 } 2036 2037 // Note that parsing away template declarations here leads to incorrectly 2038 // accepting function declarations as record declarations. 2039 // In general, we cannot solve this problem. Consider: 2040 // class A<int> B() {} 2041 // which can be a function definition or a class definition when B() is a 2042 // macro. If we find enough real-world cases where this is a problem, we 2043 // can parse for the 'template' keyword in the beginning of the statement, 2044 // and thus rule out the record production in case there is no template 2045 // (this would still leave us with an ambiguity between template function 2046 // and class declarations). 2047 if (FormatTok->isOneOf(tok::colon, tok::less)) { 2048 while (!eof()) { 2049 if (FormatTok->is(tok::l_brace)) { 2050 calculateBraceTypes(/*ExpectClassBody=*/true); 2051 if (!tryToParseBracedList()) 2052 break; 2053 } 2054 if (FormatTok->Tok.is(tok::semi)) 2055 return; 2056 nextToken(); 2057 } 2058 } 2059 if (FormatTok->Tok.is(tok::l_brace)) { 2060 if (ParseAsExpr) { 2061 parseChildBlock(); 2062 } else { 2063 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2064 addUnwrappedLine(); 2065 2066 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2067 /*MunchSemi=*/false); 2068 } 2069 } 2070 // There is no addUnwrappedLine() here so that we fall through to parsing a 2071 // structural element afterwards. Thus, in "class A {} n, m;", 2072 // "} n, m;" will end up in one unwrapped line. 2073 } 2074 2075 void UnwrappedLineParser::parseObjCProtocolList() { 2076 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 2077 do 2078 nextToken(); 2079 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 2080 nextToken(); // Skip '>'. 2081 } 2082 2083 void UnwrappedLineParser::parseObjCUntilAtEnd() { 2084 do { 2085 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2086 nextToken(); 2087 addUnwrappedLine(); 2088 break; 2089 } 2090 if (FormatTok->is(tok::l_brace)) { 2091 parseBlock(/*MustBeDeclaration=*/false); 2092 // In ObjC interfaces, nothing should be following the "}". 2093 addUnwrappedLine(); 2094 } else if (FormatTok->is(tok::r_brace)) { 2095 // Ignore stray "}". parseStructuralElement doesn't consume them. 2096 nextToken(); 2097 addUnwrappedLine(); 2098 } else { 2099 parseStructuralElement(); 2100 } 2101 } while (!eof()); 2102 } 2103 2104 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2105 nextToken(); 2106 nextToken(); // interface name 2107 2108 // @interface can be followed by either a base class, or a category. 2109 if (FormatTok->Tok.is(tok::colon)) { 2110 nextToken(); 2111 nextToken(); // base class name 2112 } else if (FormatTok->Tok.is(tok::l_paren)) 2113 // Skip category, if present. 2114 parseParens(); 2115 2116 if (FormatTok->Tok.is(tok::less)) 2117 parseObjCProtocolList(); 2118 2119 if (FormatTok->Tok.is(tok::l_brace)) { 2120 if (Style.BraceWrapping.AfterObjCDeclaration) 2121 addUnwrappedLine(); 2122 parseBlock(/*MustBeDeclaration=*/true); 2123 } 2124 2125 // With instance variables, this puts '}' on its own line. Without instance 2126 // variables, this ends the @interface line. 2127 addUnwrappedLine(); 2128 2129 parseObjCUntilAtEnd(); 2130 } 2131 2132 void UnwrappedLineParser::parseObjCProtocol() { 2133 nextToken(); 2134 nextToken(); // protocol name 2135 2136 if (FormatTok->Tok.is(tok::less)) 2137 parseObjCProtocolList(); 2138 2139 // Check for protocol declaration. 2140 if (FormatTok->Tok.is(tok::semi)) { 2141 nextToken(); 2142 return addUnwrappedLine(); 2143 } 2144 2145 addUnwrappedLine(); 2146 parseObjCUntilAtEnd(); 2147 } 2148 2149 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2150 bool IsImport = FormatTok->is(Keywords.kw_import); 2151 assert(IsImport || FormatTok->is(tok::kw_export)); 2152 nextToken(); 2153 2154 // Consume the "default" in "export default class/function". 2155 if (FormatTok->is(tok::kw_default)) 2156 nextToken(); 2157 2158 // Consume "async function", "function" and "default function", so that these 2159 // get parsed as free-standing JS functions, i.e. do not require a trailing 2160 // semicolon. 2161 if (FormatTok->is(Keywords.kw_async)) 2162 nextToken(); 2163 if (FormatTok->is(Keywords.kw_function)) { 2164 nextToken(); 2165 return; 2166 } 2167 2168 // For imports, `export *`, `export {...}`, consume the rest of the line up 2169 // to the terminating `;`. For everything else, just return and continue 2170 // parsing the structural element, i.e. the declaration or expression for 2171 // `export default`. 2172 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2173 !FormatTok->isStringLiteral()) 2174 return; 2175 2176 while (!eof()) { 2177 if (FormatTok->is(tok::semi)) 2178 return; 2179 if (Line->Tokens.size() == 0) { 2180 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2181 // import statement should terminate. 2182 return; 2183 } 2184 if (FormatTok->is(tok::l_brace)) { 2185 FormatTok->BlockKind = BK_Block; 2186 nextToken(); 2187 parseBracedList(); 2188 } else { 2189 nextToken(); 2190 } 2191 } 2192 } 2193 2194 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2195 StringRef Prefix = "") { 2196 llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" 2197 << (Line.InPPDirective ? " MACRO" : "") << ": "; 2198 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2199 E = Line.Tokens.end(); 2200 I != E; ++I) { 2201 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2202 << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn 2203 << "] "; 2204 } 2205 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2206 E = Line.Tokens.end(); 2207 I != E; ++I) { 2208 const UnwrappedLineNode &Node = *I; 2209 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2210 I = Node.Children.begin(), 2211 E = Node.Children.end(); 2212 I != E; ++I) { 2213 printDebugInfo(*I, "\nChild: "); 2214 } 2215 } 2216 llvm::dbgs() << "\n"; 2217 } 2218 2219 void UnwrappedLineParser::addUnwrappedLine() { 2220 if (Line->Tokens.empty()) 2221 return; 2222 DEBUG({ 2223 if (CurrentLines == &Lines) 2224 printDebugInfo(*Line); 2225 }); 2226 CurrentLines->push_back(std::move(*Line)); 2227 Line->Tokens.clear(); 2228 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2229 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2230 CurrentLines->append( 2231 std::make_move_iterator(PreprocessorDirectives.begin()), 2232 std::make_move_iterator(PreprocessorDirectives.end())); 2233 PreprocessorDirectives.clear(); 2234 } 2235 // Disconnect the current token from the last token on the previous line. 2236 FormatTok->Previous = nullptr; 2237 } 2238 2239 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2240 2241 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2242 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2243 FormatTok.NewlinesBefore > 0; 2244 } 2245 2246 // Checks if \p FormatTok is a line comment that continues the line comment 2247 // section on \p Line. 2248 static bool continuesLineCommentSection(const FormatToken &FormatTok, 2249 const UnwrappedLine &Line, 2250 llvm::Regex &CommentPragmasRegex) { 2251 if (Line.Tokens.empty()) 2252 return false; 2253 2254 StringRef IndentContent = FormatTok.TokenText; 2255 if (FormatTok.TokenText.startswith("//") || 2256 FormatTok.TokenText.startswith("/*")) 2257 IndentContent = FormatTok.TokenText.substr(2); 2258 if (CommentPragmasRegex.match(IndentContent)) 2259 return false; 2260 2261 // If Line starts with a line comment, then FormatTok continues the comment 2262 // section if its original column is greater or equal to the original start 2263 // column of the line. 2264 // 2265 // Define the min column token of a line as follows: if a line ends in '{' or 2266 // contains a '{' followed by a line comment, then the min column token is 2267 // that '{'. Otherwise, the min column token of the line is the first token of 2268 // the line. 2269 // 2270 // If Line starts with a token other than a line comment, then FormatTok 2271 // continues the comment section if its original column is greater than the 2272 // original start column of the min column token of the line. 2273 // 2274 // For example, the second line comment continues the first in these cases: 2275 // 2276 // // first line 2277 // // second line 2278 // 2279 // and: 2280 // 2281 // // first line 2282 // // second line 2283 // 2284 // and: 2285 // 2286 // int i; // first line 2287 // // second line 2288 // 2289 // and: 2290 // 2291 // do { // first line 2292 // // second line 2293 // int i; 2294 // } while (true); 2295 // 2296 // and: 2297 // 2298 // enum { 2299 // a, // first line 2300 // // second line 2301 // b 2302 // }; 2303 // 2304 // The second line comment doesn't continue the first in these cases: 2305 // 2306 // // first line 2307 // // second line 2308 // 2309 // and: 2310 // 2311 // int i; // first line 2312 // // second line 2313 // 2314 // and: 2315 // 2316 // do { // first line 2317 // // second line 2318 // int i; 2319 // } while (true); 2320 // 2321 // and: 2322 // 2323 // enum { 2324 // a, // first line 2325 // // second line 2326 // }; 2327 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2328 2329 // Scan for '{//'. If found, use the column of '{' as a min column for line 2330 // comment section continuation. 2331 const FormatToken *PreviousToken = nullptr; 2332 for (const UnwrappedLineNode &Node : Line.Tokens) { 2333 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2334 isLineComment(*Node.Tok)) { 2335 MinColumnToken = PreviousToken; 2336 break; 2337 } 2338 PreviousToken = Node.Tok; 2339 2340 // Grab the last newline preceding a token in this unwrapped line. 2341 if (Node.Tok->NewlinesBefore > 0) { 2342 MinColumnToken = Node.Tok; 2343 } 2344 } 2345 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2346 MinColumnToken = PreviousToken; 2347 } 2348 2349 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2350 MinColumnToken); 2351 } 2352 2353 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2354 bool JustComments = Line->Tokens.empty(); 2355 for (SmallVectorImpl<FormatToken *>::const_iterator 2356 I = CommentsBeforeNextToken.begin(), 2357 E = CommentsBeforeNextToken.end(); 2358 I != E; ++I) { 2359 // Line comments that belong to the same line comment section are put on the 2360 // same line since later we might want to reflow content between them. 2361 // Additional fine-grained breaking of line comment sections is controlled 2362 // by the class BreakableLineCommentSection in case it is desirable to keep 2363 // several line comment sections in the same unwrapped line. 2364 // 2365 // FIXME: Consider putting separate line comment sections as children to the 2366 // unwrapped line instead. 2367 (*I)->ContinuesLineCommentSection = 2368 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2369 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2370 addUnwrappedLine(); 2371 pushToken(*I); 2372 } 2373 if (NewlineBeforeNext && JustComments) 2374 addUnwrappedLine(); 2375 CommentsBeforeNextToken.clear(); 2376 } 2377 2378 void UnwrappedLineParser::nextToken(int LevelDifference) { 2379 if (eof()) 2380 return; 2381 flushComments(isOnNewLine(*FormatTok)); 2382 pushToken(FormatTok); 2383 FormatToken *Previous = FormatTok; 2384 if (Style.Language != FormatStyle::LK_JavaScript) 2385 readToken(LevelDifference); 2386 else 2387 readTokenWithJavaScriptASI(); 2388 FormatTok->Previous = Previous; 2389 } 2390 2391 void UnwrappedLineParser::distributeComments( 2392 const SmallVectorImpl<FormatToken *> &Comments, 2393 const FormatToken *NextTok) { 2394 // Whether or not a line comment token continues a line is controlled by 2395 // the method continuesLineCommentSection, with the following caveat: 2396 // 2397 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2398 // that each comment line from the trail is aligned with the next token, if 2399 // the next token exists. If a trail exists, the beginning of the maximal 2400 // trail is marked as a start of a new comment section. 2401 // 2402 // For example in this code: 2403 // 2404 // int a; // line about a 2405 // // line 1 about b 2406 // // line 2 about b 2407 // int b; 2408 // 2409 // the two lines about b form a maximal trail, so there are two sections, the 2410 // first one consisting of the single comment "// line about a" and the 2411 // second one consisting of the next two comments. 2412 if (Comments.empty()) 2413 return; 2414 bool ShouldPushCommentsInCurrentLine = true; 2415 bool HasTrailAlignedWithNextToken = false; 2416 unsigned StartOfTrailAlignedWithNextToken = 0; 2417 if (NextTok) { 2418 // We are skipping the first element intentionally. 2419 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2420 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2421 HasTrailAlignedWithNextToken = true; 2422 StartOfTrailAlignedWithNextToken = i; 2423 } 2424 } 2425 } 2426 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2427 FormatToken *FormatTok = Comments[i]; 2428 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 2429 FormatTok->ContinuesLineCommentSection = false; 2430 } else { 2431 FormatTok->ContinuesLineCommentSection = 2432 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2433 } 2434 if (!FormatTok->ContinuesLineCommentSection && 2435 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2436 ShouldPushCommentsInCurrentLine = false; 2437 } 2438 if (ShouldPushCommentsInCurrentLine) { 2439 pushToken(FormatTok); 2440 } else { 2441 CommentsBeforeNextToken.push_back(FormatTok); 2442 } 2443 } 2444 } 2445 2446 void UnwrappedLineParser::readToken(int LevelDifference) { 2447 SmallVector<FormatToken *, 1> Comments; 2448 do { 2449 FormatTok = Tokens->getNextToken(); 2450 assert(FormatTok); 2451 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2452 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2453 distributeComments(Comments, FormatTok); 2454 Comments.clear(); 2455 // If there is an unfinished unwrapped line, we flush the preprocessor 2456 // directives only after that unwrapped line was finished later. 2457 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2458 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2459 assert((LevelDifference >= 0 || 2460 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 2461 "LevelDifference makes Line->Level negative"); 2462 Line->Level += LevelDifference; 2463 // Comments stored before the preprocessor directive need to be output 2464 // before the preprocessor directive, at the same level as the 2465 // preprocessor directive, as we consider them to apply to the directive. 2466 flushComments(isOnNewLine(*FormatTok)); 2467 parsePPDirective(); 2468 } 2469 while (FormatTok->Type == TT_ConflictStart || 2470 FormatTok->Type == TT_ConflictEnd || 2471 FormatTok->Type == TT_ConflictAlternative) { 2472 if (FormatTok->Type == TT_ConflictStart) { 2473 conditionalCompilationStart(/*Unreachable=*/false); 2474 } else if (FormatTok->Type == TT_ConflictAlternative) { 2475 conditionalCompilationAlternative(); 2476 } else if (FormatTok->Type == TT_ConflictEnd) { 2477 conditionalCompilationEnd(); 2478 } 2479 FormatTok = Tokens->getNextToken(); 2480 FormatTok->MustBreakBefore = true; 2481 } 2482 2483 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 2484 !Line->InPPDirective) { 2485 continue; 2486 } 2487 2488 if (!FormatTok->Tok.is(tok::comment)) { 2489 distributeComments(Comments, FormatTok); 2490 Comments.clear(); 2491 return; 2492 } 2493 2494 Comments.push_back(FormatTok); 2495 } while (!eof()); 2496 2497 distributeComments(Comments, nullptr); 2498 Comments.clear(); 2499 } 2500 2501 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2502 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2503 if (MustBreakBeforeNextToken) { 2504 Line->Tokens.back().Tok->MustBreakBefore = true; 2505 MustBreakBeforeNextToken = false; 2506 } 2507 } 2508 2509 } // end namespace format 2510 } // end namespace clang 2511