1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "UnwrappedLineParser.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/Support/Debug.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #define DEBUG_TYPE "format-parser" 22 23 namespace clang { 24 namespace format { 25 26 class FormatTokenSource { 27 public: 28 virtual ~FormatTokenSource() {} 29 virtual FormatToken *getNextToken() = 0; 30 31 virtual unsigned getPosition() = 0; 32 virtual FormatToken *setPosition(unsigned Position) = 0; 33 }; 34 35 namespace { 36 37 class ScopedDeclarationState { 38 public: 39 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 40 bool MustBeDeclaration) 41 : Line(Line), Stack(Stack) { 42 Line.MustBeDeclaration = MustBeDeclaration; 43 Stack.push_back(MustBeDeclaration); 44 } 45 ~ScopedDeclarationState() { 46 Stack.pop_back(); 47 if (!Stack.empty()) 48 Line.MustBeDeclaration = Stack.back(); 49 else 50 Line.MustBeDeclaration = true; 51 } 52 53 private: 54 UnwrappedLine &Line; 55 std::vector<bool> &Stack; 56 }; 57 58 static bool isLineComment(const FormatToken &FormatTok) { 59 return FormatTok.is(tok::comment) && 60 FormatTok.TokenText.startswith("//"); 61 } 62 63 // Checks if \p FormatTok is a line comment that continues the line comment 64 // \p Previous. The original column of \p MinColumnToken is used to determine 65 // whether \p FormatTok is indented enough to the right to continue \p Previous. 66 static bool continuesLineComment(const FormatToken &FormatTok, 67 const FormatToken *Previous, 68 const FormatToken *MinColumnToken) { 69 if (!Previous || !MinColumnToken) 70 return false; 71 unsigned MinContinueColumn = 72 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 73 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 74 isLineComment(*Previous) && 75 FormatTok.OriginalColumn >= MinContinueColumn; 76 } 77 78 class ScopedMacroState : public FormatTokenSource { 79 public: 80 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 81 FormatToken *&ResetToken) 82 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 83 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 84 Token(nullptr), PreviousToken(nullptr) { 85 TokenSource = this; 86 Line.Level = 0; 87 Line.InPPDirective = true; 88 } 89 90 ~ScopedMacroState() override { 91 TokenSource = PreviousTokenSource; 92 ResetToken = Token; 93 Line.InPPDirective = false; 94 Line.Level = PreviousLineLevel; 95 } 96 97 FormatToken *getNextToken() override { 98 // The \c UnwrappedLineParser guards against this by never calling 99 // \c getNextToken() after it has encountered the first eof token. 100 assert(!eof()); 101 PreviousToken = Token; 102 Token = PreviousTokenSource->getNextToken(); 103 if (eof()) 104 return getFakeEOF(); 105 return Token; 106 } 107 108 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 109 110 FormatToken *setPosition(unsigned Position) override { 111 PreviousToken = nullptr; 112 Token = PreviousTokenSource->setPosition(Position); 113 return Token; 114 } 115 116 private: 117 bool eof() { 118 return Token && Token->HasUnescapedNewline && 119 !continuesLineComment(*Token, PreviousToken, 120 /*MinColumnToken=*/PreviousToken); 121 } 122 123 FormatToken *getFakeEOF() { 124 static bool EOFInitialized = false; 125 static FormatToken FormatTok; 126 if (!EOFInitialized) { 127 FormatTok.Tok.startToken(); 128 FormatTok.Tok.setKind(tok::eof); 129 EOFInitialized = true; 130 } 131 return &FormatTok; 132 } 133 134 UnwrappedLine &Line; 135 FormatTokenSource *&TokenSource; 136 FormatToken *&ResetToken; 137 unsigned PreviousLineLevel; 138 FormatTokenSource *PreviousTokenSource; 139 140 FormatToken *Token; 141 FormatToken *PreviousToken; 142 }; 143 144 } // end anonymous namespace 145 146 class ScopedLineState { 147 public: 148 ScopedLineState(UnwrappedLineParser &Parser, 149 bool SwitchToPreprocessorLines = false) 150 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 151 if (SwitchToPreprocessorLines) 152 Parser.CurrentLines = &Parser.PreprocessorDirectives; 153 else if (!Parser.Line->Tokens.empty()) 154 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 155 PreBlockLine = std::move(Parser.Line); 156 Parser.Line = llvm::make_unique<UnwrappedLine>(); 157 Parser.Line->Level = PreBlockLine->Level; 158 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 159 } 160 161 ~ScopedLineState() { 162 if (!Parser.Line->Tokens.empty()) { 163 Parser.addUnwrappedLine(); 164 } 165 assert(Parser.Line->Tokens.empty()); 166 Parser.Line = std::move(PreBlockLine); 167 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 168 Parser.MustBreakBeforeNextToken = true; 169 Parser.CurrentLines = OriginalLines; 170 } 171 172 private: 173 UnwrappedLineParser &Parser; 174 175 std::unique_ptr<UnwrappedLine> PreBlockLine; 176 SmallVectorImpl<UnwrappedLine> *OriginalLines; 177 }; 178 179 class CompoundStatementIndenter { 180 public: 181 CompoundStatementIndenter(UnwrappedLineParser *Parser, 182 const FormatStyle &Style, unsigned &LineLevel) 183 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 184 if (Style.BraceWrapping.AfterControlStatement) 185 Parser->addUnwrappedLine(); 186 if (Style.BraceWrapping.IndentBraces) 187 ++LineLevel; 188 } 189 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 190 191 private: 192 unsigned &LineLevel; 193 unsigned OldLineLevel; 194 }; 195 196 namespace { 197 198 class IndexedTokenSource : public FormatTokenSource { 199 public: 200 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 201 : Tokens(Tokens), Position(-1) {} 202 203 FormatToken *getNextToken() override { 204 ++Position; 205 return Tokens[Position]; 206 } 207 208 unsigned getPosition() override { 209 assert(Position >= 0); 210 return Position; 211 } 212 213 FormatToken *setPosition(unsigned P) override { 214 Position = P; 215 return Tokens[Position]; 216 } 217 218 void reset() { Position = -1; } 219 220 private: 221 ArrayRef<FormatToken *> Tokens; 222 int Position; 223 }; 224 225 } // end anonymous namespace 226 227 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 228 const AdditionalKeywords &Keywords, 229 ArrayRef<FormatToken *> Tokens, 230 UnwrappedLineConsumer &Callback) 231 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 232 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 233 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 234 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {} 235 236 void UnwrappedLineParser::reset() { 237 PPBranchLevel = -1; 238 Line.reset(new UnwrappedLine); 239 CommentsBeforeNextToken.clear(); 240 FormatTok = nullptr; 241 MustBreakBeforeNextToken = false; 242 PreprocessorDirectives.clear(); 243 CurrentLines = &Lines; 244 DeclarationScopeStack.clear(); 245 PPStack.clear(); 246 } 247 248 void UnwrappedLineParser::parse() { 249 IndexedTokenSource TokenSource(AllTokens); 250 do { 251 DEBUG(llvm::dbgs() << "----\n"); 252 reset(); 253 Tokens = &TokenSource; 254 TokenSource.reset(); 255 256 readToken(); 257 parseFile(); 258 // Create line with eof token. 259 pushToken(FormatTok); 260 addUnwrappedLine(); 261 262 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 263 E = Lines.end(); 264 I != E; ++I) { 265 Callback.consumeUnwrappedLine(*I); 266 } 267 Callback.finishRun(); 268 Lines.clear(); 269 while (!PPLevelBranchIndex.empty() && 270 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 271 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 272 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 273 } 274 if (!PPLevelBranchIndex.empty()) { 275 ++PPLevelBranchIndex.back(); 276 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 277 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 278 } 279 } while (!PPLevelBranchIndex.empty()); 280 } 281 282 void UnwrappedLineParser::parseFile() { 283 // The top-level context in a file always has declarations, except for pre- 284 // processor directives and JavaScript files. 285 bool MustBeDeclaration = 286 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 287 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 288 MustBeDeclaration); 289 if (Style.Language == FormatStyle::LK_TextProto) 290 parseBracedList(); 291 else 292 parseLevel(/*HasOpeningBrace=*/false); 293 // Make sure to format the remaining tokens. 294 flushComments(true); 295 addUnwrappedLine(); 296 } 297 298 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 299 bool SwitchLabelEncountered = false; 300 do { 301 tok::TokenKind kind = FormatTok->Tok.getKind(); 302 if (FormatTok->Type == TT_MacroBlockBegin) { 303 kind = tok::l_brace; 304 } else if (FormatTok->Type == TT_MacroBlockEnd) { 305 kind = tok::r_brace; 306 } 307 308 switch (kind) { 309 case tok::comment: 310 nextToken(); 311 addUnwrappedLine(); 312 break; 313 case tok::l_brace: 314 // FIXME: Add parameter whether this can happen - if this happens, we must 315 // be in a non-declaration context. 316 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 317 continue; 318 parseBlock(/*MustBeDeclaration=*/false); 319 addUnwrappedLine(); 320 break; 321 case tok::r_brace: 322 if (HasOpeningBrace) 323 return; 324 nextToken(); 325 addUnwrappedLine(); 326 break; 327 case tok::kw_default: 328 case tok::kw_case: 329 if (!SwitchLabelEncountered && 330 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 331 ++Line->Level; 332 SwitchLabelEncountered = true; 333 parseStructuralElement(); 334 break; 335 default: 336 parseStructuralElement(); 337 break; 338 } 339 } while (!eof()); 340 } 341 342 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 343 // We'll parse forward through the tokens until we hit 344 // a closing brace or eof - note that getNextToken() will 345 // parse macros, so this will magically work inside macro 346 // definitions, too. 347 unsigned StoredPosition = Tokens->getPosition(); 348 FormatToken *Tok = FormatTok; 349 const FormatToken *PrevTok = getPreviousToken(); 350 // Keep a stack of positions of lbrace tokens. We will 351 // update information about whether an lbrace starts a 352 // braced init list or a different block during the loop. 353 SmallVector<FormatToken *, 8> LBraceStack; 354 assert(Tok->Tok.is(tok::l_brace)); 355 do { 356 // Get next non-comment token. 357 FormatToken *NextTok; 358 unsigned ReadTokens = 0; 359 do { 360 NextTok = Tokens->getNextToken(); 361 ++ReadTokens; 362 } while (NextTok->is(tok::comment)); 363 364 switch (Tok->Tok.getKind()) { 365 case tok::l_brace: 366 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 367 if (PrevTok->is(tok::colon)) 368 // A colon indicates this code is in a type, or a braced list 369 // following a label in an object literal ({a: {b: 1}}). The code 370 // below could be confused by semicolons between the individual 371 // members in a type member list, which would normally trigger 372 // BK_Block. In both cases, this must be parsed as an inline braced 373 // init. 374 Tok->BlockKind = BK_BracedInit; 375 else if (PrevTok->is(tok::r_paren)) 376 // `) { }` can only occur in function or method declarations in JS. 377 Tok->BlockKind = BK_Block; 378 } else { 379 Tok->BlockKind = BK_Unknown; 380 } 381 LBraceStack.push_back(Tok); 382 break; 383 case tok::r_brace: 384 if (LBraceStack.empty()) 385 break; 386 if (LBraceStack.back()->BlockKind == BK_Unknown) { 387 bool ProbablyBracedList = false; 388 if (Style.Language == FormatStyle::LK_Proto) { 389 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 390 } else { 391 // Using OriginalColumn to distinguish between ObjC methods and 392 // binary operators is a bit hacky. 393 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 394 NextTok->OriginalColumn == 0; 395 396 // If there is a comma, semicolon or right paren after the closing 397 // brace, we assume this is a braced initializer list. Note that 398 // regardless how we mark inner braces here, we will overwrite the 399 // BlockKind later if we parse a braced list (where all blocks 400 // inside are by default braced lists), or when we explicitly detect 401 // blocks (for example while parsing lambdas). 402 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 403 // braced list in JS. 404 ProbablyBracedList = 405 (Style.Language == FormatStyle::LK_JavaScript && 406 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 407 Keywords.kw_as)) || 408 (Style.isCpp() && NextTok->is(tok::l_paren)) || 409 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 410 tok::r_paren, tok::r_square, tok::l_brace, 411 tok::l_square, tok::ellipsis) || 412 (NextTok->is(tok::identifier) && 413 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 414 (NextTok->is(tok::semi) && 415 (!ExpectClassBody || LBraceStack.size() != 1)) || 416 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 417 } 418 if (ProbablyBracedList) { 419 Tok->BlockKind = BK_BracedInit; 420 LBraceStack.back()->BlockKind = BK_BracedInit; 421 } else { 422 Tok->BlockKind = BK_Block; 423 LBraceStack.back()->BlockKind = BK_Block; 424 } 425 } 426 LBraceStack.pop_back(); 427 break; 428 case tok::at: 429 case tok::semi: 430 case tok::kw_if: 431 case tok::kw_while: 432 case tok::kw_for: 433 case tok::kw_switch: 434 case tok::kw_try: 435 case tok::kw___try: 436 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 437 LBraceStack.back()->BlockKind = BK_Block; 438 break; 439 default: 440 break; 441 } 442 PrevTok = Tok; 443 Tok = NextTok; 444 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 445 446 // Assume other blocks for all unclosed opening braces. 447 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 448 if (LBraceStack[i]->BlockKind == BK_Unknown) 449 LBraceStack[i]->BlockKind = BK_Block; 450 } 451 452 FormatTok = Tokens->setPosition(StoredPosition); 453 } 454 455 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 456 bool MunchSemi) { 457 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 458 "'{' or macro block token expected"); 459 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 460 FormatTok->BlockKind = BK_Block; 461 462 unsigned InitialLevel = Line->Level; 463 nextToken(); 464 465 if (MacroBlock && FormatTok->is(tok::l_paren)) 466 parseParens(); 467 468 addUnwrappedLine(); 469 size_t OpeningLineIndex = CurrentLines->empty() 470 ? (UnwrappedLine::kInvalidIndex) 471 : (CurrentLines->size() - 1); 472 473 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 474 MustBeDeclaration); 475 if (AddLevel) 476 ++Line->Level; 477 parseLevel(/*HasOpeningBrace=*/true); 478 479 if (eof()) 480 return; 481 482 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 483 : !FormatTok->is(tok::r_brace)) { 484 Line->Level = InitialLevel; 485 FormatTok->BlockKind = BK_Block; 486 return; 487 } 488 489 nextToken(); // Munch the closing brace. 490 491 if (MacroBlock && FormatTok->is(tok::l_paren)) 492 parseParens(); 493 494 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 495 nextToken(); 496 Line->Level = InitialLevel; 497 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 498 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 499 // Update the opening line to add the forward reference as well 500 (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex = 501 CurrentLines->size() - 1; 502 } 503 } 504 505 static bool isGoogScope(const UnwrappedLine &Line) { 506 // FIXME: Closure-library specific stuff should not be hard-coded but be 507 // configurable. 508 if (Line.Tokens.size() < 4) 509 return false; 510 auto I = Line.Tokens.begin(); 511 if (I->Tok->TokenText != "goog") 512 return false; 513 ++I; 514 if (I->Tok->isNot(tok::period)) 515 return false; 516 ++I; 517 if (I->Tok->TokenText != "scope") 518 return false; 519 ++I; 520 return I->Tok->is(tok::l_paren); 521 } 522 523 static bool isIIFE(const UnwrappedLine &Line, 524 const AdditionalKeywords &Keywords) { 525 // Look for the start of an immediately invoked anonymous function. 526 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 527 // This is commonly done in JavaScript to create a new, anonymous scope. 528 // Example: (function() { ... })() 529 if (Line.Tokens.size() < 3) 530 return false; 531 auto I = Line.Tokens.begin(); 532 if (I->Tok->isNot(tok::l_paren)) 533 return false; 534 ++I; 535 if (I->Tok->isNot(Keywords.kw_function)) 536 return false; 537 ++I; 538 return I->Tok->is(tok::l_paren); 539 } 540 541 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 542 const FormatToken &InitialToken) { 543 if (InitialToken.is(tok::kw_namespace)) 544 return Style.BraceWrapping.AfterNamespace; 545 if (InitialToken.is(tok::kw_class)) 546 return Style.BraceWrapping.AfterClass; 547 if (InitialToken.is(tok::kw_union)) 548 return Style.BraceWrapping.AfterUnion; 549 if (InitialToken.is(tok::kw_struct)) 550 return Style.BraceWrapping.AfterStruct; 551 return false; 552 } 553 554 void UnwrappedLineParser::parseChildBlock() { 555 FormatTok->BlockKind = BK_Block; 556 nextToken(); 557 { 558 bool SkipIndent = 559 (Style.Language == FormatStyle::LK_JavaScript && 560 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 561 ScopedLineState LineState(*this); 562 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 563 /*MustBeDeclaration=*/false); 564 Line->Level += SkipIndent ? 0 : 1; 565 parseLevel(/*HasOpeningBrace=*/true); 566 flushComments(isOnNewLine(*FormatTok)); 567 Line->Level -= SkipIndent ? 0 : 1; 568 } 569 nextToken(); 570 } 571 572 void UnwrappedLineParser::parsePPDirective() { 573 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 574 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 575 nextToken(); 576 577 if (!FormatTok->Tok.getIdentifierInfo()) { 578 parsePPUnknown(); 579 return; 580 } 581 582 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 583 case tok::pp_define: 584 parsePPDefine(); 585 return; 586 case tok::pp_if: 587 parsePPIf(/*IfDef=*/false); 588 break; 589 case tok::pp_ifdef: 590 case tok::pp_ifndef: 591 parsePPIf(/*IfDef=*/true); 592 break; 593 case tok::pp_else: 594 parsePPElse(); 595 break; 596 case tok::pp_elif: 597 parsePPElIf(); 598 break; 599 case tok::pp_endif: 600 parsePPEndIf(); 601 break; 602 default: 603 parsePPUnknown(); 604 break; 605 } 606 } 607 608 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 609 if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable)) 610 PPStack.push_back(PP_Unreachable); 611 else 612 PPStack.push_back(PP_Conditional); 613 } 614 615 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 616 ++PPBranchLevel; 617 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 618 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 619 PPLevelBranchIndex.push_back(0); 620 PPLevelBranchCount.push_back(0); 621 } 622 PPChainBranchIndex.push(0); 623 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 624 conditionalCompilationCondition(Unreachable || Skip); 625 } 626 627 void UnwrappedLineParser::conditionalCompilationAlternative() { 628 if (!PPStack.empty()) 629 PPStack.pop_back(); 630 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 631 if (!PPChainBranchIndex.empty()) 632 ++PPChainBranchIndex.top(); 633 conditionalCompilationCondition( 634 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 635 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 636 } 637 638 void UnwrappedLineParser::conditionalCompilationEnd() { 639 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 640 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 641 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 642 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 643 } 644 } 645 // Guard against #endif's without #if. 646 if (PPBranchLevel > 0) 647 --PPBranchLevel; 648 if (!PPChainBranchIndex.empty()) 649 PPChainBranchIndex.pop(); 650 if (!PPStack.empty()) 651 PPStack.pop_back(); 652 } 653 654 void UnwrappedLineParser::parsePPIf(bool IfDef) { 655 bool IfNDef = FormatTok->is(tok::pp_ifndef); 656 nextToken(); 657 bool Unreachable = false; 658 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 659 Unreachable = true; 660 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 661 Unreachable = true; 662 conditionalCompilationStart(Unreachable); 663 parsePPUnknown(); 664 } 665 666 void UnwrappedLineParser::parsePPElse() { 667 conditionalCompilationAlternative(); 668 parsePPUnknown(); 669 } 670 671 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 672 673 void UnwrappedLineParser::parsePPEndIf() { 674 conditionalCompilationEnd(); 675 parsePPUnknown(); 676 } 677 678 void UnwrappedLineParser::parsePPDefine() { 679 nextToken(); 680 681 if (FormatTok->Tok.getKind() != tok::identifier) { 682 parsePPUnknown(); 683 return; 684 } 685 nextToken(); 686 if (FormatTok->Tok.getKind() == tok::l_paren && 687 FormatTok->WhitespaceRange.getBegin() == 688 FormatTok->WhitespaceRange.getEnd()) { 689 parseParens(); 690 } 691 addUnwrappedLine(); 692 Line->Level = 1; 693 694 // Errors during a preprocessor directive can only affect the layout of the 695 // preprocessor directive, and thus we ignore them. An alternative approach 696 // would be to use the same approach we use on the file level (no 697 // re-indentation if there was a structural error) within the macro 698 // definition. 699 parseFile(); 700 } 701 702 void UnwrappedLineParser::parsePPUnknown() { 703 do { 704 nextToken(); 705 } while (!eof()); 706 addUnwrappedLine(); 707 } 708 709 // Here we blacklist certain tokens that are not usually the first token in an 710 // unwrapped line. This is used in attempt to distinguish macro calls without 711 // trailing semicolons from other constructs split to several lines. 712 static bool tokenCanStartNewLine(const clang::Token &Tok) { 713 // Semicolon can be a null-statement, l_square can be a start of a macro or 714 // a C++11 attribute, but this doesn't seem to be common. 715 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 716 Tok.isNot(tok::l_square) && 717 // Tokens that can only be used as binary operators and a part of 718 // overloaded operator names. 719 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 720 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 721 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 722 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 723 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 724 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 725 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 726 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 727 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 728 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 729 Tok.isNot(tok::lesslessequal) && 730 // Colon is used in labels, base class lists, initializer lists, 731 // range-based for loops, ternary operator, but should never be the 732 // first token in an unwrapped line. 733 Tok.isNot(tok::colon) && 734 // 'noexcept' is a trailing annotation. 735 Tok.isNot(tok::kw_noexcept); 736 } 737 738 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 739 const FormatToken *FormatTok) { 740 // FIXME: This returns true for C/C++ keywords like 'struct'. 741 return FormatTok->is(tok::identifier) && 742 (FormatTok->Tok.getIdentifierInfo() == nullptr || 743 !FormatTok->isOneOf( 744 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 745 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 746 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 747 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 748 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 749 Keywords.kw_instanceof, Keywords.kw_interface, 750 Keywords.kw_throws)); 751 } 752 753 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 754 const FormatToken *FormatTok) { 755 return FormatTok->Tok.isLiteral() || 756 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 757 mustBeJSIdent(Keywords, FormatTok); 758 } 759 760 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 761 // when encountered after a value (see mustBeJSIdentOrValue). 762 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 763 const FormatToken *FormatTok) { 764 return FormatTok->isOneOf( 765 tok::kw_return, Keywords.kw_yield, 766 // conditionals 767 tok::kw_if, tok::kw_else, 768 // loops 769 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 770 // switch/case 771 tok::kw_switch, tok::kw_case, 772 // exceptions 773 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 774 // declaration 775 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 776 Keywords.kw_async, Keywords.kw_function, 777 // import/export 778 Keywords.kw_import, tok::kw_export); 779 } 780 781 // readTokenWithJavaScriptASI reads the next token and terminates the current 782 // line if JavaScript Automatic Semicolon Insertion must 783 // happen between the current token and the next token. 784 // 785 // This method is conservative - it cannot cover all edge cases of JavaScript, 786 // but only aims to correctly handle certain well known cases. It *must not* 787 // return true in speculative cases. 788 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 789 FormatToken *Previous = FormatTok; 790 readToken(); 791 FormatToken *Next = FormatTok; 792 793 bool IsOnSameLine = 794 CommentsBeforeNextToken.empty() 795 ? Next->NewlinesBefore == 0 796 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 797 if (IsOnSameLine) 798 return; 799 800 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 801 bool PreviousStartsTemplateExpr = 802 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 803 if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) { 804 // If the token before the previous one is an '@', the previous token is an 805 // annotation and can precede another identifier/value. 806 const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok; 807 if (PrePrevious->is(tok::at)) 808 return; 809 } 810 if (Next->is(tok::exclaim) && PreviousMustBeValue) 811 return addUnwrappedLine(); 812 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 813 bool NextEndsTemplateExpr = 814 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 815 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 816 (PreviousMustBeValue || 817 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 818 tok::minusminus))) 819 return addUnwrappedLine(); 820 if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next)) 821 return addUnwrappedLine(); 822 } 823 824 void UnwrappedLineParser::parseStructuralElement() { 825 assert(!FormatTok->is(tok::l_brace)); 826 if (Style.Language == FormatStyle::LK_TableGen && 827 FormatTok->is(tok::pp_include)) { 828 nextToken(); 829 if (FormatTok->is(tok::string_literal)) 830 nextToken(); 831 addUnwrappedLine(); 832 return; 833 } 834 switch (FormatTok->Tok.getKind()) { 835 case tok::at: 836 nextToken(); 837 if (FormatTok->Tok.is(tok::l_brace)) { 838 nextToken(); 839 parseBracedList(); 840 break; 841 } 842 switch (FormatTok->Tok.getObjCKeywordID()) { 843 case tok::objc_public: 844 case tok::objc_protected: 845 case tok::objc_package: 846 case tok::objc_private: 847 return parseAccessSpecifier(); 848 case tok::objc_interface: 849 case tok::objc_implementation: 850 return parseObjCInterfaceOrImplementation(); 851 case tok::objc_protocol: 852 return parseObjCProtocol(); 853 case tok::objc_end: 854 return; // Handled by the caller. 855 case tok::objc_optional: 856 case tok::objc_required: 857 nextToken(); 858 addUnwrappedLine(); 859 return; 860 case tok::objc_autoreleasepool: 861 nextToken(); 862 if (FormatTok->Tok.is(tok::l_brace)) { 863 if (Style.BraceWrapping.AfterObjCDeclaration) 864 addUnwrappedLine(); 865 parseBlock(/*MustBeDeclaration=*/false); 866 } 867 addUnwrappedLine(); 868 return; 869 case tok::objc_try: 870 // This branch isn't strictly necessary (the kw_try case below would 871 // do this too after the tok::at is parsed above). But be explicit. 872 parseTryCatch(); 873 return; 874 default: 875 break; 876 } 877 break; 878 case tok::kw_asm: 879 nextToken(); 880 if (FormatTok->is(tok::l_brace)) { 881 FormatTok->Type = TT_InlineASMBrace; 882 nextToken(); 883 while (FormatTok && FormatTok->isNot(tok::eof)) { 884 if (FormatTok->is(tok::r_brace)) { 885 FormatTok->Type = TT_InlineASMBrace; 886 nextToken(); 887 addUnwrappedLine(); 888 break; 889 } 890 FormatTok->Finalized = true; 891 nextToken(); 892 } 893 } 894 break; 895 case tok::kw_namespace: 896 parseNamespace(); 897 return; 898 case tok::kw_inline: 899 nextToken(); 900 if (FormatTok->Tok.is(tok::kw_namespace)) { 901 parseNamespace(); 902 return; 903 } 904 break; 905 case tok::kw_public: 906 case tok::kw_protected: 907 case tok::kw_private: 908 if (Style.Language == FormatStyle::LK_Java || 909 Style.Language == FormatStyle::LK_JavaScript) 910 nextToken(); 911 else 912 parseAccessSpecifier(); 913 return; 914 case tok::kw_if: 915 parseIfThenElse(); 916 return; 917 case tok::kw_for: 918 case tok::kw_while: 919 parseForOrWhileLoop(); 920 return; 921 case tok::kw_do: 922 parseDoWhile(); 923 return; 924 case tok::kw_switch: 925 parseSwitch(); 926 return; 927 case tok::kw_default: 928 nextToken(); 929 parseLabel(); 930 return; 931 case tok::kw_case: 932 parseCaseLabel(); 933 return; 934 case tok::kw_try: 935 case tok::kw___try: 936 parseTryCatch(); 937 return; 938 case tok::kw_extern: 939 nextToken(); 940 if (FormatTok->Tok.is(tok::string_literal)) { 941 nextToken(); 942 if (FormatTok->Tok.is(tok::l_brace)) { 943 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 944 addUnwrappedLine(); 945 return; 946 } 947 } 948 break; 949 case tok::kw_export: 950 if (Style.Language == FormatStyle::LK_JavaScript) { 951 parseJavaScriptEs6ImportExport(); 952 return; 953 } 954 break; 955 case tok::identifier: 956 if (FormatTok->is(TT_ForEachMacro)) { 957 parseForOrWhileLoop(); 958 return; 959 } 960 if (FormatTok->is(TT_MacroBlockBegin)) { 961 parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, 962 /*MunchSemi=*/false); 963 return; 964 } 965 if (FormatTok->is(Keywords.kw_import)) { 966 if (Style.Language == FormatStyle::LK_JavaScript) { 967 parseJavaScriptEs6ImportExport(); 968 return; 969 } 970 if (Style.Language == FormatStyle::LK_Proto) { 971 nextToken(); 972 if (FormatTok->is(tok::kw_public)) 973 nextToken(); 974 if (!FormatTok->is(tok::string_literal)) 975 return; 976 nextToken(); 977 if (FormatTok->is(tok::semi)) 978 nextToken(); 979 addUnwrappedLine(); 980 return; 981 } 982 } 983 if (Style.isCpp() && 984 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 985 Keywords.kw_slots, Keywords.kw_qslots)) { 986 nextToken(); 987 if (FormatTok->is(tok::colon)) { 988 nextToken(); 989 addUnwrappedLine(); 990 return; 991 } 992 } 993 // In all other cases, parse the declaration. 994 break; 995 default: 996 break; 997 } 998 do { 999 const FormatToken *Previous = getPreviousToken(); 1000 switch (FormatTok->Tok.getKind()) { 1001 case tok::at: 1002 nextToken(); 1003 if (FormatTok->Tok.is(tok::l_brace)) { 1004 nextToken(); 1005 parseBracedList(); 1006 } 1007 break; 1008 case tok::kw_enum: 1009 // Ignore if this is part of "template <enum ...". 1010 if (Previous && Previous->is(tok::less)) { 1011 nextToken(); 1012 break; 1013 } 1014 1015 // parseEnum falls through and does not yet add an unwrapped line as an 1016 // enum definition can start a structural element. 1017 if (!parseEnum()) 1018 break; 1019 // This only applies for C++. 1020 if (!Style.isCpp()) { 1021 addUnwrappedLine(); 1022 return; 1023 } 1024 break; 1025 case tok::kw_typedef: 1026 nextToken(); 1027 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1028 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 1029 parseEnum(); 1030 break; 1031 case tok::kw_struct: 1032 case tok::kw_union: 1033 case tok::kw_class: 1034 // parseRecord falls through and does not yet add an unwrapped line as a 1035 // record declaration or definition can start a structural element. 1036 parseRecord(); 1037 // This does not apply for Java and JavaScript. 1038 if (Style.Language == FormatStyle::LK_Java || 1039 Style.Language == FormatStyle::LK_JavaScript) { 1040 if (FormatTok->is(tok::semi)) 1041 nextToken(); 1042 addUnwrappedLine(); 1043 return; 1044 } 1045 break; 1046 case tok::period: 1047 nextToken(); 1048 // In Java, classes have an implicit static member "class". 1049 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1050 FormatTok->is(tok::kw_class)) 1051 nextToken(); 1052 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 1053 FormatTok->Tok.getIdentifierInfo()) 1054 // JavaScript only has pseudo keywords, all keywords are allowed to 1055 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1056 nextToken(); 1057 break; 1058 case tok::semi: 1059 nextToken(); 1060 addUnwrappedLine(); 1061 return; 1062 case tok::r_brace: 1063 addUnwrappedLine(); 1064 return; 1065 case tok::l_paren: 1066 parseParens(); 1067 break; 1068 case tok::kw_operator: 1069 nextToken(); 1070 if (FormatTok->isBinaryOperator()) 1071 nextToken(); 1072 break; 1073 case tok::caret: 1074 nextToken(); 1075 if (FormatTok->Tok.isAnyIdentifier() || 1076 FormatTok->isSimpleTypeSpecifier()) 1077 nextToken(); 1078 if (FormatTok->is(tok::l_paren)) 1079 parseParens(); 1080 if (FormatTok->is(tok::l_brace)) 1081 parseChildBlock(); 1082 break; 1083 case tok::l_brace: 1084 if (!tryToParseBracedList()) { 1085 // A block outside of parentheses must be the last part of a 1086 // structural element. 1087 // FIXME: Figure out cases where this is not true, and add projections 1088 // for them (the one we know is missing are lambdas). 1089 if (Style.BraceWrapping.AfterFunction) 1090 addUnwrappedLine(); 1091 FormatTok->Type = TT_FunctionLBrace; 1092 parseBlock(/*MustBeDeclaration=*/false); 1093 addUnwrappedLine(); 1094 return; 1095 } 1096 // Otherwise this was a braced init list, and the structural 1097 // element continues. 1098 break; 1099 case tok::kw_try: 1100 // We arrive here when parsing function-try blocks. 1101 parseTryCatch(); 1102 return; 1103 case tok::identifier: { 1104 if (FormatTok->is(TT_MacroBlockEnd)) { 1105 addUnwrappedLine(); 1106 return; 1107 } 1108 1109 // Function declarations (as opposed to function expressions) are parsed 1110 // on their own unwrapped line by continuing this loop. Function 1111 // expressions (functions that are not on their own line) must not create 1112 // a new unwrapped line, so they are special cased below. 1113 size_t TokenCount = Line->Tokens.size(); 1114 if (Style.Language == FormatStyle::LK_JavaScript && 1115 FormatTok->is(Keywords.kw_function) && 1116 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1117 Keywords.kw_async)))) { 1118 tryToParseJSFunction(); 1119 break; 1120 } 1121 if ((Style.Language == FormatStyle::LK_JavaScript || 1122 Style.Language == FormatStyle::LK_Java) && 1123 FormatTok->is(Keywords.kw_interface)) { 1124 if (Style.Language == FormatStyle::LK_JavaScript) { 1125 // In JavaScript/TypeScript, "interface" can be used as a standalone 1126 // identifier, e.g. in `var interface = 1;`. If "interface" is 1127 // followed by another identifier, it is very like to be an actual 1128 // interface declaration. 1129 unsigned StoredPosition = Tokens->getPosition(); 1130 FormatToken *Next = Tokens->getNextToken(); 1131 FormatTok = Tokens->setPosition(StoredPosition); 1132 if (Next && !mustBeJSIdent(Keywords, Next)) { 1133 nextToken(); 1134 break; 1135 } 1136 } 1137 parseRecord(); 1138 addUnwrappedLine(); 1139 return; 1140 } 1141 1142 // See if the following token should start a new unwrapped line. 1143 StringRef Text = FormatTok->TokenText; 1144 nextToken(); 1145 if (Line->Tokens.size() == 1 && 1146 // JS doesn't have macros, and within classes colons indicate fields, 1147 // not labels. 1148 Style.Language != FormatStyle::LK_JavaScript) { 1149 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1150 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1151 parseLabel(); 1152 return; 1153 } 1154 // Recognize function-like macro usages without trailing semicolon as 1155 // well as free-standing macros like Q_OBJECT. 1156 bool FunctionLike = FormatTok->is(tok::l_paren); 1157 if (FunctionLike) 1158 parseParens(); 1159 1160 bool FollowedByNewline = 1161 CommentsBeforeNextToken.empty() 1162 ? FormatTok->NewlinesBefore > 0 1163 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1164 1165 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1166 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 1167 addUnwrappedLine(); 1168 return; 1169 } 1170 } 1171 break; 1172 } 1173 case tok::equal: 1174 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 1175 // TT_JsFatArrow. The always start an expression or a child block if 1176 // followed by a curly. 1177 if (FormatTok->is(TT_JsFatArrow)) { 1178 nextToken(); 1179 if (FormatTok->is(tok::l_brace)) 1180 parseChildBlock(); 1181 break; 1182 } 1183 1184 nextToken(); 1185 if (FormatTok->Tok.is(tok::l_brace)) { 1186 nextToken(); 1187 parseBracedList(); 1188 } else if (Style.Language == FormatStyle::LK_Proto && 1189 FormatTok->Tok.is(tok::less)) { 1190 nextToken(); 1191 parseBracedList(/*ContinueOnSemicolons=*/false, 1192 /*ClosingBraceKind=*/tok::greater); 1193 } 1194 break; 1195 case tok::l_square: 1196 parseSquare(); 1197 break; 1198 case tok::kw_new: 1199 parseNew(); 1200 break; 1201 default: 1202 nextToken(); 1203 break; 1204 } 1205 } while (!eof()); 1206 } 1207 1208 bool UnwrappedLineParser::tryToParseLambda() { 1209 if (!Style.isCpp()) { 1210 nextToken(); 1211 return false; 1212 } 1213 const FormatToken* Previous = getPreviousToken(); 1214 if (Previous && 1215 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1216 tok::kw_delete) || 1217 Previous->closesScope() || Previous->isSimpleTypeSpecifier())) { 1218 nextToken(); 1219 return false; 1220 } 1221 assert(FormatTok->is(tok::l_square)); 1222 FormatToken &LSquare = *FormatTok; 1223 if (!tryToParseLambdaIntroducer()) 1224 return false; 1225 1226 while (FormatTok->isNot(tok::l_brace)) { 1227 if (FormatTok->isSimpleTypeSpecifier()) { 1228 nextToken(); 1229 continue; 1230 } 1231 switch (FormatTok->Tok.getKind()) { 1232 case tok::l_brace: 1233 break; 1234 case tok::l_paren: 1235 parseParens(); 1236 break; 1237 case tok::amp: 1238 case tok::star: 1239 case tok::kw_const: 1240 case tok::comma: 1241 case tok::less: 1242 case tok::greater: 1243 case tok::identifier: 1244 case tok::numeric_constant: 1245 case tok::coloncolon: 1246 case tok::kw_mutable: 1247 nextToken(); 1248 break; 1249 case tok::arrow: 1250 FormatTok->Type = TT_LambdaArrow; 1251 nextToken(); 1252 break; 1253 default: 1254 return true; 1255 } 1256 } 1257 LSquare.Type = TT_LambdaLSquare; 1258 parseChildBlock(); 1259 return true; 1260 } 1261 1262 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1263 nextToken(); 1264 if (FormatTok->is(tok::equal)) { 1265 nextToken(); 1266 if (FormatTok->is(tok::r_square)) { 1267 nextToken(); 1268 return true; 1269 } 1270 if (FormatTok->isNot(tok::comma)) 1271 return false; 1272 nextToken(); 1273 } else if (FormatTok->is(tok::amp)) { 1274 nextToken(); 1275 if (FormatTok->is(tok::r_square)) { 1276 nextToken(); 1277 return true; 1278 } 1279 if (!FormatTok->isOneOf(tok::comma, tok::identifier)) { 1280 return false; 1281 } 1282 if (FormatTok->is(tok::comma)) 1283 nextToken(); 1284 } else if (FormatTok->is(tok::r_square)) { 1285 nextToken(); 1286 return true; 1287 } 1288 do { 1289 if (FormatTok->is(tok::amp)) 1290 nextToken(); 1291 if (!FormatTok->isOneOf(tok::identifier, tok::kw_this)) 1292 return false; 1293 nextToken(); 1294 if (FormatTok->is(tok::ellipsis)) 1295 nextToken(); 1296 if (FormatTok->is(tok::comma)) { 1297 nextToken(); 1298 } else if (FormatTok->is(tok::r_square)) { 1299 nextToken(); 1300 return true; 1301 } else { 1302 return false; 1303 } 1304 } while (!eof()); 1305 return false; 1306 } 1307 1308 void UnwrappedLineParser::tryToParseJSFunction() { 1309 assert(FormatTok->is(Keywords.kw_function) || 1310 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1311 if (FormatTok->is(Keywords.kw_async)) 1312 nextToken(); 1313 // Consume "function". 1314 nextToken(); 1315 1316 // Consume * (generator function). Treat it like C++'s overloaded operators. 1317 if (FormatTok->is(tok::star)) { 1318 FormatTok->Type = TT_OverloadedOperator; 1319 nextToken(); 1320 } 1321 1322 // Consume function name. 1323 if (FormatTok->is(tok::identifier)) 1324 nextToken(); 1325 1326 if (FormatTok->isNot(tok::l_paren)) 1327 return; 1328 1329 // Parse formal parameter list. 1330 parseParens(); 1331 1332 if (FormatTok->is(tok::colon)) { 1333 // Parse a type definition. 1334 nextToken(); 1335 1336 // Eat the type declaration. For braced inline object types, balance braces, 1337 // otherwise just parse until finding an l_brace for the function body. 1338 if (FormatTok->is(tok::l_brace)) 1339 tryToParseBracedList(); 1340 else 1341 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1342 nextToken(); 1343 } 1344 1345 if (FormatTok->is(tok::semi)) 1346 return; 1347 1348 parseChildBlock(); 1349 } 1350 1351 bool UnwrappedLineParser::tryToParseBracedList() { 1352 if (FormatTok->BlockKind == BK_Unknown) 1353 calculateBraceTypes(); 1354 assert(FormatTok->BlockKind != BK_Unknown); 1355 if (FormatTok->BlockKind == BK_Block) 1356 return false; 1357 nextToken(); 1358 parseBracedList(); 1359 return true; 1360 } 1361 1362 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1363 tok::TokenKind ClosingBraceKind) { 1364 bool HasError = false; 1365 1366 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1367 // replace this by using parseAssigmentExpression() inside. 1368 do { 1369 if (Style.Language == FormatStyle::LK_JavaScript) { 1370 if (FormatTok->is(Keywords.kw_function) || 1371 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1372 tryToParseJSFunction(); 1373 continue; 1374 } 1375 if (FormatTok->is(TT_JsFatArrow)) { 1376 nextToken(); 1377 // Fat arrows can be followed by simple expressions or by child blocks 1378 // in curly braces. 1379 if (FormatTok->is(tok::l_brace)) { 1380 parseChildBlock(); 1381 continue; 1382 } 1383 } 1384 if (FormatTok->is(tok::l_brace)) { 1385 // Could be a method inside of a braced list `{a() { return 1; }}`. 1386 if (tryToParseBracedList()) 1387 continue; 1388 parseChildBlock(); 1389 } 1390 } 1391 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1392 nextToken(); 1393 return !HasError; 1394 } 1395 switch (FormatTok->Tok.getKind()) { 1396 case tok::caret: 1397 nextToken(); 1398 if (FormatTok->is(tok::l_brace)) { 1399 parseChildBlock(); 1400 } 1401 break; 1402 case tok::l_square: 1403 tryToParseLambda(); 1404 break; 1405 case tok::l_paren: 1406 parseParens(); 1407 // JavaScript can just have free standing methods and getters/setters in 1408 // object literals. Detect them by a "{" following ")". 1409 if (Style.Language == FormatStyle::LK_JavaScript) { 1410 if (FormatTok->is(tok::l_brace)) 1411 parseChildBlock(); 1412 break; 1413 } 1414 break; 1415 case tok::l_brace: 1416 // Assume there are no blocks inside a braced init list apart 1417 // from the ones we explicitly parse out (like lambdas). 1418 FormatTok->BlockKind = BK_BracedInit; 1419 nextToken(); 1420 parseBracedList(); 1421 break; 1422 case tok::semi: 1423 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1424 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1425 // used for error recovery if we have otherwise determined that this is 1426 // a braced list. 1427 if (Style.Language == FormatStyle::LK_JavaScript) { 1428 nextToken(); 1429 break; 1430 } 1431 HasError = true; 1432 if (!ContinueOnSemicolons) 1433 return !HasError; 1434 nextToken(); 1435 break; 1436 case tok::comma: 1437 nextToken(); 1438 break; 1439 default: 1440 nextToken(); 1441 break; 1442 } 1443 } while (!eof()); 1444 return false; 1445 } 1446 1447 void UnwrappedLineParser::parseParens() { 1448 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1449 nextToken(); 1450 do { 1451 switch (FormatTok->Tok.getKind()) { 1452 case tok::l_paren: 1453 parseParens(); 1454 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1455 parseChildBlock(); 1456 break; 1457 case tok::r_paren: 1458 nextToken(); 1459 return; 1460 case tok::r_brace: 1461 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1462 return; 1463 case tok::l_square: 1464 tryToParseLambda(); 1465 break; 1466 case tok::l_brace: 1467 if (!tryToParseBracedList()) 1468 parseChildBlock(); 1469 break; 1470 case tok::at: 1471 nextToken(); 1472 if (FormatTok->Tok.is(tok::l_brace)) { 1473 nextToken(); 1474 parseBracedList(); 1475 } 1476 break; 1477 case tok::kw_class: 1478 if (Style.Language == FormatStyle::LK_JavaScript) 1479 parseRecord(/*ParseAsExpr=*/true); 1480 else 1481 nextToken(); 1482 break; 1483 case tok::identifier: 1484 if (Style.Language == FormatStyle::LK_JavaScript && 1485 (FormatTok->is(Keywords.kw_function) || 1486 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1487 tryToParseJSFunction(); 1488 else 1489 nextToken(); 1490 break; 1491 default: 1492 nextToken(); 1493 break; 1494 } 1495 } while (!eof()); 1496 } 1497 1498 void UnwrappedLineParser::parseSquare() { 1499 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1500 if (tryToParseLambda()) 1501 return; 1502 do { 1503 switch (FormatTok->Tok.getKind()) { 1504 case tok::l_paren: 1505 parseParens(); 1506 break; 1507 case tok::r_square: 1508 nextToken(); 1509 return; 1510 case tok::r_brace: 1511 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1512 return; 1513 case tok::l_square: 1514 parseSquare(); 1515 break; 1516 case tok::l_brace: { 1517 if (!tryToParseBracedList()) 1518 parseChildBlock(); 1519 break; 1520 } 1521 case tok::at: 1522 nextToken(); 1523 if (FormatTok->Tok.is(tok::l_brace)) { 1524 nextToken(); 1525 parseBracedList(); 1526 } 1527 break; 1528 default: 1529 nextToken(); 1530 break; 1531 } 1532 } while (!eof()); 1533 } 1534 1535 void UnwrappedLineParser::parseIfThenElse() { 1536 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1537 nextToken(); 1538 if (FormatTok->Tok.is(tok::kw_constexpr)) 1539 nextToken(); 1540 if (FormatTok->Tok.is(tok::l_paren)) 1541 parseParens(); 1542 bool NeedsUnwrappedLine = false; 1543 if (FormatTok->Tok.is(tok::l_brace)) { 1544 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1545 parseBlock(/*MustBeDeclaration=*/false); 1546 if (Style.BraceWrapping.BeforeElse) 1547 addUnwrappedLine(); 1548 else 1549 NeedsUnwrappedLine = true; 1550 } else { 1551 addUnwrappedLine(); 1552 ++Line->Level; 1553 parseStructuralElement(); 1554 --Line->Level; 1555 } 1556 if (FormatTok->Tok.is(tok::kw_else)) { 1557 nextToken(); 1558 if (FormatTok->Tok.is(tok::l_brace)) { 1559 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1560 parseBlock(/*MustBeDeclaration=*/false); 1561 addUnwrappedLine(); 1562 } else if (FormatTok->Tok.is(tok::kw_if)) { 1563 parseIfThenElse(); 1564 } else { 1565 addUnwrappedLine(); 1566 ++Line->Level; 1567 parseStructuralElement(); 1568 if (FormatTok->is(tok::eof)) 1569 addUnwrappedLine(); 1570 --Line->Level; 1571 } 1572 } else if (NeedsUnwrappedLine) { 1573 addUnwrappedLine(); 1574 } 1575 } 1576 1577 void UnwrappedLineParser::parseTryCatch() { 1578 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1579 nextToken(); 1580 bool NeedsUnwrappedLine = false; 1581 if (FormatTok->is(tok::colon)) { 1582 // We are in a function try block, what comes is an initializer list. 1583 nextToken(); 1584 while (FormatTok->is(tok::identifier)) { 1585 nextToken(); 1586 if (FormatTok->is(tok::l_paren)) 1587 parseParens(); 1588 if (FormatTok->is(tok::comma)) 1589 nextToken(); 1590 } 1591 } 1592 // Parse try with resource. 1593 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1594 parseParens(); 1595 } 1596 if (FormatTok->is(tok::l_brace)) { 1597 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1598 parseBlock(/*MustBeDeclaration=*/false); 1599 if (Style.BraceWrapping.BeforeCatch) { 1600 addUnwrappedLine(); 1601 } else { 1602 NeedsUnwrappedLine = true; 1603 } 1604 } else if (!FormatTok->is(tok::kw_catch)) { 1605 // The C++ standard requires a compound-statement after a try. 1606 // If there's none, we try to assume there's a structuralElement 1607 // and try to continue. 1608 addUnwrappedLine(); 1609 ++Line->Level; 1610 parseStructuralElement(); 1611 --Line->Level; 1612 } 1613 while (1) { 1614 if (FormatTok->is(tok::at)) 1615 nextToken(); 1616 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1617 tok::kw___finally) || 1618 ((Style.Language == FormatStyle::LK_Java || 1619 Style.Language == FormatStyle::LK_JavaScript) && 1620 FormatTok->is(Keywords.kw_finally)) || 1621 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1622 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1623 break; 1624 nextToken(); 1625 while (FormatTok->isNot(tok::l_brace)) { 1626 if (FormatTok->is(tok::l_paren)) { 1627 parseParens(); 1628 continue; 1629 } 1630 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1631 return; 1632 nextToken(); 1633 } 1634 NeedsUnwrappedLine = false; 1635 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1636 parseBlock(/*MustBeDeclaration=*/false); 1637 if (Style.BraceWrapping.BeforeCatch) 1638 addUnwrappedLine(); 1639 else 1640 NeedsUnwrappedLine = true; 1641 } 1642 if (NeedsUnwrappedLine) 1643 addUnwrappedLine(); 1644 } 1645 1646 void UnwrappedLineParser::parseNamespace() { 1647 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1648 1649 const FormatToken &InitialToken = *FormatTok; 1650 nextToken(); 1651 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1652 nextToken(); 1653 if (FormatTok->Tok.is(tok::l_brace)) { 1654 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1655 addUnwrappedLine(); 1656 1657 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1658 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1659 DeclarationScopeStack.size() > 1); 1660 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1661 // Munch the semicolon after a namespace. This is more common than one would 1662 // think. Puttin the semicolon into its own line is very ugly. 1663 if (FormatTok->Tok.is(tok::semi)) 1664 nextToken(); 1665 addUnwrappedLine(); 1666 } 1667 // FIXME: Add error handling. 1668 } 1669 1670 void UnwrappedLineParser::parseNew() { 1671 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1672 nextToken(); 1673 if (Style.Language != FormatStyle::LK_Java) 1674 return; 1675 1676 // In Java, we can parse everything up to the parens, which aren't optional. 1677 do { 1678 // There should not be a ;, { or } before the new's open paren. 1679 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1680 return; 1681 1682 // Consume the parens. 1683 if (FormatTok->is(tok::l_paren)) { 1684 parseParens(); 1685 1686 // If there is a class body of an anonymous class, consume that as child. 1687 if (FormatTok->is(tok::l_brace)) 1688 parseChildBlock(); 1689 return; 1690 } 1691 nextToken(); 1692 } while (!eof()); 1693 } 1694 1695 void UnwrappedLineParser::parseForOrWhileLoop() { 1696 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1697 "'for', 'while' or foreach macro expected"); 1698 nextToken(); 1699 // JS' for await ( ... 1700 if (Style.Language == FormatStyle::LK_JavaScript && 1701 FormatTok->is(Keywords.kw_await)) 1702 nextToken(); 1703 if (FormatTok->Tok.is(tok::l_paren)) 1704 parseParens(); 1705 if (FormatTok->Tok.is(tok::l_brace)) { 1706 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1707 parseBlock(/*MustBeDeclaration=*/false); 1708 addUnwrappedLine(); 1709 } else { 1710 addUnwrappedLine(); 1711 ++Line->Level; 1712 parseStructuralElement(); 1713 --Line->Level; 1714 } 1715 } 1716 1717 void UnwrappedLineParser::parseDoWhile() { 1718 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1719 nextToken(); 1720 if (FormatTok->Tok.is(tok::l_brace)) { 1721 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1722 parseBlock(/*MustBeDeclaration=*/false); 1723 if (Style.BraceWrapping.IndentBraces) 1724 addUnwrappedLine(); 1725 } else { 1726 addUnwrappedLine(); 1727 ++Line->Level; 1728 parseStructuralElement(); 1729 --Line->Level; 1730 } 1731 1732 // FIXME: Add error handling. 1733 if (!FormatTok->Tok.is(tok::kw_while)) { 1734 addUnwrappedLine(); 1735 return; 1736 } 1737 1738 nextToken(); 1739 parseStructuralElement(); 1740 } 1741 1742 void UnwrappedLineParser::parseLabel() { 1743 nextToken(); 1744 unsigned OldLineLevel = Line->Level; 1745 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1746 --Line->Level; 1747 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1748 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1749 parseBlock(/*MustBeDeclaration=*/false); 1750 if (FormatTok->Tok.is(tok::kw_break)) { 1751 if (Style.BraceWrapping.AfterControlStatement) 1752 addUnwrappedLine(); 1753 parseStructuralElement(); 1754 } 1755 addUnwrappedLine(); 1756 } else { 1757 if (FormatTok->is(tok::semi)) 1758 nextToken(); 1759 addUnwrappedLine(); 1760 } 1761 Line->Level = OldLineLevel; 1762 if (FormatTok->isNot(tok::l_brace)) { 1763 parseStructuralElement(); 1764 addUnwrappedLine(); 1765 } 1766 } 1767 1768 void UnwrappedLineParser::parseCaseLabel() { 1769 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1770 // FIXME: fix handling of complex expressions here. 1771 do { 1772 nextToken(); 1773 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1774 parseLabel(); 1775 } 1776 1777 void UnwrappedLineParser::parseSwitch() { 1778 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1779 nextToken(); 1780 if (FormatTok->Tok.is(tok::l_paren)) 1781 parseParens(); 1782 if (FormatTok->Tok.is(tok::l_brace)) { 1783 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1784 parseBlock(/*MustBeDeclaration=*/false); 1785 addUnwrappedLine(); 1786 } else { 1787 addUnwrappedLine(); 1788 ++Line->Level; 1789 parseStructuralElement(); 1790 --Line->Level; 1791 } 1792 } 1793 1794 void UnwrappedLineParser::parseAccessSpecifier() { 1795 nextToken(); 1796 // Understand Qt's slots. 1797 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 1798 nextToken(); 1799 // Otherwise, we don't know what it is, and we'd better keep the next token. 1800 if (FormatTok->Tok.is(tok::colon)) 1801 nextToken(); 1802 addUnwrappedLine(); 1803 } 1804 1805 bool UnwrappedLineParser::parseEnum() { 1806 // Won't be 'enum' for NS_ENUMs. 1807 if (FormatTok->Tok.is(tok::kw_enum)) 1808 nextToken(); 1809 1810 // In TypeScript, "enum" can also be used as property name, e.g. in interface 1811 // declarations. An "enum" keyword followed by a colon would be a syntax 1812 // error and thus assume it is just an identifier. 1813 if (Style.Language == FormatStyle::LK_JavaScript && 1814 FormatTok->isOneOf(tok::colon, tok::question)) 1815 return false; 1816 1817 // Eat up enum class ... 1818 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 1819 nextToken(); 1820 1821 while (FormatTok->Tok.getIdentifierInfo() || 1822 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 1823 tok::greater, tok::comma, tok::question)) { 1824 nextToken(); 1825 // We can have macros or attributes in between 'enum' and the enum name. 1826 if (FormatTok->is(tok::l_paren)) 1827 parseParens(); 1828 if (FormatTok->is(tok::identifier)) { 1829 nextToken(); 1830 // If there are two identifiers in a row, this is likely an elaborate 1831 // return type. In Java, this can be "implements", etc. 1832 if (Style.isCpp() && FormatTok->is(tok::identifier)) 1833 return false; 1834 } 1835 } 1836 1837 // Just a declaration or something is wrong. 1838 if (FormatTok->isNot(tok::l_brace)) 1839 return true; 1840 FormatTok->BlockKind = BK_Block; 1841 1842 if (Style.Language == FormatStyle::LK_Java) { 1843 // Java enums are different. 1844 parseJavaEnumBody(); 1845 return true; 1846 } 1847 if (Style.Language == FormatStyle::LK_Proto) { 1848 parseBlock(/*MustBeDeclaration=*/true); 1849 return true; 1850 } 1851 1852 // Parse enum body. 1853 nextToken(); 1854 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 1855 if (HasError) { 1856 if (FormatTok->is(tok::semi)) 1857 nextToken(); 1858 addUnwrappedLine(); 1859 } 1860 return true; 1861 1862 // There is no addUnwrappedLine() here so that we fall through to parsing a 1863 // structural element afterwards. Thus, in "enum A {} n, m;", 1864 // "} n, m;" will end up in one unwrapped line. 1865 } 1866 1867 void UnwrappedLineParser::parseJavaEnumBody() { 1868 // Determine whether the enum is simple, i.e. does not have a semicolon or 1869 // constants with class bodies. Simple enums can be formatted like braced 1870 // lists, contracted to a single line, etc. 1871 unsigned StoredPosition = Tokens->getPosition(); 1872 bool IsSimple = true; 1873 FormatToken *Tok = Tokens->getNextToken(); 1874 while (Tok) { 1875 if (Tok->is(tok::r_brace)) 1876 break; 1877 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 1878 IsSimple = false; 1879 break; 1880 } 1881 // FIXME: This will also mark enums with braces in the arguments to enum 1882 // constants as "not simple". This is probably fine in practice, though. 1883 Tok = Tokens->getNextToken(); 1884 } 1885 FormatTok = Tokens->setPosition(StoredPosition); 1886 1887 if (IsSimple) { 1888 nextToken(); 1889 parseBracedList(); 1890 addUnwrappedLine(); 1891 return; 1892 } 1893 1894 // Parse the body of a more complex enum. 1895 // First add a line for everything up to the "{". 1896 nextToken(); 1897 addUnwrappedLine(); 1898 ++Line->Level; 1899 1900 // Parse the enum constants. 1901 while (FormatTok) { 1902 if (FormatTok->is(tok::l_brace)) { 1903 // Parse the constant's class body. 1904 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1905 /*MunchSemi=*/false); 1906 } else if (FormatTok->is(tok::l_paren)) { 1907 parseParens(); 1908 } else if (FormatTok->is(tok::comma)) { 1909 nextToken(); 1910 addUnwrappedLine(); 1911 } else if (FormatTok->is(tok::semi)) { 1912 nextToken(); 1913 addUnwrappedLine(); 1914 break; 1915 } else if (FormatTok->is(tok::r_brace)) { 1916 addUnwrappedLine(); 1917 break; 1918 } else { 1919 nextToken(); 1920 } 1921 } 1922 1923 // Parse the class body after the enum's ";" if any. 1924 parseLevel(/*HasOpeningBrace=*/true); 1925 nextToken(); 1926 --Line->Level; 1927 addUnwrappedLine(); 1928 } 1929 1930 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 1931 const FormatToken &InitialToken = *FormatTok; 1932 nextToken(); 1933 1934 // The actual identifier can be a nested name specifier, and in macros 1935 // it is often token-pasted. 1936 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 1937 tok::kw___attribute, tok::kw___declspec, 1938 tok::kw_alignas) || 1939 ((Style.Language == FormatStyle::LK_Java || 1940 Style.Language == FormatStyle::LK_JavaScript) && 1941 FormatTok->isOneOf(tok::period, tok::comma))) { 1942 bool IsNonMacroIdentifier = 1943 FormatTok->is(tok::identifier) && 1944 FormatTok->TokenText != FormatTok->TokenText.upper(); 1945 nextToken(); 1946 // We can have macros or attributes in between 'class' and the class name. 1947 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 1948 parseParens(); 1949 } 1950 1951 // Note that parsing away template declarations here leads to incorrectly 1952 // accepting function declarations as record declarations. 1953 // In general, we cannot solve this problem. Consider: 1954 // class A<int> B() {} 1955 // which can be a function definition or a class definition when B() is a 1956 // macro. If we find enough real-world cases where this is a problem, we 1957 // can parse for the 'template' keyword in the beginning of the statement, 1958 // and thus rule out the record production in case there is no template 1959 // (this would still leave us with an ambiguity between template function 1960 // and class declarations). 1961 if (FormatTok->isOneOf(tok::colon, tok::less)) { 1962 while (!eof()) { 1963 if (FormatTok->is(tok::l_brace)) { 1964 calculateBraceTypes(/*ExpectClassBody=*/true); 1965 if (!tryToParseBracedList()) 1966 break; 1967 } 1968 if (FormatTok->Tok.is(tok::semi)) 1969 return; 1970 nextToken(); 1971 } 1972 } 1973 if (FormatTok->Tok.is(tok::l_brace)) { 1974 if (ParseAsExpr) { 1975 parseChildBlock(); 1976 } else { 1977 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1978 addUnwrappedLine(); 1979 1980 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1981 /*MunchSemi=*/false); 1982 } 1983 } 1984 // There is no addUnwrappedLine() here so that we fall through to parsing a 1985 // structural element afterwards. Thus, in "class A {} n, m;", 1986 // "} n, m;" will end up in one unwrapped line. 1987 } 1988 1989 void UnwrappedLineParser::parseObjCProtocolList() { 1990 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 1991 do 1992 nextToken(); 1993 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 1994 nextToken(); // Skip '>'. 1995 } 1996 1997 void UnwrappedLineParser::parseObjCUntilAtEnd() { 1998 do { 1999 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2000 nextToken(); 2001 addUnwrappedLine(); 2002 break; 2003 } 2004 if (FormatTok->is(tok::l_brace)) { 2005 parseBlock(/*MustBeDeclaration=*/false); 2006 // In ObjC interfaces, nothing should be following the "}". 2007 addUnwrappedLine(); 2008 } else if (FormatTok->is(tok::r_brace)) { 2009 // Ignore stray "}". parseStructuralElement doesn't consume them. 2010 nextToken(); 2011 addUnwrappedLine(); 2012 } else { 2013 parseStructuralElement(); 2014 } 2015 } while (!eof()); 2016 } 2017 2018 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2019 nextToken(); 2020 nextToken(); // interface name 2021 2022 // @interface can be followed by either a base class, or a category. 2023 if (FormatTok->Tok.is(tok::colon)) { 2024 nextToken(); 2025 nextToken(); // base class name 2026 } else if (FormatTok->Tok.is(tok::l_paren)) 2027 // Skip category, if present. 2028 parseParens(); 2029 2030 if (FormatTok->Tok.is(tok::less)) 2031 parseObjCProtocolList(); 2032 2033 if (FormatTok->Tok.is(tok::l_brace)) { 2034 if (Style.BraceWrapping.AfterObjCDeclaration) 2035 addUnwrappedLine(); 2036 parseBlock(/*MustBeDeclaration=*/true); 2037 } 2038 2039 // With instance variables, this puts '}' on its own line. Without instance 2040 // variables, this ends the @interface line. 2041 addUnwrappedLine(); 2042 2043 parseObjCUntilAtEnd(); 2044 } 2045 2046 void UnwrappedLineParser::parseObjCProtocol() { 2047 nextToken(); 2048 nextToken(); // protocol name 2049 2050 if (FormatTok->Tok.is(tok::less)) 2051 parseObjCProtocolList(); 2052 2053 // Check for protocol declaration. 2054 if (FormatTok->Tok.is(tok::semi)) { 2055 nextToken(); 2056 return addUnwrappedLine(); 2057 } 2058 2059 addUnwrappedLine(); 2060 parseObjCUntilAtEnd(); 2061 } 2062 2063 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2064 bool IsImport = FormatTok->is(Keywords.kw_import); 2065 assert(IsImport || FormatTok->is(tok::kw_export)); 2066 nextToken(); 2067 2068 // Consume the "default" in "export default class/function". 2069 if (FormatTok->is(tok::kw_default)) 2070 nextToken(); 2071 2072 // Consume "async function", "function" and "default function", so that these 2073 // get parsed as free-standing JS functions, i.e. do not require a trailing 2074 // semicolon. 2075 if (FormatTok->is(Keywords.kw_async)) 2076 nextToken(); 2077 if (FormatTok->is(Keywords.kw_function)) { 2078 nextToken(); 2079 return; 2080 } 2081 2082 // For imports, `export *`, `export {...}`, consume the rest of the line up 2083 // to the terminating `;`. For everything else, just return and continue 2084 // parsing the structural element, i.e. the declaration or expression for 2085 // `export default`. 2086 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2087 !FormatTok->isStringLiteral()) 2088 return; 2089 2090 while (!eof()) { 2091 if (FormatTok->is(tok::semi)) 2092 return; 2093 if (Line->Tokens.size() == 0) { 2094 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2095 // import statement should terminate. 2096 return; 2097 } 2098 if (FormatTok->is(tok::l_brace)) { 2099 FormatTok->BlockKind = BK_Block; 2100 nextToken(); 2101 parseBracedList(); 2102 } else { 2103 nextToken(); 2104 } 2105 } 2106 } 2107 2108 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2109 StringRef Prefix = "") { 2110 llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" 2111 << (Line.InPPDirective ? " MACRO" : "") << ": "; 2112 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2113 E = Line.Tokens.end(); 2114 I != E; ++I) { 2115 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2116 << "T=" << I->Tok->Type 2117 << ", OC=" << I->Tok->OriginalColumn << "] "; 2118 } 2119 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2120 E = Line.Tokens.end(); 2121 I != E; ++I) { 2122 const UnwrappedLineNode &Node = *I; 2123 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2124 I = Node.Children.begin(), 2125 E = Node.Children.end(); 2126 I != E; ++I) { 2127 printDebugInfo(*I, "\nChild: "); 2128 } 2129 } 2130 llvm::dbgs() << "\n"; 2131 } 2132 2133 void UnwrappedLineParser::addUnwrappedLine() { 2134 if (Line->Tokens.empty()) 2135 return; 2136 DEBUG({ 2137 if (CurrentLines == &Lines) 2138 printDebugInfo(*Line); 2139 }); 2140 CurrentLines->push_back(std::move(*Line)); 2141 Line->Tokens.clear(); 2142 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2143 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2144 CurrentLines->append( 2145 std::make_move_iterator(PreprocessorDirectives.begin()), 2146 std::make_move_iterator(PreprocessorDirectives.end())); 2147 PreprocessorDirectives.clear(); 2148 } 2149 } 2150 2151 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2152 2153 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2154 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2155 FormatTok.NewlinesBefore > 0; 2156 } 2157 2158 // Checks if \p FormatTok is a line comment that continues the line comment 2159 // section on \p Line. 2160 static bool continuesLineCommentSection(const FormatToken &FormatTok, 2161 const UnwrappedLine &Line, 2162 llvm::Regex &CommentPragmasRegex) { 2163 if (Line.Tokens.empty()) 2164 return false; 2165 2166 StringRef IndentContent = FormatTok.TokenText; 2167 if (FormatTok.TokenText.startswith("//") || 2168 FormatTok.TokenText.startswith("/*")) 2169 IndentContent = FormatTok.TokenText.substr(2); 2170 if (CommentPragmasRegex.match(IndentContent)) 2171 return false; 2172 2173 // If Line starts with a line comment, then FormatTok continues the comment 2174 // section if its original column is greater or equal to the original start 2175 // column of the line. 2176 // 2177 // Define the min column token of a line as follows: if a line ends in '{' or 2178 // contains a '{' followed by a line comment, then the min column token is 2179 // that '{'. Otherwise, the min column token of the line is the first token of 2180 // the line. 2181 // 2182 // If Line starts with a token other than a line comment, then FormatTok 2183 // continues the comment section if its original column is greater than the 2184 // original start column of the min column token of the line. 2185 // 2186 // For example, the second line comment continues the first in these cases: 2187 // 2188 // // first line 2189 // // second line 2190 // 2191 // and: 2192 // 2193 // // first line 2194 // // second line 2195 // 2196 // and: 2197 // 2198 // int i; // first line 2199 // // second line 2200 // 2201 // and: 2202 // 2203 // do { // first line 2204 // // second line 2205 // int i; 2206 // } while (true); 2207 // 2208 // and: 2209 // 2210 // enum { 2211 // a, // first line 2212 // // second line 2213 // b 2214 // }; 2215 // 2216 // The second line comment doesn't continue the first in these cases: 2217 // 2218 // // first line 2219 // // second line 2220 // 2221 // and: 2222 // 2223 // int i; // first line 2224 // // second line 2225 // 2226 // and: 2227 // 2228 // do { // first line 2229 // // second line 2230 // int i; 2231 // } while (true); 2232 // 2233 // and: 2234 // 2235 // enum { 2236 // a, // first line 2237 // // second line 2238 // }; 2239 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2240 2241 // Scan for '{//'. If found, use the column of '{' as a min column for line 2242 // comment section continuation. 2243 const FormatToken *PreviousToken = nullptr; 2244 for (const UnwrappedLineNode &Node : Line.Tokens) { 2245 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2246 isLineComment(*Node.Tok)) { 2247 MinColumnToken = PreviousToken; 2248 break; 2249 } 2250 PreviousToken = Node.Tok; 2251 2252 // Grab the last newline preceding a token in this unwrapped line. 2253 if (Node.Tok->NewlinesBefore > 0) { 2254 MinColumnToken = Node.Tok; 2255 } 2256 } 2257 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2258 MinColumnToken = PreviousToken; 2259 } 2260 2261 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2262 MinColumnToken); 2263 } 2264 2265 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2266 bool JustComments = Line->Tokens.empty(); 2267 for (SmallVectorImpl<FormatToken *>::const_iterator 2268 I = CommentsBeforeNextToken.begin(), 2269 E = CommentsBeforeNextToken.end(); 2270 I != E; ++I) { 2271 // Line comments that belong to the same line comment section are put on the 2272 // same line since later we might want to reflow content between them. 2273 // Additional fine-grained breaking of line comment sections is controlled 2274 // by the class BreakableLineCommentSection in case it is desirable to keep 2275 // several line comment sections in the same unwrapped line. 2276 // 2277 // FIXME: Consider putting separate line comment sections as children to the 2278 // unwrapped line instead. 2279 (*I)->ContinuesLineCommentSection = 2280 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2281 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2282 addUnwrappedLine(); 2283 pushToken(*I); 2284 } 2285 if (NewlineBeforeNext && JustComments) 2286 addUnwrappedLine(); 2287 CommentsBeforeNextToken.clear(); 2288 } 2289 2290 void UnwrappedLineParser::nextToken() { 2291 if (eof()) 2292 return; 2293 flushComments(isOnNewLine(*FormatTok)); 2294 pushToken(FormatTok); 2295 if (Style.Language != FormatStyle::LK_JavaScript) 2296 readToken(); 2297 else 2298 readTokenWithJavaScriptASI(); 2299 } 2300 2301 const FormatToken *UnwrappedLineParser::getPreviousToken() { 2302 // FIXME: This is a dirty way to access the previous token. Find a better 2303 // solution. 2304 if (!Line || Line->Tokens.empty()) 2305 return nullptr; 2306 return Line->Tokens.back().Tok; 2307 } 2308 2309 void UnwrappedLineParser::distributeComments( 2310 const SmallVectorImpl<FormatToken *> &Comments, 2311 const FormatToken *NextTok) { 2312 // Whether or not a line comment token continues a line is controlled by 2313 // the method continuesLineCommentSection, with the following caveat: 2314 // 2315 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2316 // that each comment line from the trail is aligned with the next token, if 2317 // the next token exists. If a trail exists, the beginning of the maximal 2318 // trail is marked as a start of a new comment section. 2319 // 2320 // For example in this code: 2321 // 2322 // int a; // line about a 2323 // // line 1 about b 2324 // // line 2 about b 2325 // int b; 2326 // 2327 // the two lines about b form a maximal trail, so there are two sections, the 2328 // first one consisting of the single comment "// line about a" and the 2329 // second one consisting of the next two comments. 2330 if (Comments.empty()) 2331 return; 2332 bool ShouldPushCommentsInCurrentLine = true; 2333 bool HasTrailAlignedWithNextToken = false; 2334 unsigned StartOfTrailAlignedWithNextToken = 0; 2335 if (NextTok) { 2336 // We are skipping the first element intentionally. 2337 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2338 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2339 HasTrailAlignedWithNextToken = true; 2340 StartOfTrailAlignedWithNextToken = i; 2341 } 2342 } 2343 } 2344 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2345 FormatToken *FormatTok = Comments[i]; 2346 if (HasTrailAlignedWithNextToken && 2347 i == StartOfTrailAlignedWithNextToken) { 2348 FormatTok->ContinuesLineCommentSection = false; 2349 } else { 2350 FormatTok->ContinuesLineCommentSection = 2351 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2352 } 2353 if (!FormatTok->ContinuesLineCommentSection && 2354 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2355 ShouldPushCommentsInCurrentLine = false; 2356 } 2357 if (ShouldPushCommentsInCurrentLine) { 2358 pushToken(FormatTok); 2359 } else { 2360 CommentsBeforeNextToken.push_back(FormatTok); 2361 } 2362 } 2363 } 2364 2365 void UnwrappedLineParser::readToken() { 2366 SmallVector<FormatToken *, 1> Comments; 2367 do { 2368 FormatTok = Tokens->getNextToken(); 2369 assert(FormatTok); 2370 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2371 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2372 distributeComments(Comments, FormatTok); 2373 Comments.clear(); 2374 // If there is an unfinished unwrapped line, we flush the preprocessor 2375 // directives only after that unwrapped line was finished later. 2376 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2377 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2378 // Comments stored before the preprocessor directive need to be output 2379 // before the preprocessor directive, at the same level as the 2380 // preprocessor directive, as we consider them to apply to the directive. 2381 flushComments(isOnNewLine(*FormatTok)); 2382 parsePPDirective(); 2383 } 2384 while (FormatTok->Type == TT_ConflictStart || 2385 FormatTok->Type == TT_ConflictEnd || 2386 FormatTok->Type == TT_ConflictAlternative) { 2387 if (FormatTok->Type == TT_ConflictStart) { 2388 conditionalCompilationStart(/*Unreachable=*/false); 2389 } else if (FormatTok->Type == TT_ConflictAlternative) { 2390 conditionalCompilationAlternative(); 2391 } else if (FormatTok->Type == TT_ConflictEnd) { 2392 conditionalCompilationEnd(); 2393 } 2394 FormatTok = Tokens->getNextToken(); 2395 FormatTok->MustBreakBefore = true; 2396 } 2397 2398 if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && 2399 !Line->InPPDirective) { 2400 continue; 2401 } 2402 2403 if (!FormatTok->Tok.is(tok::comment)) { 2404 distributeComments(Comments, FormatTok); 2405 Comments.clear(); 2406 return; 2407 } 2408 2409 Comments.push_back(FormatTok); 2410 } while (!eof()); 2411 2412 distributeComments(Comments, nullptr); 2413 Comments.clear(); 2414 } 2415 2416 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2417 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2418 if (MustBreakBeforeNextToken) { 2419 Line->Tokens.back().Tok->MustBreakBefore = true; 2420 MustBreakBeforeNextToken = false; 2421 } 2422 } 2423 2424 } // end namespace format 2425 } // end namespace clang 2426