1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "UnwrappedLineParser.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/Support/Debug.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #define DEBUG_TYPE "format-parser" 22 23 namespace clang { 24 namespace format { 25 26 class FormatTokenSource { 27 public: 28 virtual ~FormatTokenSource() {} 29 virtual FormatToken *getNextToken() = 0; 30 31 virtual unsigned getPosition() = 0; 32 virtual FormatToken *setPosition(unsigned Position) = 0; 33 }; 34 35 namespace { 36 37 class ScopedDeclarationState { 38 public: 39 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 40 bool MustBeDeclaration) 41 : Line(Line), Stack(Stack) { 42 Line.MustBeDeclaration = MustBeDeclaration; 43 Stack.push_back(MustBeDeclaration); 44 } 45 ~ScopedDeclarationState() { 46 Stack.pop_back(); 47 if (!Stack.empty()) 48 Line.MustBeDeclaration = Stack.back(); 49 else 50 Line.MustBeDeclaration = true; 51 } 52 53 private: 54 UnwrappedLine &Line; 55 std::vector<bool> &Stack; 56 }; 57 58 static bool isLineComment(const FormatToken &FormatTok) { 59 return FormatTok.is(tok::comment) && 60 FormatTok.TokenText.startswith("//"); 61 } 62 63 // Checks if \p FormatTok is a line comment that continues the line comment 64 // \p Previous. The original column of \p MinColumnToken is used to determine 65 // whether \p FormatTok is indented enough to the right to continue \p Previous. 
66 static bool continuesLineComment(const FormatToken &FormatTok, 67 const FormatToken *Previous, 68 const FormatToken *MinColumnToken) { 69 if (!Previous || !MinColumnToken) 70 return false; 71 unsigned MinContinueColumn = 72 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 73 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 74 isLineComment(*Previous) && 75 FormatTok.OriginalColumn >= MinContinueColumn; 76 } 77 78 class ScopedMacroState : public FormatTokenSource { 79 public: 80 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 81 FormatToken *&ResetToken) 82 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 83 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 84 Token(nullptr), PreviousToken(nullptr) { 85 TokenSource = this; 86 Line.Level = 0; 87 Line.InPPDirective = true; 88 } 89 90 ~ScopedMacroState() override { 91 TokenSource = PreviousTokenSource; 92 ResetToken = Token; 93 Line.InPPDirective = false; 94 Line.Level = PreviousLineLevel; 95 } 96 97 FormatToken *getNextToken() override { 98 // The \c UnwrappedLineParser guards against this by never calling 99 // \c getNextToken() after it has encountered the first eof token. 
100 assert(!eof()); 101 PreviousToken = Token; 102 Token = PreviousTokenSource->getNextToken(); 103 if (eof()) 104 return getFakeEOF(); 105 return Token; 106 } 107 108 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 109 110 FormatToken *setPosition(unsigned Position) override { 111 PreviousToken = nullptr; 112 Token = PreviousTokenSource->setPosition(Position); 113 return Token; 114 } 115 116 private: 117 bool eof() { 118 return Token && Token->HasUnescapedNewline && 119 !continuesLineComment(*Token, PreviousToken, 120 /*MinColumnToken=*/PreviousToken); 121 } 122 123 FormatToken *getFakeEOF() { 124 static bool EOFInitialized = false; 125 static FormatToken FormatTok; 126 if (!EOFInitialized) { 127 FormatTok.Tok.startToken(); 128 FormatTok.Tok.setKind(tok::eof); 129 EOFInitialized = true; 130 } 131 return &FormatTok; 132 } 133 134 UnwrappedLine &Line; 135 FormatTokenSource *&TokenSource; 136 FormatToken *&ResetToken; 137 unsigned PreviousLineLevel; 138 FormatTokenSource *PreviousTokenSource; 139 140 FormatToken *Token; 141 FormatToken *PreviousToken; 142 }; 143 144 } // end anonymous namespace 145 146 class ScopedLineState { 147 public: 148 ScopedLineState(UnwrappedLineParser &Parser, 149 bool SwitchToPreprocessorLines = false) 150 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 151 if (SwitchToPreprocessorLines) 152 Parser.CurrentLines = &Parser.PreprocessorDirectives; 153 else if (!Parser.Line->Tokens.empty()) 154 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 155 PreBlockLine = std::move(Parser.Line); 156 Parser.Line = llvm::make_unique<UnwrappedLine>(); 157 Parser.Line->Level = PreBlockLine->Level; 158 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 159 } 160 161 ~ScopedLineState() { 162 if (!Parser.Line->Tokens.empty()) { 163 Parser.addUnwrappedLine(); 164 } 165 assert(Parser.Line->Tokens.empty()); 166 Parser.Line = std::move(PreBlockLine); 167 if (Parser.CurrentLines == 
&Parser.PreprocessorDirectives) 168 Parser.MustBreakBeforeNextToken = true; 169 Parser.CurrentLines = OriginalLines; 170 } 171 172 private: 173 UnwrappedLineParser &Parser; 174 175 std::unique_ptr<UnwrappedLine> PreBlockLine; 176 SmallVectorImpl<UnwrappedLine> *OriginalLines; 177 }; 178 179 class CompoundStatementIndenter { 180 public: 181 CompoundStatementIndenter(UnwrappedLineParser *Parser, 182 const FormatStyle &Style, unsigned &LineLevel) 183 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 184 if (Style.BraceWrapping.AfterControlStatement) 185 Parser->addUnwrappedLine(); 186 if (Style.BraceWrapping.IndentBraces) 187 ++LineLevel; 188 } 189 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 190 191 private: 192 unsigned &LineLevel; 193 unsigned OldLineLevel; 194 }; 195 196 namespace { 197 198 class IndexedTokenSource : public FormatTokenSource { 199 public: 200 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 201 : Tokens(Tokens), Position(-1) {} 202 203 FormatToken *getNextToken() override { 204 ++Position; 205 return Tokens[Position]; 206 } 207 208 unsigned getPosition() override { 209 assert(Position >= 0); 210 return Position; 211 } 212 213 FormatToken *setPosition(unsigned P) override { 214 Position = P; 215 return Tokens[Position]; 216 } 217 218 void reset() { Position = -1; } 219 220 private: 221 ArrayRef<FormatToken *> Tokens; 222 int Position; 223 }; 224 225 } // end anonymous namespace 226 227 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 228 const AdditionalKeywords &Keywords, 229 ArrayRef<FormatToken *> Tokens, 230 UnwrappedLineConsumer &Callback) 231 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 232 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 233 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 234 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {} 235 236 void UnwrappedLineParser::reset() { 237 PPBranchLevel = -1; 238 Line.reset(new UnwrappedLine); 239 
CommentsBeforeNextToken.clear(); 240 FormatTok = nullptr; 241 MustBreakBeforeNextToken = false; 242 PreprocessorDirectives.clear(); 243 CurrentLines = &Lines; 244 DeclarationScopeStack.clear(); 245 PPStack.clear(); 246 } 247 248 void UnwrappedLineParser::parse() { 249 IndexedTokenSource TokenSource(AllTokens); 250 do { 251 DEBUG(llvm::dbgs() << "----\n"); 252 reset(); 253 Tokens = &TokenSource; 254 TokenSource.reset(); 255 256 readToken(); 257 parseFile(); 258 // Create line with eof token. 259 pushToken(FormatTok); 260 addUnwrappedLine(); 261 262 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 263 E = Lines.end(); 264 I != E; ++I) { 265 Callback.consumeUnwrappedLine(*I); 266 } 267 Callback.finishRun(); 268 Lines.clear(); 269 while (!PPLevelBranchIndex.empty() && 270 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 271 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 272 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 273 } 274 if (!PPLevelBranchIndex.empty()) { 275 ++PPLevelBranchIndex.back(); 276 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 277 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 278 } 279 } while (!PPLevelBranchIndex.empty()); 280 } 281 282 void UnwrappedLineParser::parseFile() { 283 // The top-level context in a file always has declarations, except for pre- 284 // processor directives and JavaScript files. 285 bool MustBeDeclaration = 286 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 287 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 288 MustBeDeclaration); 289 parseLevel(/*HasOpeningBrace=*/false); 290 // Make sure to format the remaining tokens. 
291 flushComments(true); 292 addUnwrappedLine(); 293 } 294 295 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 296 bool SwitchLabelEncountered = false; 297 do { 298 tok::TokenKind kind = FormatTok->Tok.getKind(); 299 if (FormatTok->Type == TT_MacroBlockBegin) { 300 kind = tok::l_brace; 301 } else if (FormatTok->Type == TT_MacroBlockEnd) { 302 kind = tok::r_brace; 303 } 304 305 switch (kind) { 306 case tok::comment: 307 nextToken(); 308 addUnwrappedLine(); 309 break; 310 case tok::l_brace: 311 // FIXME: Add parameter whether this can happen - if this happens, we must 312 // be in a non-declaration context. 313 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 314 continue; 315 parseBlock(/*MustBeDeclaration=*/false); 316 addUnwrappedLine(); 317 break; 318 case tok::r_brace: 319 if (HasOpeningBrace) 320 return; 321 nextToken(); 322 addUnwrappedLine(); 323 break; 324 case tok::kw_default: 325 case tok::kw_case: 326 if (!SwitchLabelEncountered && 327 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 328 ++Line->Level; 329 SwitchLabelEncountered = true; 330 parseStructuralElement(); 331 break; 332 default: 333 parseStructuralElement(); 334 break; 335 } 336 } while (!eof()); 337 } 338 339 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 340 // We'll parse forward through the tokens until we hit 341 // a closing brace or eof - note that getNextToken() will 342 // parse macros, so this will magically work inside macro 343 // definitions, too. 344 unsigned StoredPosition = Tokens->getPosition(); 345 FormatToken *Tok = FormatTok; 346 const FormatToken *PrevTok = getPreviousToken(); 347 // Keep a stack of positions of lbrace tokens. We will 348 // update information about whether an lbrace starts a 349 // braced init list or a different block during the loop. 350 SmallVector<FormatToken *, 8> LBraceStack; 351 assert(Tok->Tok.is(tok::l_brace)); 352 do { 353 // Get next non-comment token. 
354 FormatToken *NextTok; 355 unsigned ReadTokens = 0; 356 do { 357 NextTok = Tokens->getNextToken(); 358 ++ReadTokens; 359 } while (NextTok->is(tok::comment)); 360 361 switch (Tok->Tok.getKind()) { 362 case tok::l_brace: 363 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 364 if (PrevTok->is(tok::colon)) 365 // A colon indicates this code is in a type, or a braced list 366 // following a label in an object literal ({a: {b: 1}}). The code 367 // below could be confused by semicolons between the individual 368 // members in a type member list, which would normally trigger 369 // BK_Block. In both cases, this must be parsed as an inline braced 370 // init. 371 Tok->BlockKind = BK_BracedInit; 372 else if (PrevTok->is(tok::r_paren)) 373 // `) { }` can only occur in function or method declarations in JS. 374 Tok->BlockKind = BK_Block; 375 } else { 376 Tok->BlockKind = BK_Unknown; 377 } 378 LBraceStack.push_back(Tok); 379 break; 380 case tok::r_brace: 381 if (LBraceStack.empty()) 382 break; 383 if (LBraceStack.back()->BlockKind == BK_Unknown) { 384 bool ProbablyBracedList = false; 385 if (Style.Language == FormatStyle::LK_Proto) { 386 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 387 } else { 388 // Using OriginalColumn to distinguish between ObjC methods and 389 // binary operators is a bit hacky. 390 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 391 NextTok->OriginalColumn == 0; 392 393 // If there is a comma, semicolon or right paren after the closing 394 // brace, we assume this is a braced initializer list. Note that 395 // regardless how we mark inner braces here, we will overwrite the 396 // BlockKind later if we parse a braced list (where all blocks 397 // inside are by default braced lists), or when we explicitly detect 398 // blocks (for example while parsing lambdas). 399 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 400 // braced list in JS. 
401 ProbablyBracedList = 402 (Style.Language == FormatStyle::LK_JavaScript && 403 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 404 Keywords.kw_as)) || 405 (Style.isCpp() && NextTok->is(tok::l_paren)) || 406 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 407 tok::r_paren, tok::r_square, tok::l_brace, 408 tok::l_square, tok::ellipsis) || 409 (NextTok->is(tok::identifier) && 410 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 411 (NextTok->is(tok::semi) && 412 (!ExpectClassBody || LBraceStack.size() != 1)) || 413 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 414 } 415 if (ProbablyBracedList) { 416 Tok->BlockKind = BK_BracedInit; 417 LBraceStack.back()->BlockKind = BK_BracedInit; 418 } else { 419 Tok->BlockKind = BK_Block; 420 LBraceStack.back()->BlockKind = BK_Block; 421 } 422 } 423 LBraceStack.pop_back(); 424 break; 425 case tok::at: 426 case tok::semi: 427 case tok::kw_if: 428 case tok::kw_while: 429 case tok::kw_for: 430 case tok::kw_switch: 431 case tok::kw_try: 432 case tok::kw___try: 433 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 434 LBraceStack.back()->BlockKind = BK_Block; 435 break; 436 default: 437 break; 438 } 439 PrevTok = Tok; 440 Tok = NextTok; 441 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 442 443 // Assume other blocks for all unclosed opening braces. 
444 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 445 if (LBraceStack[i]->BlockKind == BK_Unknown) 446 LBraceStack[i]->BlockKind = BK_Block; 447 } 448 449 FormatTok = Tokens->setPosition(StoredPosition); 450 } 451 452 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 453 bool MunchSemi) { 454 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 455 "'{' or macro block token expected"); 456 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 457 FormatTok->BlockKind = BK_Block; 458 459 unsigned InitialLevel = Line->Level; 460 nextToken(); 461 462 if (MacroBlock && FormatTok->is(tok::l_paren)) 463 parseParens(); 464 465 addUnwrappedLine(); 466 size_t OpeningLineIndex = CurrentLines->empty() 467 ? (UnwrappedLine::kInvalidIndex) 468 : (CurrentLines->size() - 1); 469 470 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 471 MustBeDeclaration); 472 if (AddLevel) 473 ++Line->Level; 474 parseLevel(/*HasOpeningBrace=*/true); 475 476 if (eof()) 477 return; 478 479 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 480 : !FormatTok->is(tok::r_brace)) { 481 Line->Level = InitialLevel; 482 FormatTok->BlockKind = BK_Block; 483 return; 484 } 485 486 nextToken(); // Munch the closing brace. 487 488 if (MacroBlock && FormatTok->is(tok::l_paren)) 489 parseParens(); 490 491 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 492 nextToken(); 493 Line->Level = InitialLevel; 494 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 495 } 496 497 static bool isGoogScope(const UnwrappedLine &Line) { 498 // FIXME: Closure-library specific stuff should not be hard-coded but be 499 // configurable. 
500 if (Line.Tokens.size() < 4) 501 return false; 502 auto I = Line.Tokens.begin(); 503 if (I->Tok->TokenText != "goog") 504 return false; 505 ++I; 506 if (I->Tok->isNot(tok::period)) 507 return false; 508 ++I; 509 if (I->Tok->TokenText != "scope") 510 return false; 511 ++I; 512 return I->Tok->is(tok::l_paren); 513 } 514 515 static bool isIIFE(const UnwrappedLine &Line, 516 const AdditionalKeywords &Keywords) { 517 // Look for the start of an immediately invoked anonymous function. 518 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 519 // This is commonly done in JavaScript to create a new, anonymous scope. 520 // Example: (function() { ... })() 521 if (Line.Tokens.size() < 3) 522 return false; 523 auto I = Line.Tokens.begin(); 524 if (I->Tok->isNot(tok::l_paren)) 525 return false; 526 ++I; 527 if (I->Tok->isNot(Keywords.kw_function)) 528 return false; 529 ++I; 530 return I->Tok->is(tok::l_paren); 531 } 532 533 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 534 const FormatToken &InitialToken) { 535 if (InitialToken.is(tok::kw_namespace)) 536 return Style.BraceWrapping.AfterNamespace; 537 if (InitialToken.is(tok::kw_class)) 538 return Style.BraceWrapping.AfterClass; 539 if (InitialToken.is(tok::kw_union)) 540 return Style.BraceWrapping.AfterUnion; 541 if (InitialToken.is(tok::kw_struct)) 542 return Style.BraceWrapping.AfterStruct; 543 return false; 544 } 545 546 void UnwrappedLineParser::parseChildBlock() { 547 FormatTok->BlockKind = BK_Block; 548 nextToken(); 549 { 550 bool SkipIndent = 551 (Style.Language == FormatStyle::LK_JavaScript && 552 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 553 ScopedLineState LineState(*this); 554 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 555 /*MustBeDeclaration=*/false); 556 Line->Level += SkipIndent ? 0 : 1; 557 parseLevel(/*HasOpeningBrace=*/true); 558 flushComments(isOnNewLine(*FormatTok)); 559 Line->Level -= SkipIndent ? 
0 : 1; 560 } 561 nextToken(); 562 } 563 564 void UnwrappedLineParser::parsePPDirective() { 565 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 566 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 567 nextToken(); 568 569 if (!FormatTok->Tok.getIdentifierInfo()) { 570 parsePPUnknown(); 571 return; 572 } 573 574 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 575 case tok::pp_define: 576 parsePPDefine(); 577 return; 578 case tok::pp_if: 579 parsePPIf(/*IfDef=*/false); 580 break; 581 case tok::pp_ifdef: 582 case tok::pp_ifndef: 583 parsePPIf(/*IfDef=*/true); 584 break; 585 case tok::pp_else: 586 parsePPElse(); 587 break; 588 case tok::pp_elif: 589 parsePPElIf(); 590 break; 591 case tok::pp_endif: 592 parsePPEndIf(); 593 break; 594 default: 595 parsePPUnknown(); 596 break; 597 } 598 } 599 600 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 601 if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable)) 602 PPStack.push_back(PP_Unreachable); 603 else 604 PPStack.push_back(PP_Conditional); 605 } 606 607 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 608 ++PPBranchLevel; 609 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 610 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 611 PPLevelBranchIndex.push_back(0); 612 PPLevelBranchCount.push_back(0); 613 } 614 PPChainBranchIndex.push(0); 615 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 616 conditionalCompilationCondition(Unreachable || Skip); 617 } 618 619 void UnwrappedLineParser::conditionalCompilationAlternative() { 620 if (!PPStack.empty()) 621 PPStack.pop_back(); 622 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 623 if (!PPChainBranchIndex.empty()) 624 ++PPChainBranchIndex.top(); 625 conditionalCompilationCondition( 626 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 627 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 628 } 629 630 void 
UnwrappedLineParser::conditionalCompilationEnd() { 631 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 632 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 633 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 634 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 635 } 636 } 637 // Guard against #endif's without #if. 638 if (PPBranchLevel > 0) 639 --PPBranchLevel; 640 if (!PPChainBranchIndex.empty()) 641 PPChainBranchIndex.pop(); 642 if (!PPStack.empty()) 643 PPStack.pop_back(); 644 } 645 646 void UnwrappedLineParser::parsePPIf(bool IfDef) { 647 bool IfNDef = FormatTok->is(tok::pp_ifndef); 648 nextToken(); 649 bool Unreachable = false; 650 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 651 Unreachable = true; 652 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 653 Unreachable = true; 654 conditionalCompilationStart(Unreachable); 655 parsePPUnknown(); 656 } 657 658 void UnwrappedLineParser::parsePPElse() { 659 conditionalCompilationAlternative(); 660 parsePPUnknown(); 661 } 662 663 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 664 665 void UnwrappedLineParser::parsePPEndIf() { 666 conditionalCompilationEnd(); 667 parsePPUnknown(); 668 } 669 670 void UnwrappedLineParser::parsePPDefine() { 671 nextToken(); 672 673 if (FormatTok->Tok.getKind() != tok::identifier) { 674 parsePPUnknown(); 675 return; 676 } 677 nextToken(); 678 if (FormatTok->Tok.getKind() == tok::l_paren && 679 FormatTok->WhitespaceRange.getBegin() == 680 FormatTok->WhitespaceRange.getEnd()) { 681 parseParens(); 682 } 683 addUnwrappedLine(); 684 Line->Level = 1; 685 686 // Errors during a preprocessor directive can only affect the layout of the 687 // preprocessor directive, and thus we ignore them. An alternative approach 688 // would be to use the same approach we use on the file level (no 689 // re-indentation if there was a structural error) within the macro 690 // definition. 
691 parseFile(); 692 } 693 694 void UnwrappedLineParser::parsePPUnknown() { 695 do { 696 nextToken(); 697 } while (!eof()); 698 addUnwrappedLine(); 699 } 700 701 // Here we blacklist certain tokens that are not usually the first token in an 702 // unwrapped line. This is used in attempt to distinguish macro calls without 703 // trailing semicolons from other constructs split to several lines. 704 static bool tokenCanStartNewLine(const clang::Token &Tok) { 705 // Semicolon can be a null-statement, l_square can be a start of a macro or 706 // a C++11 attribute, but this doesn't seem to be common. 707 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 708 Tok.isNot(tok::l_square) && 709 // Tokens that can only be used as binary operators and a part of 710 // overloaded operator names. 711 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 712 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 713 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 714 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 715 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 716 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 717 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 718 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 719 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 720 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 721 Tok.isNot(tok::lesslessequal) && 722 // Colon is used in labels, base class lists, initializer lists, 723 // range-based for loops, ternary operator, but should never be the 724 // first token in an unwrapped line. 725 Tok.isNot(tok::colon) && 726 // 'noexcept' is a trailing annotation. 727 Tok.isNot(tok::kw_noexcept); 728 } 729 730 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 731 const FormatToken *FormatTok) { 732 // FIXME: This returns true for C/C++ keywords like 'struct'. 
733 return FormatTok->is(tok::identifier) && 734 (FormatTok->Tok.getIdentifierInfo() == nullptr || 735 !FormatTok->isOneOf( 736 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 737 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 738 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 739 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 740 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 741 Keywords.kw_instanceof, Keywords.kw_interface, 742 Keywords.kw_throws)); 743 } 744 745 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 746 const FormatToken *FormatTok) { 747 return FormatTok->Tok.isLiteral() || 748 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 749 mustBeJSIdent(Keywords, FormatTok); 750 } 751 752 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 753 // when encountered after a value (see mustBeJSIdentOrValue). 754 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 755 const FormatToken *FormatTok) { 756 return FormatTok->isOneOf( 757 tok::kw_return, Keywords.kw_yield, 758 // conditionals 759 tok::kw_if, tok::kw_else, 760 // loops 761 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 762 // switch/case 763 tok::kw_switch, tok::kw_case, 764 // exceptions 765 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 766 // declaration 767 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 768 Keywords.kw_async, Keywords.kw_function, 769 // import/export 770 Keywords.kw_import, tok::kw_export); 771 } 772 773 // readTokenWithJavaScriptASI reads the next token and terminates the current 774 // line if JavaScript Automatic Semicolon Insertion must 775 // happen between the current token and the next token. 776 // 777 // This method is conservative - it cannot cover all edge cases of JavaScript, 778 // but only aims to correctly handle certain well known cases. 
It *must not* 779 // return true in speculative cases. 780 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 781 FormatToken *Previous = FormatTok; 782 readToken(); 783 FormatToken *Next = FormatTok; 784 785 bool IsOnSameLine = 786 CommentsBeforeNextToken.empty() 787 ? Next->NewlinesBefore == 0 788 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 789 if (IsOnSameLine) 790 return; 791 792 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 793 bool PreviousStartsTemplateExpr = 794 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 795 if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) { 796 // If the token before the previous one is an '@', the previous token is an 797 // annotation and can precede another identifier/value. 798 const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok; 799 if (PrePrevious->is(tok::at)) 800 return; 801 } 802 if (Next->is(tok::exclaim) && PreviousMustBeValue) 803 return addUnwrappedLine(); 804 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 805 bool NextEndsTemplateExpr = 806 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 807 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 808 (PreviousMustBeValue || 809 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 810 tok::minusminus))) 811 return addUnwrappedLine(); 812 if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next)) 813 return addUnwrappedLine(); 814 } 815 816 void UnwrappedLineParser::parseStructuralElement() { 817 assert(!FormatTok->is(tok::l_brace)); 818 if (Style.Language == FormatStyle::LK_TableGen && 819 FormatTok->is(tok::pp_include)) { 820 nextToken(); 821 if (FormatTok->is(tok::string_literal)) 822 nextToken(); 823 addUnwrappedLine(); 824 return; 825 } 826 switch (FormatTok->Tok.getKind()) { 827 case tok::at: 828 nextToken(); 829 if (FormatTok->Tok.is(tok::l_brace)) { 830 parseBracedList(); 831 break; 832 } 833 switch 
(FormatTok->Tok.getObjCKeywordID()) { 834 case tok::objc_public: 835 case tok::objc_protected: 836 case tok::objc_package: 837 case tok::objc_private: 838 return parseAccessSpecifier(); 839 case tok::objc_interface: 840 case tok::objc_implementation: 841 return parseObjCInterfaceOrImplementation(); 842 case tok::objc_protocol: 843 return parseObjCProtocol(); 844 case tok::objc_end: 845 return; // Handled by the caller. 846 case tok::objc_optional: 847 case tok::objc_required: 848 nextToken(); 849 addUnwrappedLine(); 850 return; 851 case tok::objc_autoreleasepool: 852 nextToken(); 853 if (FormatTok->Tok.is(tok::l_brace)) { 854 if (Style.BraceWrapping.AfterObjCDeclaration) 855 addUnwrappedLine(); 856 parseBlock(/*MustBeDeclaration=*/false); 857 } 858 addUnwrappedLine(); 859 return; 860 case tok::objc_try: 861 // This branch isn't strictly necessary (the kw_try case below would 862 // do this too after the tok::at is parsed above). But be explicit. 863 parseTryCatch(); 864 return; 865 default: 866 break; 867 } 868 break; 869 case tok::kw_asm: 870 nextToken(); 871 if (FormatTok->is(tok::l_brace)) { 872 FormatTok->Type = TT_InlineASMBrace; 873 nextToken(); 874 while (FormatTok && FormatTok->isNot(tok::eof)) { 875 if (FormatTok->is(tok::r_brace)) { 876 FormatTok->Type = TT_InlineASMBrace; 877 nextToken(); 878 addUnwrappedLine(); 879 break; 880 } 881 FormatTok->Finalized = true; 882 nextToken(); 883 } 884 } 885 break; 886 case tok::kw_namespace: 887 parseNamespace(); 888 return; 889 case tok::kw_inline: 890 nextToken(); 891 if (FormatTok->Tok.is(tok::kw_namespace)) { 892 parseNamespace(); 893 return; 894 } 895 break; 896 case tok::kw_public: 897 case tok::kw_protected: 898 case tok::kw_private: 899 if (Style.Language == FormatStyle::LK_Java || 900 Style.Language == FormatStyle::LK_JavaScript) 901 nextToken(); 902 else 903 parseAccessSpecifier(); 904 return; 905 case tok::kw_if: 906 parseIfThenElse(); 907 return; 908 case tok::kw_for: 909 case tok::kw_while: 910 
parseForOrWhileLoop(); 911 return; 912 case tok::kw_do: 913 parseDoWhile(); 914 return; 915 case tok::kw_switch: 916 parseSwitch(); 917 return; 918 case tok::kw_default: 919 nextToken(); 920 parseLabel(); 921 return; 922 case tok::kw_case: 923 parseCaseLabel(); 924 return; 925 case tok::kw_try: 926 case tok::kw___try: 927 parseTryCatch(); 928 return; 929 case tok::kw_extern: 930 nextToken(); 931 if (FormatTok->Tok.is(tok::string_literal)) { 932 nextToken(); 933 if (FormatTok->Tok.is(tok::l_brace)) { 934 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 935 addUnwrappedLine(); 936 return; 937 } 938 } 939 break; 940 case tok::kw_export: 941 if (Style.Language == FormatStyle::LK_JavaScript) { 942 parseJavaScriptEs6ImportExport(); 943 return; 944 } 945 break; 946 case tok::identifier: 947 if (FormatTok->is(TT_ForEachMacro)) { 948 parseForOrWhileLoop(); 949 return; 950 } 951 if (FormatTok->is(TT_MacroBlockBegin)) { 952 parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, 953 /*MunchSemi=*/false); 954 return; 955 } 956 if (FormatTok->is(Keywords.kw_import)) { 957 if (Style.Language == FormatStyle::LK_JavaScript) { 958 parseJavaScriptEs6ImportExport(); 959 return; 960 } 961 if (Style.Language == FormatStyle::LK_Proto) { 962 nextToken(); 963 if (FormatTok->is(tok::kw_public)) 964 nextToken(); 965 if (!FormatTok->is(tok::string_literal)) 966 return; 967 nextToken(); 968 if (FormatTok->is(tok::semi)) 969 nextToken(); 970 addUnwrappedLine(); 971 return; 972 } 973 } 974 if (Style.isCpp() && 975 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 976 Keywords.kw_slots, Keywords.kw_qslots)) { 977 nextToken(); 978 if (FormatTok->is(tok::colon)) { 979 nextToken(); 980 addUnwrappedLine(); 981 return; 982 } 983 } 984 // In all other cases, parse the declaration. 
985 break; 986 default: 987 break; 988 } 989 do { 990 const FormatToken *Previous = getPreviousToken(); 991 switch (FormatTok->Tok.getKind()) { 992 case tok::at: 993 nextToken(); 994 if (FormatTok->Tok.is(tok::l_brace)) 995 parseBracedList(); 996 break; 997 case tok::kw_enum: 998 // Ignore if this is part of "template <enum ...". 999 if (Previous && Previous->is(tok::less)) { 1000 nextToken(); 1001 break; 1002 } 1003 1004 // parseEnum falls through and does not yet add an unwrapped line as an 1005 // enum definition can start a structural element. 1006 if (!parseEnum()) 1007 break; 1008 // This only applies for C++. 1009 if (!Style.isCpp()) { 1010 addUnwrappedLine(); 1011 return; 1012 } 1013 break; 1014 case tok::kw_typedef: 1015 nextToken(); 1016 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1017 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 1018 parseEnum(); 1019 break; 1020 case tok::kw_struct: 1021 case tok::kw_union: 1022 case tok::kw_class: 1023 // parseRecord falls through and does not yet add an unwrapped line as a 1024 // record declaration or definition can start a structural element. 1025 parseRecord(); 1026 // This does not apply for Java and JavaScript. 1027 if (Style.Language == FormatStyle::LK_Java || 1028 Style.Language == FormatStyle::LK_JavaScript) { 1029 if (FormatTok->is(tok::semi)) 1030 nextToken(); 1031 addUnwrappedLine(); 1032 return; 1033 } 1034 break; 1035 case tok::period: 1036 nextToken(); 1037 // In Java, classes have an implicit static member "class". 1038 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1039 FormatTok->is(tok::kw_class)) 1040 nextToken(); 1041 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 1042 FormatTok->Tok.getIdentifierInfo()) 1043 // JavaScript only has pseudo keywords, all keywords are allowed to 1044 // appear in "IdentifierName" positions. 
See http://es5.github.io/#x7.6 1045 nextToken(); 1046 break; 1047 case tok::semi: 1048 nextToken(); 1049 addUnwrappedLine(); 1050 return; 1051 case tok::r_brace: 1052 addUnwrappedLine(); 1053 return; 1054 case tok::l_paren: 1055 parseParens(); 1056 break; 1057 case tok::kw_operator: 1058 nextToken(); 1059 if (FormatTok->isBinaryOperator()) 1060 nextToken(); 1061 break; 1062 case tok::caret: 1063 nextToken(); 1064 if (FormatTok->Tok.isAnyIdentifier() || 1065 FormatTok->isSimpleTypeSpecifier()) 1066 nextToken(); 1067 if (FormatTok->is(tok::l_paren)) 1068 parseParens(); 1069 if (FormatTok->is(tok::l_brace)) 1070 parseChildBlock(); 1071 break; 1072 case tok::l_brace: 1073 if (!tryToParseBracedList()) { 1074 // A block outside of parentheses must be the last part of a 1075 // structural element. 1076 // FIXME: Figure out cases where this is not true, and add projections 1077 // for them (the one we know is missing are lambdas). 1078 if (Style.BraceWrapping.AfterFunction) 1079 addUnwrappedLine(); 1080 FormatTok->Type = TT_FunctionLBrace; 1081 parseBlock(/*MustBeDeclaration=*/false); 1082 addUnwrappedLine(); 1083 return; 1084 } 1085 // Otherwise this was a braced init list, and the structural 1086 // element continues. 1087 break; 1088 case tok::kw_try: 1089 // We arrive here when parsing function-try blocks. 1090 parseTryCatch(); 1091 return; 1092 case tok::identifier: { 1093 if (FormatTok->is(TT_MacroBlockEnd)) { 1094 addUnwrappedLine(); 1095 return; 1096 } 1097 1098 // Function declarations (as opposed to function expressions) are parsed 1099 // on their own unwrapped line by continuing this loop. Function 1100 // expressions (functions that are not on their own line) must not create 1101 // a new unwrapped line, so they are special cased below. 
1102 size_t TokenCount = Line->Tokens.size(); 1103 if (Style.Language == FormatStyle::LK_JavaScript && 1104 FormatTok->is(Keywords.kw_function) && 1105 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1106 Keywords.kw_async)))) { 1107 tryToParseJSFunction(); 1108 break; 1109 } 1110 if ((Style.Language == FormatStyle::LK_JavaScript || 1111 Style.Language == FormatStyle::LK_Java) && 1112 FormatTok->is(Keywords.kw_interface)) { 1113 if (Style.Language == FormatStyle::LK_JavaScript) { 1114 // In JavaScript/TypeScript, "interface" can be used as a standalone 1115 // identifier, e.g. in `var interface = 1;`. If "interface" is 1116 // followed by another identifier, it is very like to be an actual 1117 // interface declaration. 1118 unsigned StoredPosition = Tokens->getPosition(); 1119 FormatToken *Next = Tokens->getNextToken(); 1120 FormatTok = Tokens->setPosition(StoredPosition); 1121 if (Next && !mustBeJSIdent(Keywords, Next)) { 1122 nextToken(); 1123 break; 1124 } 1125 } 1126 parseRecord(); 1127 addUnwrappedLine(); 1128 return; 1129 } 1130 1131 // See if the following token should start a new unwrapped line. 1132 StringRef Text = FormatTok->TokenText; 1133 nextToken(); 1134 if (Line->Tokens.size() == 1 && 1135 // JS doesn't have macros, and within classes colons indicate fields, 1136 // not labels. 1137 Style.Language != FormatStyle::LK_JavaScript) { 1138 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1139 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1140 parseLabel(); 1141 return; 1142 } 1143 // Recognize function-like macro usages without trailing semicolon as 1144 // well as free-standing macros like Q_OBJECT. 1145 bool FunctionLike = FormatTok->is(tok::l_paren); 1146 if (FunctionLike) 1147 parseParens(); 1148 1149 bool FollowedByNewline = 1150 CommentsBeforeNextToken.empty() 1151 ? 
FormatTok->NewlinesBefore > 0 1152 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1153 1154 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1155 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 1156 addUnwrappedLine(); 1157 return; 1158 } 1159 } 1160 break; 1161 } 1162 case tok::equal: 1163 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 1164 // TT_JsFatArrow. The always start an expression or a child block if 1165 // followed by a curly. 1166 if (FormatTok->is(TT_JsFatArrow)) { 1167 nextToken(); 1168 if (FormatTok->is(tok::l_brace)) 1169 parseChildBlock(); 1170 break; 1171 } 1172 1173 nextToken(); 1174 if (FormatTok->Tok.is(tok::l_brace)) { 1175 parseBracedList(); 1176 } 1177 break; 1178 case tok::l_square: 1179 parseSquare(); 1180 break; 1181 case tok::kw_new: 1182 parseNew(); 1183 break; 1184 default: 1185 nextToken(); 1186 break; 1187 } 1188 } while (!eof()); 1189 } 1190 1191 bool UnwrappedLineParser::tryToParseLambda() { 1192 if (!Style.isCpp()) { 1193 nextToken(); 1194 return false; 1195 } 1196 const FormatToken* Previous = getPreviousToken(); 1197 if (Previous && 1198 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1199 tok::kw_delete) || 1200 Previous->closesScope() || Previous->isSimpleTypeSpecifier())) { 1201 nextToken(); 1202 return false; 1203 } 1204 assert(FormatTok->is(tok::l_square)); 1205 FormatToken &LSquare = *FormatTok; 1206 if (!tryToParseLambdaIntroducer()) 1207 return false; 1208 1209 while (FormatTok->isNot(tok::l_brace)) { 1210 if (FormatTok->isSimpleTypeSpecifier()) { 1211 nextToken(); 1212 continue; 1213 } 1214 switch (FormatTok->Tok.getKind()) { 1215 case tok::l_brace: 1216 break; 1217 case tok::l_paren: 1218 parseParens(); 1219 break; 1220 case tok::amp: 1221 case tok::star: 1222 case tok::kw_const: 1223 case tok::comma: 1224 case tok::less: 1225 case tok::greater: 1226 case tok::identifier: 1227 case tok::numeric_constant: 1228 case tok::coloncolon: 1229 case 
tok::kw_mutable: 1230 nextToken(); 1231 break; 1232 case tok::arrow: 1233 FormatTok->Type = TT_LambdaArrow; 1234 nextToken(); 1235 break; 1236 default: 1237 return true; 1238 } 1239 } 1240 LSquare.Type = TT_LambdaLSquare; 1241 parseChildBlock(); 1242 return true; 1243 } 1244 1245 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1246 nextToken(); 1247 if (FormatTok->is(tok::equal)) { 1248 nextToken(); 1249 if (FormatTok->is(tok::r_square)) { 1250 nextToken(); 1251 return true; 1252 } 1253 if (FormatTok->isNot(tok::comma)) 1254 return false; 1255 nextToken(); 1256 } else if (FormatTok->is(tok::amp)) { 1257 nextToken(); 1258 if (FormatTok->is(tok::r_square)) { 1259 nextToken(); 1260 return true; 1261 } 1262 if (!FormatTok->isOneOf(tok::comma, tok::identifier)) { 1263 return false; 1264 } 1265 if (FormatTok->is(tok::comma)) 1266 nextToken(); 1267 } else if (FormatTok->is(tok::r_square)) { 1268 nextToken(); 1269 return true; 1270 } 1271 do { 1272 if (FormatTok->is(tok::amp)) 1273 nextToken(); 1274 if (!FormatTok->isOneOf(tok::identifier, tok::kw_this)) 1275 return false; 1276 nextToken(); 1277 if (FormatTok->is(tok::ellipsis)) 1278 nextToken(); 1279 if (FormatTok->is(tok::comma)) { 1280 nextToken(); 1281 } else if (FormatTok->is(tok::r_square)) { 1282 nextToken(); 1283 return true; 1284 } else { 1285 return false; 1286 } 1287 } while (!eof()); 1288 return false; 1289 } 1290 1291 void UnwrappedLineParser::tryToParseJSFunction() { 1292 assert(FormatTok->is(Keywords.kw_function) || 1293 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1294 if (FormatTok->is(Keywords.kw_async)) 1295 nextToken(); 1296 // Consume "function". 1297 nextToken(); 1298 1299 // Consume * (generator function). Treat it like C++'s overloaded operators. 1300 if (FormatTok->is(tok::star)) { 1301 FormatTok->Type = TT_OverloadedOperator; 1302 nextToken(); 1303 } 1304 1305 // Consume function name. 
if (FormatTok->is(tok::identifier))
    nextToken();

  // Without a parameter list there is nothing more to parse here.
  if (FormatTok->isNot(tok::l_paren))
    return;

  // Parse formal parameter list.
  parseParens();

  if (FormatTok->is(tok::colon)) {
    // Parse a type definition.
    nextToken();

    // Eat the type declaration. For braced inline object types, balance braces,
    // otherwise just parse until finding an l_brace for the function body.
    if (FormatTok->is(tok::l_brace))
      tryToParseBracedList();
    else
      while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
        nextToken();
  }

  // A ';' here means there is no body to parse.
  if (FormatTok->is(tok::semi))
    return;

  parseChildBlock();
}

// Parses the brace at the current token as a braced list unless it is
// classified as a block. The classification is computed on demand via
// calculateBraceTypes(). Returns false (consuming nothing) for BK_Block and
// true after parseBracedList() has run otherwise.
bool UnwrappedLineParser::tryToParseBracedList() {
  if (FormatTok->BlockKind == BK_Unknown)
    calculateBraceTypes();
  assert(FormatTok->BlockKind != BK_Unknown);
  if (FormatTok->BlockKind == BK_Block)
    return false;
  parseBracedList();
  return true;
}

// Parses a braced list. The current token is consumed first (callers in this
// file invoke it on a '{'), then tokens are processed until the closing '}'.
//
// Returns true if the '}' was reached without having seen a stray ';'.
// Returns false if a ';' was seen (immediately, unless \p ContinueOnSemicolons
// is set, in which case parsing goes on to the '}') or if eof is hit first.
// For JavaScript a ';' is not treated as an error.
bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
  bool HasError = false;
  nextToken();

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssigmentExpression() inside.
  do {
    // JavaScript-only constructs are handled before the generic switch.
    if (Style.Language == FormatStyle::LK_JavaScript) {
      if (FormatTok->is(Keywords.kw_function) ||
          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
        tryToParseJSFunction();
        continue;
      }
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        // Fat arrows can be followed by simple expressions or by child blocks
        // in curly braces.
        if (FormatTok->is(tok::l_brace)) {
          parseChildBlock();
          continue;
        }
      }
      if (FormatTok->is(tok::l_brace)) {
        // Could be a method inside of a braced list `{a() { return 1; }}`.
        if (tryToParseBracedList())
          continue;
        parseChildBlock();
      }
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::caret:
      // A '^' followed by '{' is parsed as a child block.
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        parseChildBlock();
      }
      break;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_paren:
      parseParens();
      // JavaScript can just have free standing methods and getters/setters in
      // object literals. Detect them by a "{" following ")".
      if (Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      break;
    case tok::l_brace:
      // Assume there are no blocks inside a braced init list apart
      // from the ones we explicitly parse out (like lambdas).
      FormatTok->BlockKind = BK_BracedInit;
      parseBracedList();
      break;
    case tok::r_brace:
      // End of this list; success unless a ';' was seen on the way.
      nextToken();
      return !HasError;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
1407 if (Style.Language == FormatStyle::LK_JavaScript) { 1408 nextToken(); 1409 break; 1410 } 1411 HasError = true; 1412 if (!ContinueOnSemicolons) 1413 return !HasError; 1414 nextToken(); 1415 break; 1416 case tok::comma: 1417 nextToken(); 1418 break; 1419 default: 1420 nextToken(); 1421 break; 1422 } 1423 } while (!eof()); 1424 return false; 1425 } 1426 1427 void UnwrappedLineParser::parseParens() { 1428 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1429 nextToken(); 1430 do { 1431 switch (FormatTok->Tok.getKind()) { 1432 case tok::l_paren: 1433 parseParens(); 1434 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1435 parseChildBlock(); 1436 break; 1437 case tok::r_paren: 1438 nextToken(); 1439 return; 1440 case tok::r_brace: 1441 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1442 return; 1443 case tok::l_square: 1444 tryToParseLambda(); 1445 break; 1446 case tok::l_brace: 1447 if (!tryToParseBracedList()) 1448 parseChildBlock(); 1449 break; 1450 case tok::at: 1451 nextToken(); 1452 if (FormatTok->Tok.is(tok::l_brace)) 1453 parseBracedList(); 1454 break; 1455 case tok::kw_class: 1456 if (Style.Language == FormatStyle::LK_JavaScript) 1457 parseRecord(/*ParseAsExpr=*/true); 1458 else 1459 nextToken(); 1460 break; 1461 case tok::identifier: 1462 if (Style.Language == FormatStyle::LK_JavaScript && 1463 (FormatTok->is(Keywords.kw_function) || 1464 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1465 tryToParseJSFunction(); 1466 else 1467 nextToken(); 1468 break; 1469 default: 1470 nextToken(); 1471 break; 1472 } 1473 } while (!eof()); 1474 } 1475 1476 void UnwrappedLineParser::parseSquare() { 1477 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1478 if (tryToParseLambda()) 1479 return; 1480 do { 1481 switch (FormatTok->Tok.getKind()) { 1482 case tok::l_paren: 1483 parseParens(); 1484 break; 1485 case tok::r_square: 1486 nextToken(); 1487 return; 1488 case tok::r_brace: 
// A "}" inside parenthesis is an error if there wasn't a matching "{".
      return;
    case tok::l_square:
      parseSquare();
      break;
    case tok::l_brace: {
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    }
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace))
        parseBracedList();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

// Parses an if statement including any "else" / "else if" chain.
//
// Must be called with the current token being 'if'. A braced body is parsed
// as a block; an unbraced body is parsed as a structural element one level
// deeper. Whether an unwrapped line is emitted between "}" and "else" is
// controlled by Style.BraceWrapping.BeforeElse.
void UnwrappedLineParser::parseIfThenElse() {
  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
  nextToken();
  if (FormatTok->Tok.is(tok::l_paren))
    parseParens();
  // Set when the "}" of the then-block has not been flushed yet because an
  // "else" may still join it on the same line.
  bool NeedsUnwrappedLine = false;
  if (FormatTok->Tok.is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else {
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  if (FormatTok->Tok.is(tok::kw_else)) {
    nextToken();
    if (FormatTok->Tok.is(tok::l_brace)) {
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      parseBlock(/*MustBeDeclaration=*/false);
      addUnwrappedLine();
    } else if (FormatTok->Tok.is(tok::kw_if)) {
      // "else if": handled by recursion.
      parseIfThenElse();
    } else {
      addUnwrappedLine();
      ++Line->Level;
      parseStructuralElement();
      // Flush what is left if the unbraced else body ran into eof.
      if (FormatTok->is(tok::eof))
        addUnwrappedLine();
      --Line->Level;
    }
  } else if (NeedsUnwrappedLine) {
    // No "else" followed; emit the pending line now.
    addUnwrappedLine();
  }
}

// Parses a try statement (or function-try-block) and all handlers that
// follow it.
//
// Must be called with the current token being 'try' or '__try'. Recognized
// followers are catch, __except, __finally, the Objective-C @catch/@finally
// forms and, for Java and JavaScript, finally. Whether each "}" is flushed
// before the next handler is controlled by Style.BraceWrapping.BeforeCatch.
void UnwrappedLineParser::parseTryCatch() {
  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  nextToken();
  // Set when the last "}" has not been flushed yet because a handler may
  // still join it on the same line.
  bool NeedsUnwrappedLine = false;
  if (FormatTok->is(tok::colon)) {
    // We are in a function try block, what comes is an initializer list.
    nextToken();
    while (FormatTok->is(tok::identifier)) {
      nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::comma))
        nextToken();
    }
  }
  // Parse try with resource.
  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
    parseParens();
  }
  if (FormatTok->is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    if (Style.BraceWrapping.BeforeCatch) {
      addUnwrappedLine();
    } else {
      NeedsUnwrappedLine = true;
    }
  } else if (!FormatTok->is(tok::kw_catch)) {
    // The C++ standard requires a compound-statement after a try.
    // If there's none, we try to assume there's a structuralElement
    // and try to continue.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  // One handler per iteration.
  while (1) {
    // Step over the '@' of Objective-C @catch / @finally.
    if (FormatTok->is(tok::at))
      nextToken();
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
                             tok::kw___finally) ||
          ((Style.Language == FormatStyle::LK_Java ||
            Style.Language == FormatStyle::LK_JavaScript) &&
           FormatTok->is(Keywords.kw_finally)) ||
          (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
           FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
      break;
    nextToken();
    // Skip ahead to the handler's '{'; give up on ';', '}' or eof.
    while (FormatTok->isNot(tok::l_brace)) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        continue;
      }
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
        return;
      nextToken();
    }
    NeedsUnwrappedLine = false;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  }
  if (NeedsUnwrappedLine)
    addUnwrappedLine();
}

// Parses a namespace definition.
//
// Must be called with the current token being 'namespace'. The name (a
// sequence of identifiers and '::') is skipped; if a '{' follows, the body is
// parsed as a declaration block, indented according to
// Style.NamespaceIndentation, and a directly following ';' is consumed.
void
UnwrappedLineParser::parseNamespace() {
  assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");

  const FormatToken &InitialToken = *FormatTok;
  nextToken();
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
    nextToken();
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ShouldBreakBeforeBrace(Style, InitialToken))
      addUnwrappedLine();

    // NI_All always indents; NI_Inner indents only when the declaration scope
    // stack already holds more than one entry.
    bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
                    (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
                     DeclarationScopeStack.size() > 1);
    parseBlock(/*MustBeDeclaration=*/true, AddLevel);
    // Munch the semicolon after a namespace. This is more common than one would
    // think. Putting the semicolon into its own line is very ugly.
    if (FormatTok->Tok.is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  // FIXME: Add error handling.
}

// Parses a 'new' expression.
//
// Must be called with the current token being 'new', which is consumed. For
// every language but Java nothing else happens. For Java, tokens are skipped
// up to the argument parentheses, and a '{' directly after them (anonymous
// class body) is parsed as a child block.
void UnwrappedLineParser::parseNew() {
  assert(FormatTok->is(tok::kw_new) && "'new' expected");
  nextToken();
  if (Style.Language != FormatStyle::LK_Java)
    return;

  // In Java, we can parse everything up to the parens, which aren't optional.
  do {
    // There should not be a ;, { or } before the new's open paren.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
      return;

    // Consume the parens.
    if (FormatTok->is(tok::l_paren)) {
      parseParens();

      // If there is a class body of an anonymous class, consume that as child.
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      return;
    }
    nextToken();
  } while (!eof());
}

void UnwrappedLineParser::parseForOrWhileLoop() {
  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
         "'for', 'while' or foreach macro expected");
  nextToken();
  // JS' for await ( ...
1674 if (Style.Language == FormatStyle::LK_JavaScript && 1675 FormatTok->is(Keywords.kw_await)) 1676 nextToken(); 1677 if (FormatTok->Tok.is(tok::l_paren)) 1678 parseParens(); 1679 if (FormatTok->Tok.is(tok::l_brace)) { 1680 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1681 parseBlock(/*MustBeDeclaration=*/false); 1682 addUnwrappedLine(); 1683 } else { 1684 addUnwrappedLine(); 1685 ++Line->Level; 1686 parseStructuralElement(); 1687 --Line->Level; 1688 } 1689 } 1690 1691 void UnwrappedLineParser::parseDoWhile() { 1692 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1693 nextToken(); 1694 if (FormatTok->Tok.is(tok::l_brace)) { 1695 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1696 parseBlock(/*MustBeDeclaration=*/false); 1697 if (Style.BraceWrapping.IndentBraces) 1698 addUnwrappedLine(); 1699 } else { 1700 addUnwrappedLine(); 1701 ++Line->Level; 1702 parseStructuralElement(); 1703 --Line->Level; 1704 } 1705 1706 // FIXME: Add error handling. 1707 if (!FormatTok->Tok.is(tok::kw_while)) { 1708 addUnwrappedLine(); 1709 return; 1710 } 1711 1712 nextToken(); 1713 parseStructuralElement(); 1714 } 1715 1716 void UnwrappedLineParser::parseLabel() { 1717 nextToken(); 1718 unsigned OldLineLevel = Line->Level; 1719 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1720 --Line->Level; 1721 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1722 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1723 parseBlock(/*MustBeDeclaration=*/false); 1724 if (FormatTok->Tok.is(tok::kw_break)) { 1725 if (Style.BraceWrapping.AfterControlStatement) 1726 addUnwrappedLine(); 1727 parseStructuralElement(); 1728 } 1729 addUnwrappedLine(); 1730 } else { 1731 if (FormatTok->is(tok::semi)) 1732 nextToken(); 1733 addUnwrappedLine(); 1734 } 1735 Line->Level = OldLineLevel; 1736 if (FormatTok->isNot(tok::l_brace)) { 1737 parseStructuralElement(); 1738 addUnwrappedLine(); 1739 } 1740 } 1741 1742 void 
UnwrappedLineParser::parseCaseLabel() { 1743 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1744 // FIXME: fix handling of complex expressions here. 1745 do { 1746 nextToken(); 1747 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1748 parseLabel(); 1749 } 1750 1751 void UnwrappedLineParser::parseSwitch() { 1752 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1753 nextToken(); 1754 if (FormatTok->Tok.is(tok::l_paren)) 1755 parseParens(); 1756 if (FormatTok->Tok.is(tok::l_brace)) { 1757 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1758 parseBlock(/*MustBeDeclaration=*/false); 1759 addUnwrappedLine(); 1760 } else { 1761 addUnwrappedLine(); 1762 ++Line->Level; 1763 parseStructuralElement(); 1764 --Line->Level; 1765 } 1766 } 1767 1768 void UnwrappedLineParser::parseAccessSpecifier() { 1769 nextToken(); 1770 // Understand Qt's slots. 1771 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 1772 nextToken(); 1773 // Otherwise, we don't know what it is, and we'd better keep the next token. 1774 if (FormatTok->Tok.is(tok::colon)) 1775 nextToken(); 1776 addUnwrappedLine(); 1777 } 1778 1779 bool UnwrappedLineParser::parseEnum() { 1780 // Won't be 'enum' for NS_ENUMs. 1781 if (FormatTok->Tok.is(tok::kw_enum)) 1782 nextToken(); 1783 1784 // In TypeScript, "enum" can also be used as property name, e.g. in interface 1785 // declarations. An "enum" keyword followed by a colon would be a syntax 1786 // error and thus assume it is just an identifier. 1787 if (Style.Language == FormatStyle::LK_JavaScript && 1788 FormatTok->isOneOf(tok::colon, tok::question)) 1789 return false; 1790 1791 // Eat up enum class ... 
1792 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 1793 nextToken(); 1794 1795 while (FormatTok->Tok.getIdentifierInfo() || 1796 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 1797 tok::greater, tok::comma, tok::question)) { 1798 nextToken(); 1799 // We can have macros or attributes in between 'enum' and the enum name. 1800 if (FormatTok->is(tok::l_paren)) 1801 parseParens(); 1802 if (FormatTok->is(tok::identifier)) { 1803 nextToken(); 1804 // If there are two identifiers in a row, this is likely an elaborate 1805 // return type. In Java, this can be "implements", etc. 1806 if (Style.isCpp() && FormatTok->is(tok::identifier)) 1807 return false; 1808 } 1809 } 1810 1811 // Just a declaration or something is wrong. 1812 if (FormatTok->isNot(tok::l_brace)) 1813 return true; 1814 FormatTok->BlockKind = BK_Block; 1815 1816 if (Style.Language == FormatStyle::LK_Java) { 1817 // Java enums are different. 1818 parseJavaEnumBody(); 1819 return true; 1820 } 1821 if (Style.Language == FormatStyle::LK_Proto) { 1822 parseBlock(/*MustBeDeclaration=*/true); 1823 return true; 1824 } 1825 1826 // Parse enum body. 1827 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 1828 if (HasError) { 1829 if (FormatTok->is(tok::semi)) 1830 nextToken(); 1831 addUnwrappedLine(); 1832 } 1833 return true; 1834 1835 // There is no addUnwrappedLine() here so that we fall through to parsing a 1836 // structural element afterwards. Thus, in "enum A {} n, m;", 1837 // "} n, m;" will end up in one unwrapped line. 1838 } 1839 1840 void UnwrappedLineParser::parseJavaEnumBody() { 1841 // Determine whether the enum is simple, i.e. does not have a semicolon or 1842 // constants with class bodies. Simple enums can be formatted like braced 1843 // lists, contracted to a single line, etc. 
1844 unsigned StoredPosition = Tokens->getPosition(); 1845 bool IsSimple = true; 1846 FormatToken *Tok = Tokens->getNextToken(); 1847 while (Tok) { 1848 if (Tok->is(tok::r_brace)) 1849 break; 1850 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 1851 IsSimple = false; 1852 break; 1853 } 1854 // FIXME: This will also mark enums with braces in the arguments to enum 1855 // constants as "not simple". This is probably fine in practice, though. 1856 Tok = Tokens->getNextToken(); 1857 } 1858 FormatTok = Tokens->setPosition(StoredPosition); 1859 1860 if (IsSimple) { 1861 parseBracedList(); 1862 addUnwrappedLine(); 1863 return; 1864 } 1865 1866 // Parse the body of a more complex enum. 1867 // First add a line for everything up to the "{". 1868 nextToken(); 1869 addUnwrappedLine(); 1870 ++Line->Level; 1871 1872 // Parse the enum constants. 1873 while (FormatTok) { 1874 if (FormatTok->is(tok::l_brace)) { 1875 // Parse the constant's class body. 1876 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1877 /*MunchSemi=*/false); 1878 } else if (FormatTok->is(tok::l_paren)) { 1879 parseParens(); 1880 } else if (FormatTok->is(tok::comma)) { 1881 nextToken(); 1882 addUnwrappedLine(); 1883 } else if (FormatTok->is(tok::semi)) { 1884 nextToken(); 1885 addUnwrappedLine(); 1886 break; 1887 } else if (FormatTok->is(tok::r_brace)) { 1888 addUnwrappedLine(); 1889 break; 1890 } else { 1891 nextToken(); 1892 } 1893 } 1894 1895 // Parse the class body after the enum's ";" if any. 1896 parseLevel(/*HasOpeningBrace=*/true); 1897 nextToken(); 1898 --Line->Level; 1899 addUnwrappedLine(); 1900 } 1901 1902 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 1903 const FormatToken &InitialToken = *FormatTok; 1904 nextToken(); 1905 1906 // The actual identifier can be a nested name specifier, and in macros 1907 // it is often token-pasted. 
1908 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 1909 tok::kw___attribute, tok::kw___declspec, 1910 tok::kw_alignas) || 1911 ((Style.Language == FormatStyle::LK_Java || 1912 Style.Language == FormatStyle::LK_JavaScript) && 1913 FormatTok->isOneOf(tok::period, tok::comma))) { 1914 bool IsNonMacroIdentifier = 1915 FormatTok->is(tok::identifier) && 1916 FormatTok->TokenText != FormatTok->TokenText.upper(); 1917 nextToken(); 1918 // We can have macros or attributes in between 'class' and the class name. 1919 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 1920 parseParens(); 1921 } 1922 1923 // Note that parsing away template declarations here leads to incorrectly 1924 // accepting function declarations as record declarations. 1925 // In general, we cannot solve this problem. Consider: 1926 // class A<int> B() {} 1927 // which can be a function definition or a class definition when B() is a 1928 // macro. If we find enough real-world cases where this is a problem, we 1929 // can parse for the 'template' keyword in the beginning of the statement, 1930 // and thus rule out the record production in case there is no template 1931 // (this would still leave us with an ambiguity between template function 1932 // and class declarations). 
1933 if (FormatTok->isOneOf(tok::colon, tok::less)) { 1934 while (!eof()) { 1935 if (FormatTok->is(tok::l_brace)) { 1936 calculateBraceTypes(/*ExpectClassBody=*/true); 1937 if (!tryToParseBracedList()) 1938 break; 1939 } 1940 if (FormatTok->Tok.is(tok::semi)) 1941 return; 1942 nextToken(); 1943 } 1944 } 1945 if (FormatTok->Tok.is(tok::l_brace)) { 1946 if (ParseAsExpr) { 1947 parseChildBlock(); 1948 } else { 1949 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1950 addUnwrappedLine(); 1951 1952 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1953 /*MunchSemi=*/false); 1954 } 1955 } 1956 // There is no addUnwrappedLine() here so that we fall through to parsing a 1957 // structural element afterwards. Thus, in "class A {} n, m;", 1958 // "} n, m;" will end up in one unwrapped line. 1959 } 1960 1961 void UnwrappedLineParser::parseObjCProtocolList() { 1962 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 1963 do 1964 nextToken(); 1965 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 1966 nextToken(); // Skip '>'. 1967 } 1968 1969 void UnwrappedLineParser::parseObjCUntilAtEnd() { 1970 do { 1971 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 1972 nextToken(); 1973 addUnwrappedLine(); 1974 break; 1975 } 1976 if (FormatTok->is(tok::l_brace)) { 1977 parseBlock(/*MustBeDeclaration=*/false); 1978 // In ObjC interfaces, nothing should be following the "}". 1979 addUnwrappedLine(); 1980 } else if (FormatTok->is(tok::r_brace)) { 1981 // Ignore stray "}". parseStructuralElement doesn't consume them. 1982 nextToken(); 1983 addUnwrappedLine(); 1984 } else { 1985 parseStructuralElement(); 1986 } 1987 } while (!eof()); 1988 } 1989 1990 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 1991 nextToken(); 1992 nextToken(); // interface name 1993 1994 // @interface can be followed by either a base class, or a category. 
1995 if (FormatTok->Tok.is(tok::colon)) { 1996 nextToken(); 1997 nextToken(); // base class name 1998 } else if (FormatTok->Tok.is(tok::l_paren)) 1999 // Skip category, if present. 2000 parseParens(); 2001 2002 if (FormatTok->Tok.is(tok::less)) 2003 parseObjCProtocolList(); 2004 2005 if (FormatTok->Tok.is(tok::l_brace)) { 2006 if (Style.BraceWrapping.AfterObjCDeclaration) 2007 addUnwrappedLine(); 2008 parseBlock(/*MustBeDeclaration=*/true); 2009 } 2010 2011 // With instance variables, this puts '}' on its own line. Without instance 2012 // variables, this ends the @interface line. 2013 addUnwrappedLine(); 2014 2015 parseObjCUntilAtEnd(); 2016 } 2017 2018 void UnwrappedLineParser::parseObjCProtocol() { 2019 nextToken(); 2020 nextToken(); // protocol name 2021 2022 if (FormatTok->Tok.is(tok::less)) 2023 parseObjCProtocolList(); 2024 2025 // Check for protocol declaration. 2026 if (FormatTok->Tok.is(tok::semi)) { 2027 nextToken(); 2028 return addUnwrappedLine(); 2029 } 2030 2031 addUnwrappedLine(); 2032 parseObjCUntilAtEnd(); 2033 } 2034 2035 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2036 bool IsImport = FormatTok->is(Keywords.kw_import); 2037 assert(IsImport || FormatTok->is(tok::kw_export)); 2038 nextToken(); 2039 2040 // Consume the "default" in "export default class/function". 2041 if (FormatTok->is(tok::kw_default)) 2042 nextToken(); 2043 2044 // Consume "async function", "function" and "default function", so that these 2045 // get parsed as free-standing JS functions, i.e. do not require a trailing 2046 // semicolon. 2047 if (FormatTok->is(Keywords.kw_async)) 2048 nextToken(); 2049 if (FormatTok->is(Keywords.kw_function)) { 2050 nextToken(); 2051 return; 2052 } 2053 2054 // For imports, `export *`, `export {...}`, consume the rest of the line up 2055 // to the terminating `;`. For everything else, just return and continue 2056 // parsing the structural element, i.e. the declaration or expression for 2057 // `export default`. 
2058 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2059 !FormatTok->isStringLiteral()) 2060 return; 2061 2062 while (!eof()) { 2063 if (FormatTok->is(tok::semi)) 2064 return; 2065 if (Line->Tokens.size() == 0) { 2066 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2067 // import statement should terminate. 2068 return; 2069 } 2070 if (FormatTok->is(tok::l_brace)) { 2071 FormatTok->BlockKind = BK_Block; 2072 parseBracedList(); 2073 } else { 2074 nextToken(); 2075 } 2076 } 2077 } 2078 2079 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2080 StringRef Prefix = "") { 2081 llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" 2082 << (Line.InPPDirective ? " MACRO" : "") << ": "; 2083 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2084 E = Line.Tokens.end(); 2085 I != E; ++I) { 2086 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2087 << "T=" << I->Tok->Type 2088 << ", OC=" << I->Tok->OriginalColumn << "] "; 2089 } 2090 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2091 E = Line.Tokens.end(); 2092 I != E; ++I) { 2093 const UnwrappedLineNode &Node = *I; 2094 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2095 I = Node.Children.begin(), 2096 E = Node.Children.end(); 2097 I != E; ++I) { 2098 printDebugInfo(*I, "\nChild: "); 2099 } 2100 } 2101 llvm::dbgs() << "\n"; 2102 } 2103 2104 void UnwrappedLineParser::addUnwrappedLine() { 2105 if (Line->Tokens.empty()) 2106 return; 2107 DEBUG({ 2108 if (CurrentLines == &Lines) 2109 printDebugInfo(*Line); 2110 }); 2111 CurrentLines->push_back(std::move(*Line)); 2112 Line->Tokens.clear(); 2113 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2114 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2115 CurrentLines->append( 2116 std::make_move_iterator(PreprocessorDirectives.begin()), 2117 std::make_move_iterator(PreprocessorDirectives.end())); 2118 
PreprocessorDirectives.clear(); 2119 } 2120 } 2121 2122 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2123 2124 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2125 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2126 FormatTok.NewlinesBefore > 0; 2127 } 2128 2129 // Checks if \p FormatTok is a line comment that continues the line comment 2130 // section on \p Line. 2131 static bool continuesLineCommentSection(const FormatToken &FormatTok, 2132 const UnwrappedLine &Line, 2133 llvm::Regex &CommentPragmasRegex) { 2134 if (Line.Tokens.empty()) 2135 return false; 2136 2137 StringRef IndentContent = FormatTok.TokenText; 2138 if (FormatTok.TokenText.startswith("//") || 2139 FormatTok.TokenText.startswith("/*")) 2140 IndentContent = FormatTok.TokenText.substr(2); 2141 if (CommentPragmasRegex.match(IndentContent)) 2142 return false; 2143 2144 // If Line starts with a line comment, then FormatTok continues the comment 2145 // section if its original column is greater or equal to the original start 2146 // column of the line. 2147 // 2148 // Define the min column token of a line as follows: if a line ends in '{' or 2149 // contains a '{' followed by a line comment, then the min column token is 2150 // that '{'. Otherwise, the min column token of the line is the first token of 2151 // the line. 2152 // 2153 // If Line starts with a token other than a line comment, then FormatTok 2154 // continues the comment section if its original column is greater than the 2155 // original start column of the min column token of the line. 
2156 // 2157 // For example, the second line comment continues the first in these cases: 2158 // 2159 // // first line 2160 // // second line 2161 // 2162 // and: 2163 // 2164 // // first line 2165 // // second line 2166 // 2167 // and: 2168 // 2169 // int i; // first line 2170 // // second line 2171 // 2172 // and: 2173 // 2174 // do { // first line 2175 // // second line 2176 // int i; 2177 // } while (true); 2178 // 2179 // and: 2180 // 2181 // enum { 2182 // a, // first line 2183 // // second line 2184 // b 2185 // }; 2186 // 2187 // The second line comment doesn't continue the first in these cases: 2188 // 2189 // // first line 2190 // // second line 2191 // 2192 // and: 2193 // 2194 // int i; // first line 2195 // // second line 2196 // 2197 // and: 2198 // 2199 // do { // first line 2200 // // second line 2201 // int i; 2202 // } while (true); 2203 // 2204 // and: 2205 // 2206 // enum { 2207 // a, // first line 2208 // // second line 2209 // }; 2210 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2211 2212 // Scan for '{//'. If found, use the column of '{' as a min column for line 2213 // comment section continuation. 2214 const FormatToken *PreviousToken = nullptr; 2215 for (const UnwrappedLineNode &Node : Line.Tokens) { 2216 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2217 isLineComment(*Node.Tok)) { 2218 MinColumnToken = PreviousToken; 2219 break; 2220 } 2221 PreviousToken = Node.Tok; 2222 2223 // Grab the last newline preceding a token in this unwrapped line. 
2224 if (Node.Tok->NewlinesBefore > 0) { 2225 MinColumnToken = Node.Tok; 2226 } 2227 } 2228 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2229 MinColumnToken = PreviousToken; 2230 } 2231 2232 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2233 MinColumnToken); 2234 } 2235 2236 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2237 bool JustComments = Line->Tokens.empty(); 2238 for (SmallVectorImpl<FormatToken *>::const_iterator 2239 I = CommentsBeforeNextToken.begin(), 2240 E = CommentsBeforeNextToken.end(); 2241 I != E; ++I) { 2242 // Line comments that belong to the same line comment section are put on the 2243 // same line since later we might want to reflow content between them. 2244 // Additional fine-grained breaking of line comment sections is controlled 2245 // by the class BreakableLineCommentSection in case it is desirable to keep 2246 // several line comment sections in the same unwrapped line. 2247 // 2248 // FIXME: Consider putting separate line comment sections as children to the 2249 // unwrapped line instead. 2250 (*I)->ContinuesLineCommentSection = 2251 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2252 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2253 addUnwrappedLine(); 2254 pushToken(*I); 2255 } 2256 if (NewlineBeforeNext && JustComments) 2257 addUnwrappedLine(); 2258 CommentsBeforeNextToken.clear(); 2259 } 2260 2261 void UnwrappedLineParser::nextToken() { 2262 if (eof()) 2263 return; 2264 flushComments(isOnNewLine(*FormatTok)); 2265 pushToken(FormatTok); 2266 if (Style.Language != FormatStyle::LK_JavaScript) 2267 readToken(); 2268 else 2269 readTokenWithJavaScriptASI(); 2270 } 2271 2272 const FormatToken *UnwrappedLineParser::getPreviousToken() { 2273 // FIXME: This is a dirty way to access the previous token. Find a better 2274 // solution. 
2275 if (!Line || Line->Tokens.empty()) 2276 return nullptr; 2277 return Line->Tokens.back().Tok; 2278 } 2279 2280 void UnwrappedLineParser::distributeComments( 2281 const SmallVectorImpl<FormatToken *> &Comments, 2282 const FormatToken *NextTok) { 2283 // Whether or not a line comment token continues a line is controlled by 2284 // the method continuesLineCommentSection, with the following caveat: 2285 // 2286 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2287 // that each comment line from the trail is aligned with the next token, if 2288 // the next token exists. If a trail exists, the beginning of the maximal 2289 // trail is marked as a start of a new comment section. 2290 // 2291 // For example in this code: 2292 // 2293 // int a; // line about a 2294 // // line 1 about b 2295 // // line 2 about b 2296 // int b; 2297 // 2298 // the two lines about b form a maximal trail, so there are two sections, the 2299 // first one consisting of the single comment "// line about a" and the 2300 // second one consisting of the next two comments. 2301 if (Comments.empty()) 2302 return; 2303 bool ShouldPushCommentsInCurrentLine = true; 2304 bool HasTrailAlignedWithNextToken = false; 2305 unsigned StartOfTrailAlignedWithNextToken = 0; 2306 if (NextTok) { 2307 // We are skipping the first element intentionally. 
2308 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2309 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2310 HasTrailAlignedWithNextToken = true; 2311 StartOfTrailAlignedWithNextToken = i; 2312 } 2313 } 2314 } 2315 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2316 FormatToken *FormatTok = Comments[i]; 2317 if (HasTrailAlignedWithNextToken && 2318 i == StartOfTrailAlignedWithNextToken) { 2319 FormatTok->ContinuesLineCommentSection = false; 2320 } else { 2321 FormatTok->ContinuesLineCommentSection = 2322 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2323 } 2324 if (!FormatTok->ContinuesLineCommentSection && 2325 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2326 ShouldPushCommentsInCurrentLine = false; 2327 } 2328 if (ShouldPushCommentsInCurrentLine) { 2329 pushToken(FormatTok); 2330 } else { 2331 CommentsBeforeNextToken.push_back(FormatTok); 2332 } 2333 } 2334 } 2335 2336 void UnwrappedLineParser::readToken() { 2337 SmallVector<FormatToken *, 1> Comments; 2338 do { 2339 FormatTok = Tokens->getNextToken(); 2340 assert(FormatTok); 2341 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2342 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2343 distributeComments(Comments, FormatTok); 2344 Comments.clear(); 2345 // If there is an unfinished unwrapped line, we flush the preprocessor 2346 // directives only after that unwrapped line was finished later. 2347 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2348 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2349 // Comments stored before the preprocessor directive need to be output 2350 // before the preprocessor directive, at the same level as the 2351 // preprocessor directive, as we consider them to apply to the directive. 
2352 flushComments(isOnNewLine(*FormatTok)); 2353 parsePPDirective(); 2354 } 2355 while (FormatTok->Type == TT_ConflictStart || 2356 FormatTok->Type == TT_ConflictEnd || 2357 FormatTok->Type == TT_ConflictAlternative) { 2358 if (FormatTok->Type == TT_ConflictStart) { 2359 conditionalCompilationStart(/*Unreachable=*/false); 2360 } else if (FormatTok->Type == TT_ConflictAlternative) { 2361 conditionalCompilationAlternative(); 2362 } else if (FormatTok->Type == TT_ConflictEnd) { 2363 conditionalCompilationEnd(); 2364 } 2365 FormatTok = Tokens->getNextToken(); 2366 FormatTok->MustBreakBefore = true; 2367 } 2368 2369 if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && 2370 !Line->InPPDirective) { 2371 continue; 2372 } 2373 2374 if (!FormatTok->Tok.is(tok::comment)) { 2375 distributeComments(Comments, FormatTok); 2376 Comments.clear(); 2377 return; 2378 } 2379 2380 Comments.push_back(FormatTok); 2381 } while (!eof()); 2382 2383 distributeComments(Comments, nullptr); 2384 Comments.clear(); 2385 } 2386 2387 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2388 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2389 if (MustBreakBeforeNextToken) { 2390 Line->Tokens.back().Tok->MustBreakBefore = true; 2391 MustBreakBeforeNextToken = false; 2392 } 2393 } 2394 2395 } // end namespace format 2396 } // end namespace clang 2397