1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "UnwrappedLineParser.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/Support/Debug.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #define DEBUG_TYPE "format-parser" 22 23 namespace clang { 24 namespace format { 25 26 class FormatTokenSource { 27 public: 28 virtual ~FormatTokenSource() {} 29 virtual FormatToken *getNextToken() = 0; 30 31 virtual unsigned getPosition() = 0; 32 virtual FormatToken *setPosition(unsigned Position) = 0; 33 }; 34 35 namespace { 36 37 class ScopedDeclarationState { 38 public: 39 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 40 bool MustBeDeclaration) 41 : Line(Line), Stack(Stack) { 42 Line.MustBeDeclaration = MustBeDeclaration; 43 Stack.push_back(MustBeDeclaration); 44 } 45 ~ScopedDeclarationState() { 46 Stack.pop_back(); 47 if (!Stack.empty()) 48 Line.MustBeDeclaration = Stack.back(); 49 else 50 Line.MustBeDeclaration = true; 51 } 52 53 private: 54 UnwrappedLine &Line; 55 std::vector<bool> &Stack; 56 }; 57 58 static bool isLineComment(const FormatToken &FormatTok) { 59 return FormatTok.is(tok::comment) && 60 FormatTok.TokenText.startswith("//"); 61 } 62 63 // Checks if \p FormatTok is a line comment that continues the line comment 64 // \p Previous. The original column of \p MinColumnToken is used to determine 65 // whether \p FormatTok is indented enough to the right to continue \p Previous. 
66 static bool continuesLineComment(const FormatToken &FormatTok, 67 const FormatToken *Previous, 68 const FormatToken *MinColumnToken) { 69 if (!Previous || !MinColumnToken) 70 return false; 71 unsigned MinContinueColumn = 72 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 73 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 74 isLineComment(*Previous) && 75 FormatTok.OriginalColumn >= MinContinueColumn; 76 } 77 78 class ScopedMacroState : public FormatTokenSource { 79 public: 80 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 81 FormatToken *&ResetToken) 82 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 83 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 84 Token(nullptr), PreviousToken(nullptr) { 85 TokenSource = this; 86 Line.Level = 0; 87 Line.InPPDirective = true; 88 } 89 90 ~ScopedMacroState() override { 91 TokenSource = PreviousTokenSource; 92 ResetToken = Token; 93 Line.InPPDirective = false; 94 Line.Level = PreviousLineLevel; 95 } 96 97 FormatToken *getNextToken() override { 98 // The \c UnwrappedLineParser guards against this by never calling 99 // \c getNextToken() after it has encountered the first eof token. 
100 assert(!eof()); 101 PreviousToken = Token; 102 Token = PreviousTokenSource->getNextToken(); 103 if (eof()) 104 return getFakeEOF(); 105 return Token; 106 } 107 108 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 109 110 FormatToken *setPosition(unsigned Position) override { 111 PreviousToken = nullptr; 112 Token = PreviousTokenSource->setPosition(Position); 113 return Token; 114 } 115 116 private: 117 bool eof() { 118 return Token && Token->HasUnescapedNewline && 119 !continuesLineComment(*Token, PreviousToken, 120 /*MinColumnToken=*/PreviousToken); 121 } 122 123 FormatToken *getFakeEOF() { 124 static bool EOFInitialized = false; 125 static FormatToken FormatTok; 126 if (!EOFInitialized) { 127 FormatTok.Tok.startToken(); 128 FormatTok.Tok.setKind(tok::eof); 129 EOFInitialized = true; 130 } 131 return &FormatTok; 132 } 133 134 UnwrappedLine &Line; 135 FormatTokenSource *&TokenSource; 136 FormatToken *&ResetToken; 137 unsigned PreviousLineLevel; 138 FormatTokenSource *PreviousTokenSource; 139 140 FormatToken *Token; 141 FormatToken *PreviousToken; 142 }; 143 144 } // end anonymous namespace 145 146 class ScopedLineState { 147 public: 148 ScopedLineState(UnwrappedLineParser &Parser, 149 bool SwitchToPreprocessorLines = false) 150 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 151 if (SwitchToPreprocessorLines) 152 Parser.CurrentLines = &Parser.PreprocessorDirectives; 153 else if (!Parser.Line->Tokens.empty()) 154 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 155 PreBlockLine = std::move(Parser.Line); 156 Parser.Line = llvm::make_unique<UnwrappedLine>(); 157 Parser.Line->Level = PreBlockLine->Level; 158 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 159 } 160 161 ~ScopedLineState() { 162 if (!Parser.Line->Tokens.empty()) { 163 Parser.addUnwrappedLine(); 164 } 165 assert(Parser.Line->Tokens.empty()); 166 Parser.Line = std::move(PreBlockLine); 167 if (Parser.CurrentLines == 
&Parser.PreprocessorDirectives) 168 Parser.MustBreakBeforeNextToken = true; 169 Parser.CurrentLines = OriginalLines; 170 } 171 172 private: 173 UnwrappedLineParser &Parser; 174 175 std::unique_ptr<UnwrappedLine> PreBlockLine; 176 SmallVectorImpl<UnwrappedLine> *OriginalLines; 177 }; 178 179 class CompoundStatementIndenter { 180 public: 181 CompoundStatementIndenter(UnwrappedLineParser *Parser, 182 const FormatStyle &Style, unsigned &LineLevel) 183 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 184 if (Style.BraceWrapping.AfterControlStatement) 185 Parser->addUnwrappedLine(); 186 if (Style.BraceWrapping.IndentBraces) 187 ++LineLevel; 188 } 189 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 190 191 private: 192 unsigned &LineLevel; 193 unsigned OldLineLevel; 194 }; 195 196 namespace { 197 198 class IndexedTokenSource : public FormatTokenSource { 199 public: 200 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 201 : Tokens(Tokens), Position(-1) {} 202 203 FormatToken *getNextToken() override { 204 ++Position; 205 return Tokens[Position]; 206 } 207 208 unsigned getPosition() override { 209 assert(Position >= 0); 210 return Position; 211 } 212 213 FormatToken *setPosition(unsigned P) override { 214 Position = P; 215 return Tokens[Position]; 216 } 217 218 void reset() { Position = -1; } 219 220 private: 221 ArrayRef<FormatToken *> Tokens; 222 int Position; 223 }; 224 225 } // end anonymous namespace 226 227 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 228 const AdditionalKeywords &Keywords, 229 ArrayRef<FormatToken *> Tokens, 230 UnwrappedLineConsumer &Callback) 231 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 232 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 233 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 234 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {} 235 236 void UnwrappedLineParser::reset() { 237 PPBranchLevel = -1; 238 Line.reset(new UnwrappedLine); 239 
CommentsBeforeNextToken.clear(); 240 FormatTok = nullptr; 241 MustBreakBeforeNextToken = false; 242 PreprocessorDirectives.clear(); 243 CurrentLines = &Lines; 244 DeclarationScopeStack.clear(); 245 PPStack.clear(); 246 } 247 248 void UnwrappedLineParser::parse() { 249 IndexedTokenSource TokenSource(AllTokens); 250 do { 251 DEBUG(llvm::dbgs() << "----\n"); 252 reset(); 253 Tokens = &TokenSource; 254 TokenSource.reset(); 255 256 readToken(); 257 parseFile(); 258 // Create line with eof token. 259 pushToken(FormatTok); 260 addUnwrappedLine(); 261 262 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 263 E = Lines.end(); 264 I != E; ++I) { 265 Callback.consumeUnwrappedLine(*I); 266 } 267 Callback.finishRun(); 268 Lines.clear(); 269 while (!PPLevelBranchIndex.empty() && 270 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 271 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 272 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 273 } 274 if (!PPLevelBranchIndex.empty()) { 275 ++PPLevelBranchIndex.back(); 276 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 277 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 278 } 279 } while (!PPLevelBranchIndex.empty()); 280 } 281 282 void UnwrappedLineParser::parseFile() { 283 // The top-level context in a file always has declarations, except for pre- 284 // processor directives and JavaScript files. 285 bool MustBeDeclaration = 286 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 287 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 288 MustBeDeclaration); 289 if (Style.Language == FormatStyle::LK_TextProto) 290 parseBracedList(); 291 else 292 parseLevel(/*HasOpeningBrace=*/false); 293 // Make sure to format the remaining tokens. 
294 flushComments(true); 295 addUnwrappedLine(); 296 } 297 298 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 299 bool SwitchLabelEncountered = false; 300 do { 301 tok::TokenKind kind = FormatTok->Tok.getKind(); 302 if (FormatTok->Type == TT_MacroBlockBegin) { 303 kind = tok::l_brace; 304 } else if (FormatTok->Type == TT_MacroBlockEnd) { 305 kind = tok::r_brace; 306 } 307 308 switch (kind) { 309 case tok::comment: 310 nextToken(); 311 addUnwrappedLine(); 312 break; 313 case tok::l_brace: 314 // FIXME: Add parameter whether this can happen - if this happens, we must 315 // be in a non-declaration context. 316 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 317 continue; 318 parseBlock(/*MustBeDeclaration=*/false); 319 addUnwrappedLine(); 320 break; 321 case tok::r_brace: 322 if (HasOpeningBrace) 323 return; 324 nextToken(); 325 addUnwrappedLine(); 326 break; 327 case tok::kw_default: 328 case tok::kw_case: 329 if (!SwitchLabelEncountered && 330 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 331 ++Line->Level; 332 SwitchLabelEncountered = true; 333 parseStructuralElement(); 334 break; 335 default: 336 parseStructuralElement(); 337 break; 338 } 339 } while (!eof()); 340 } 341 342 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 343 // We'll parse forward through the tokens until we hit 344 // a closing brace or eof - note that getNextToken() will 345 // parse macros, so this will magically work inside macro 346 // definitions, too. 347 unsigned StoredPosition = Tokens->getPosition(); 348 FormatToken *Tok = FormatTok; 349 const FormatToken *PrevTok = getPreviousToken(); 350 // Keep a stack of positions of lbrace tokens. We will 351 // update information about whether an lbrace starts a 352 // braced init list or a different block during the loop. 353 SmallVector<FormatToken *, 8> LBraceStack; 354 assert(Tok->Tok.is(tok::l_brace)); 355 do { 356 // Get next non-comment token. 
357 FormatToken *NextTok; 358 unsigned ReadTokens = 0; 359 do { 360 NextTok = Tokens->getNextToken(); 361 ++ReadTokens; 362 } while (NextTok->is(tok::comment)); 363 364 switch (Tok->Tok.getKind()) { 365 case tok::l_brace: 366 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 367 if (PrevTok->is(tok::colon)) 368 // A colon indicates this code is in a type, or a braced list 369 // following a label in an object literal ({a: {b: 1}}). The code 370 // below could be confused by semicolons between the individual 371 // members in a type member list, which would normally trigger 372 // BK_Block. In both cases, this must be parsed as an inline braced 373 // init. 374 Tok->BlockKind = BK_BracedInit; 375 else if (PrevTok->is(tok::r_paren)) 376 // `) { }` can only occur in function or method declarations in JS. 377 Tok->BlockKind = BK_Block; 378 } else { 379 Tok->BlockKind = BK_Unknown; 380 } 381 LBraceStack.push_back(Tok); 382 break; 383 case tok::r_brace: 384 if (LBraceStack.empty()) 385 break; 386 if (LBraceStack.back()->BlockKind == BK_Unknown) { 387 bool ProbablyBracedList = false; 388 if (Style.Language == FormatStyle::LK_Proto) { 389 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 390 } else { 391 // Using OriginalColumn to distinguish between ObjC methods and 392 // binary operators is a bit hacky. 393 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 394 NextTok->OriginalColumn == 0; 395 396 // If there is a comma, semicolon or right paren after the closing 397 // brace, we assume this is a braced initializer list. Note that 398 // regardless how we mark inner braces here, we will overwrite the 399 // BlockKind later if we parse a braced list (where all blocks 400 // inside are by default braced lists), or when we explicitly detect 401 // blocks (for example while parsing lambdas). 402 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 403 // braced list in JS. 
404 ProbablyBracedList = 405 (Style.Language == FormatStyle::LK_JavaScript && 406 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 407 Keywords.kw_as)) || 408 (Style.isCpp() && NextTok->is(tok::l_paren)) || 409 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 410 tok::r_paren, tok::r_square, tok::l_brace, 411 tok::l_square, tok::ellipsis) || 412 (NextTok->is(tok::identifier) && 413 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 414 (NextTok->is(tok::semi) && 415 (!ExpectClassBody || LBraceStack.size() != 1)) || 416 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 417 } 418 if (ProbablyBracedList) { 419 Tok->BlockKind = BK_BracedInit; 420 LBraceStack.back()->BlockKind = BK_BracedInit; 421 } else { 422 Tok->BlockKind = BK_Block; 423 LBraceStack.back()->BlockKind = BK_Block; 424 } 425 } 426 LBraceStack.pop_back(); 427 break; 428 case tok::at: 429 case tok::semi: 430 case tok::kw_if: 431 case tok::kw_while: 432 case tok::kw_for: 433 case tok::kw_switch: 434 case tok::kw_try: 435 case tok::kw___try: 436 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 437 LBraceStack.back()->BlockKind = BK_Block; 438 break; 439 default: 440 break; 441 } 442 PrevTok = Tok; 443 Tok = NextTok; 444 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 445 446 // Assume other blocks for all unclosed opening braces. 447 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 448 if (LBraceStack[i]->BlockKind == BK_Unknown) 449 LBraceStack[i]->BlockKind = BK_Block; 450 } 451 452 FormatTok = Tokens->setPosition(StoredPosition); 453 } 454 455 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 456 bool MunchSemi) { 457 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 458 "'{' or macro block token expected"); 459 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 460 FormatTok->BlockKind = BK_Block; 461 462 unsigned InitialLevel = Line->Level; 463 nextToken(/*LevelDifference=*/AddLevel ? 
1 : 0); 464 465 if (MacroBlock && FormatTok->is(tok::l_paren)) 466 parseParens(); 467 468 addUnwrappedLine(); 469 size_t OpeningLineIndex = CurrentLines->empty() 470 ? (UnwrappedLine::kInvalidIndex) 471 : (CurrentLines->size() - 1); 472 473 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 474 MustBeDeclaration); 475 if (AddLevel) 476 ++Line->Level; 477 parseLevel(/*HasOpeningBrace=*/true); 478 479 if (eof()) 480 return; 481 482 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 483 : !FormatTok->is(tok::r_brace)) { 484 Line->Level = InitialLevel; 485 FormatTok->BlockKind = BK_Block; 486 return; 487 } 488 489 // Munch the closing brace. 490 nextToken(/*LevelDifference=*/AddLevel ? -1 : 0); 491 492 if (MacroBlock && FormatTok->is(tok::l_paren)) 493 parseParens(); 494 495 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 496 nextToken(); 497 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 498 Line->Level = InitialLevel; 499 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 500 // Update the opening line to add the forward reference as well 501 (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex = 502 CurrentLines->size() - 1; 503 } 504 } 505 506 static bool isGoogScope(const UnwrappedLine &Line) { 507 // FIXME: Closure-library specific stuff should not be hard-coded but be 508 // configurable. 509 if (Line.Tokens.size() < 4) 510 return false; 511 auto I = Line.Tokens.begin(); 512 if (I->Tok->TokenText != "goog") 513 return false; 514 ++I; 515 if (I->Tok->isNot(tok::period)) 516 return false; 517 ++I; 518 if (I->Tok->TokenText != "scope") 519 return false; 520 ++I; 521 return I->Tok->is(tok::l_paren); 522 } 523 524 static bool isIIFE(const UnwrappedLine &Line, 525 const AdditionalKeywords &Keywords) { 526 // Look for the start of an immediately invoked anonymous function. 527 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 528 // This is commonly done in JavaScript to create a new, anonymous scope. 
529 // Example: (function() { ... })() 530 if (Line.Tokens.size() < 3) 531 return false; 532 auto I = Line.Tokens.begin(); 533 if (I->Tok->isNot(tok::l_paren)) 534 return false; 535 ++I; 536 if (I->Tok->isNot(Keywords.kw_function)) 537 return false; 538 ++I; 539 return I->Tok->is(tok::l_paren); 540 } 541 542 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 543 const FormatToken &InitialToken) { 544 if (InitialToken.is(tok::kw_namespace)) 545 return Style.BraceWrapping.AfterNamespace; 546 if (InitialToken.is(tok::kw_class)) 547 return Style.BraceWrapping.AfterClass; 548 if (InitialToken.is(tok::kw_union)) 549 return Style.BraceWrapping.AfterUnion; 550 if (InitialToken.is(tok::kw_struct)) 551 return Style.BraceWrapping.AfterStruct; 552 return false; 553 } 554 555 void UnwrappedLineParser::parseChildBlock() { 556 FormatTok->BlockKind = BK_Block; 557 nextToken(); 558 { 559 bool SkipIndent = 560 (Style.Language == FormatStyle::LK_JavaScript && 561 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 562 ScopedLineState LineState(*this); 563 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 564 /*MustBeDeclaration=*/false); 565 Line->Level += SkipIndent ? 0 : 1; 566 parseLevel(/*HasOpeningBrace=*/true); 567 flushComments(isOnNewLine(*FormatTok)); 568 Line->Level -= SkipIndent ? 
0 : 1; 569 } 570 nextToken(); 571 } 572 573 void UnwrappedLineParser::parsePPDirective() { 574 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 575 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 576 nextToken(); 577 578 if (!FormatTok->Tok.getIdentifierInfo()) { 579 parsePPUnknown(); 580 return; 581 } 582 583 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 584 case tok::pp_define: 585 parsePPDefine(); 586 return; 587 case tok::pp_if: 588 parsePPIf(/*IfDef=*/false); 589 break; 590 case tok::pp_ifdef: 591 case tok::pp_ifndef: 592 parsePPIf(/*IfDef=*/true); 593 break; 594 case tok::pp_else: 595 parsePPElse(); 596 break; 597 case tok::pp_elif: 598 parsePPElIf(); 599 break; 600 case tok::pp_endif: 601 parsePPEndIf(); 602 break; 603 default: 604 parsePPUnknown(); 605 break; 606 } 607 } 608 609 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 610 if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable)) 611 PPStack.push_back(PP_Unreachable); 612 else 613 PPStack.push_back(PP_Conditional); 614 } 615 616 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 617 ++PPBranchLevel; 618 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 619 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 620 PPLevelBranchIndex.push_back(0); 621 PPLevelBranchCount.push_back(0); 622 } 623 PPChainBranchIndex.push(0); 624 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 625 conditionalCompilationCondition(Unreachable || Skip); 626 } 627 628 void UnwrappedLineParser::conditionalCompilationAlternative() { 629 if (!PPStack.empty()) 630 PPStack.pop_back(); 631 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 632 if (!PPChainBranchIndex.empty()) 633 ++PPChainBranchIndex.top(); 634 conditionalCompilationCondition( 635 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 636 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 637 } 638 639 void 
UnwrappedLineParser::conditionalCompilationEnd() { 640 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 641 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 642 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 643 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 644 } 645 } 646 // Guard against #endif's without #if. 647 if (PPBranchLevel > 0) 648 --PPBranchLevel; 649 if (!PPChainBranchIndex.empty()) 650 PPChainBranchIndex.pop(); 651 if (!PPStack.empty()) 652 PPStack.pop_back(); 653 } 654 655 void UnwrappedLineParser::parsePPIf(bool IfDef) { 656 bool IfNDef = FormatTok->is(tok::pp_ifndef); 657 nextToken(); 658 bool Unreachable = false; 659 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 660 Unreachable = true; 661 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 662 Unreachable = true; 663 conditionalCompilationStart(Unreachable); 664 parsePPUnknown(); 665 } 666 667 void UnwrappedLineParser::parsePPElse() { 668 conditionalCompilationAlternative(); 669 parsePPUnknown(); 670 } 671 672 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 673 674 void UnwrappedLineParser::parsePPEndIf() { 675 conditionalCompilationEnd(); 676 parsePPUnknown(); 677 } 678 679 void UnwrappedLineParser::parsePPDefine() { 680 nextToken(); 681 682 if (FormatTok->Tok.getKind() != tok::identifier) { 683 parsePPUnknown(); 684 return; 685 } 686 nextToken(); 687 if (FormatTok->Tok.getKind() == tok::l_paren && 688 FormatTok->WhitespaceRange.getBegin() == 689 FormatTok->WhitespaceRange.getEnd()) { 690 parseParens(); 691 } 692 addUnwrappedLine(); 693 Line->Level = 1; 694 695 // Errors during a preprocessor directive can only affect the layout of the 696 // preprocessor directive, and thus we ignore them. An alternative approach 697 // would be to use the same approach we use on the file level (no 698 // re-indentation if there was a structural error) within the macro 699 // definition. 
700 parseFile(); 701 } 702 703 void UnwrappedLineParser::parsePPUnknown() { 704 do { 705 nextToken(); 706 } while (!eof()); 707 addUnwrappedLine(); 708 } 709 710 // Here we blacklist certain tokens that are not usually the first token in an 711 // unwrapped line. This is used in attempt to distinguish macro calls without 712 // trailing semicolons from other constructs split to several lines. 713 static bool tokenCanStartNewLine(const clang::Token &Tok) { 714 // Semicolon can be a null-statement, l_square can be a start of a macro or 715 // a C++11 attribute, but this doesn't seem to be common. 716 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 717 Tok.isNot(tok::l_square) && 718 // Tokens that can only be used as binary operators and a part of 719 // overloaded operator names. 720 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 721 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 722 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 723 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 724 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 725 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 726 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 727 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 728 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 729 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 730 Tok.isNot(tok::lesslessequal) && 731 // Colon is used in labels, base class lists, initializer lists, 732 // range-based for loops, ternary operator, but should never be the 733 // first token in an unwrapped line. 734 Tok.isNot(tok::colon) && 735 // 'noexcept' is a trailing annotation. 736 Tok.isNot(tok::kw_noexcept); 737 } 738 739 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 740 const FormatToken *FormatTok) { 741 // FIXME: This returns true for C/C++ keywords like 'struct'. 
742 return FormatTok->is(tok::identifier) && 743 (FormatTok->Tok.getIdentifierInfo() == nullptr || 744 !FormatTok->isOneOf( 745 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 746 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 747 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 748 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 749 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 750 Keywords.kw_instanceof, Keywords.kw_interface, 751 Keywords.kw_throws, Keywords.kw_from)); 752 } 753 754 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 755 const FormatToken *FormatTok) { 756 return FormatTok->Tok.isLiteral() || 757 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 758 mustBeJSIdent(Keywords, FormatTok); 759 } 760 761 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 762 // when encountered after a value (see mustBeJSIdentOrValue). 763 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 764 const FormatToken *FormatTok) { 765 return FormatTok->isOneOf( 766 tok::kw_return, Keywords.kw_yield, 767 // conditionals 768 tok::kw_if, tok::kw_else, 769 // loops 770 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 771 // switch/case 772 tok::kw_switch, tok::kw_case, 773 // exceptions 774 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 775 // declaration 776 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 777 Keywords.kw_async, Keywords.kw_function, 778 // import/export 779 Keywords.kw_import, tok::kw_export); 780 } 781 782 // readTokenWithJavaScriptASI reads the next token and terminates the current 783 // line if JavaScript Automatic Semicolon Insertion must 784 // happen between the current token and the next token. 785 // 786 // This method is conservative - it cannot cover all edge cases of JavaScript, 787 // but only aims to correctly handle certain well known cases. 
It *must not* 788 // return true in speculative cases. 789 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 790 FormatToken *Previous = FormatTok; 791 readToken(); 792 FormatToken *Next = FormatTok; 793 794 bool IsOnSameLine = 795 CommentsBeforeNextToken.empty() 796 ? Next->NewlinesBefore == 0 797 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 798 if (IsOnSameLine) 799 return; 800 801 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 802 bool PreviousStartsTemplateExpr = 803 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 804 if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) { 805 // If the token before the previous one is an '@', the previous token is an 806 // annotation and can precede another identifier/value. 807 const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok; 808 if (PrePrevious->is(tok::at)) 809 return; 810 } 811 if (Next->is(tok::exclaim) && PreviousMustBeValue) 812 return addUnwrappedLine(); 813 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 814 bool NextEndsTemplateExpr = 815 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 816 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 817 (PreviousMustBeValue || 818 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 819 tok::minusminus))) 820 return addUnwrappedLine(); 821 if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next)) 822 return addUnwrappedLine(); 823 } 824 825 void UnwrappedLineParser::parseStructuralElement() { 826 assert(!FormatTok->is(tok::l_brace)); 827 if (Style.Language == FormatStyle::LK_TableGen && 828 FormatTok->is(tok::pp_include)) { 829 nextToken(); 830 if (FormatTok->is(tok::string_literal)) 831 nextToken(); 832 addUnwrappedLine(); 833 return; 834 } 835 switch (FormatTok->Tok.getKind()) { 836 case tok::at: 837 nextToken(); 838 if (FormatTok->Tok.is(tok::l_brace)) { 839 nextToken(); 840 parseBracedList(); 841 break; 842 } 843 
switch (FormatTok->Tok.getObjCKeywordID()) { 844 case tok::objc_public: 845 case tok::objc_protected: 846 case tok::objc_package: 847 case tok::objc_private: 848 return parseAccessSpecifier(); 849 case tok::objc_interface: 850 case tok::objc_implementation: 851 return parseObjCInterfaceOrImplementation(); 852 case tok::objc_protocol: 853 return parseObjCProtocol(); 854 case tok::objc_end: 855 return; // Handled by the caller. 856 case tok::objc_optional: 857 case tok::objc_required: 858 nextToken(); 859 addUnwrappedLine(); 860 return; 861 case tok::objc_autoreleasepool: 862 nextToken(); 863 if (FormatTok->Tok.is(tok::l_brace)) { 864 if (Style.BraceWrapping.AfterObjCDeclaration) 865 addUnwrappedLine(); 866 parseBlock(/*MustBeDeclaration=*/false); 867 } 868 addUnwrappedLine(); 869 return; 870 case tok::objc_try: 871 // This branch isn't strictly necessary (the kw_try case below would 872 // do this too after the tok::at is parsed above). But be explicit. 873 parseTryCatch(); 874 return; 875 default: 876 break; 877 } 878 break; 879 case tok::kw_asm: 880 nextToken(); 881 if (FormatTok->is(tok::l_brace)) { 882 FormatTok->Type = TT_InlineASMBrace; 883 nextToken(); 884 while (FormatTok && FormatTok->isNot(tok::eof)) { 885 if (FormatTok->is(tok::r_brace)) { 886 FormatTok->Type = TT_InlineASMBrace; 887 nextToken(); 888 addUnwrappedLine(); 889 break; 890 } 891 FormatTok->Finalized = true; 892 nextToken(); 893 } 894 } 895 break; 896 case tok::kw_namespace: 897 parseNamespace(); 898 return; 899 case tok::kw_inline: 900 nextToken(); 901 if (FormatTok->Tok.is(tok::kw_namespace)) { 902 parseNamespace(); 903 return; 904 } 905 break; 906 case tok::kw_public: 907 case tok::kw_protected: 908 case tok::kw_private: 909 if (Style.Language == FormatStyle::LK_Java || 910 Style.Language == FormatStyle::LK_JavaScript) 911 nextToken(); 912 else 913 parseAccessSpecifier(); 914 return; 915 case tok::kw_if: 916 parseIfThenElse(); 917 return; 918 case tok::kw_for: 919 case tok::kw_while: 920 
parseForOrWhileLoop(); 921 return; 922 case tok::kw_do: 923 parseDoWhile(); 924 return; 925 case tok::kw_switch: 926 parseSwitch(); 927 return; 928 case tok::kw_default: 929 nextToken(); 930 parseLabel(); 931 return; 932 case tok::kw_case: 933 parseCaseLabel(); 934 return; 935 case tok::kw_try: 936 case tok::kw___try: 937 parseTryCatch(); 938 return; 939 case tok::kw_extern: 940 nextToken(); 941 if (FormatTok->Tok.is(tok::string_literal)) { 942 nextToken(); 943 if (FormatTok->Tok.is(tok::l_brace)) { 944 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 945 addUnwrappedLine(); 946 return; 947 } 948 } 949 break; 950 case tok::kw_export: 951 if (Style.Language == FormatStyle::LK_JavaScript) { 952 parseJavaScriptEs6ImportExport(); 953 return; 954 } 955 break; 956 case tok::identifier: 957 if (FormatTok->is(TT_ForEachMacro)) { 958 parseForOrWhileLoop(); 959 return; 960 } 961 if (FormatTok->is(TT_MacroBlockBegin)) { 962 parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, 963 /*MunchSemi=*/false); 964 return; 965 } 966 if (FormatTok->is(Keywords.kw_import)) { 967 if (Style.Language == FormatStyle::LK_JavaScript) { 968 parseJavaScriptEs6ImportExport(); 969 return; 970 } 971 if (Style.Language == FormatStyle::LK_Proto) { 972 nextToken(); 973 if (FormatTok->is(tok::kw_public)) 974 nextToken(); 975 if (!FormatTok->is(tok::string_literal)) 976 return; 977 nextToken(); 978 if (FormatTok->is(tok::semi)) 979 nextToken(); 980 addUnwrappedLine(); 981 return; 982 } 983 } 984 if (Style.isCpp() && 985 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 986 Keywords.kw_slots, Keywords.kw_qslots)) { 987 nextToken(); 988 if (FormatTok->is(tok::colon)) { 989 nextToken(); 990 addUnwrappedLine(); 991 return; 992 } 993 } 994 // In all other cases, parse the declaration. 
995 break; 996 default: 997 break; 998 } 999 do { 1000 const FormatToken *Previous = getPreviousToken(); 1001 switch (FormatTok->Tok.getKind()) { 1002 case tok::at: 1003 nextToken(); 1004 if (FormatTok->Tok.is(tok::l_brace)) { 1005 nextToken(); 1006 parseBracedList(); 1007 } 1008 break; 1009 case tok::kw_enum: 1010 // Ignore if this is part of "template <enum ...". 1011 if (Previous && Previous->is(tok::less)) { 1012 nextToken(); 1013 break; 1014 } 1015 1016 // parseEnum falls through and does not yet add an unwrapped line as an 1017 // enum definition can start a structural element. 1018 if (!parseEnum()) 1019 break; 1020 // This only applies for C++. 1021 if (!Style.isCpp()) { 1022 addUnwrappedLine(); 1023 return; 1024 } 1025 break; 1026 case tok::kw_typedef: 1027 nextToken(); 1028 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1029 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 1030 parseEnum(); 1031 break; 1032 case tok::kw_struct: 1033 case tok::kw_union: 1034 case tok::kw_class: 1035 // parseRecord falls through and does not yet add an unwrapped line as a 1036 // record declaration or definition can start a structural element. 1037 parseRecord(); 1038 // This does not apply for Java and JavaScript. 1039 if (Style.Language == FormatStyle::LK_Java || 1040 Style.Language == FormatStyle::LK_JavaScript) { 1041 if (FormatTok->is(tok::semi)) 1042 nextToken(); 1043 addUnwrappedLine(); 1044 return; 1045 } 1046 break; 1047 case tok::period: 1048 nextToken(); 1049 // In Java, classes have an implicit static member "class". 1050 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1051 FormatTok->is(tok::kw_class)) 1052 nextToken(); 1053 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 1054 FormatTok->Tok.getIdentifierInfo()) 1055 // JavaScript only has pseudo keywords, all keywords are allowed to 1056 // appear in "IdentifierName" positions. 
See http://es5.github.io/#x7.6 1057 nextToken(); 1058 break; 1059 case tok::semi: 1060 nextToken(); 1061 addUnwrappedLine(); 1062 return; 1063 case tok::r_brace: 1064 addUnwrappedLine(); 1065 return; 1066 case tok::l_paren: 1067 parseParens(); 1068 break; 1069 case tok::kw_operator: 1070 nextToken(); 1071 if (FormatTok->isBinaryOperator()) 1072 nextToken(); 1073 break; 1074 case tok::caret: 1075 nextToken(); 1076 if (FormatTok->Tok.isAnyIdentifier() || 1077 FormatTok->isSimpleTypeSpecifier()) 1078 nextToken(); 1079 if (FormatTok->is(tok::l_paren)) 1080 parseParens(); 1081 if (FormatTok->is(tok::l_brace)) 1082 parseChildBlock(); 1083 break; 1084 case tok::l_brace: 1085 if (!tryToParseBracedList()) { 1086 // A block outside of parentheses must be the last part of a 1087 // structural element. 1088 // FIXME: Figure out cases where this is not true, and add projections 1089 // for them (the one we know is missing are lambdas). 1090 if (Style.BraceWrapping.AfterFunction) 1091 addUnwrappedLine(); 1092 FormatTok->Type = TT_FunctionLBrace; 1093 parseBlock(/*MustBeDeclaration=*/false); 1094 addUnwrappedLine(); 1095 return; 1096 } 1097 // Otherwise this was a braced init list, and the structural 1098 // element continues. 1099 break; 1100 case tok::kw_try: 1101 // We arrive here when parsing function-try blocks. 1102 parseTryCatch(); 1103 return; 1104 case tok::identifier: { 1105 if (FormatTok->is(TT_MacroBlockEnd)) { 1106 addUnwrappedLine(); 1107 return; 1108 } 1109 1110 // Function declarations (as opposed to function expressions) are parsed 1111 // on their own unwrapped line by continuing this loop. Function 1112 // expressions (functions that are not on their own line) must not create 1113 // a new unwrapped line, so they are special cased below. 
1114 size_t TokenCount = Line->Tokens.size(); 1115 if (Style.Language == FormatStyle::LK_JavaScript && 1116 FormatTok->is(Keywords.kw_function) && 1117 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1118 Keywords.kw_async)))) { 1119 tryToParseJSFunction(); 1120 break; 1121 } 1122 if ((Style.Language == FormatStyle::LK_JavaScript || 1123 Style.Language == FormatStyle::LK_Java) && 1124 FormatTok->is(Keywords.kw_interface)) { 1125 if (Style.Language == FormatStyle::LK_JavaScript) { 1126 // In JavaScript/TypeScript, "interface" can be used as a standalone 1127 // identifier, e.g. in `var interface = 1;`. If "interface" is 1128 // followed by another identifier, it is very like to be an actual 1129 // interface declaration. 1130 unsigned StoredPosition = Tokens->getPosition(); 1131 FormatToken *Next = Tokens->getNextToken(); 1132 FormatTok = Tokens->setPosition(StoredPosition); 1133 if (Next && !mustBeJSIdent(Keywords, Next)) { 1134 nextToken(); 1135 break; 1136 } 1137 } 1138 parseRecord(); 1139 addUnwrappedLine(); 1140 return; 1141 } 1142 1143 // See if the following token should start a new unwrapped line. 1144 StringRef Text = FormatTok->TokenText; 1145 nextToken(); 1146 if (Line->Tokens.size() == 1 && 1147 // JS doesn't have macros, and within classes colons indicate fields, 1148 // not labels. 1149 Style.Language != FormatStyle::LK_JavaScript) { 1150 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1151 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1152 parseLabel(); 1153 return; 1154 } 1155 // Recognize function-like macro usages without trailing semicolon as 1156 // well as free-standing macros like Q_OBJECT. 1157 bool FunctionLike = FormatTok->is(tok::l_paren); 1158 if (FunctionLike) 1159 parseParens(); 1160 1161 bool FollowedByNewline = 1162 CommentsBeforeNextToken.empty() 1163 ? 
FormatTok->NewlinesBefore > 0
              : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

      // An all-uppercase name (at least five characters long, or followed by a
      // parenthesized argument list) that is followed by a newline is treated
      // as a macro invocation that ends the unwrapped line here.
      if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
          tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
        addUnwrappedLine();
        return;
      }
    }
    break;
  }
  case tok::equal:
    // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
    // TT_JsFatArrow. They always start an expression or a child block if
    // followed by a curly.
    if (FormatTok->is(TT_JsFatArrow)) {
      nextToken();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    }

    nextToken();
    if (FormatTok->Tok.is(tok::l_brace)) {
      // "= {": a braced initializer list.
      nextToken();
      parseBracedList();
    } else if (Style.Language == FormatStyle::LK_Proto &&
               FormatTok->Tok.is(tok::less)) {
      // Proto: "= <" opens a list that is closed by ">".
      nextToken();
      parseBracedList(/*ContinueOnSemicolons=*/false,
                      /*ClosingBraceKind=*/tok::greater);
    }
    break;
  case tok::l_square:
    parseSquare();
    break;
  case tok::kw_new:
    parseNew();
    break;
  default:
    nextToken();
    break;
  }
  } while (!eof());
}

/// Tries to parse a C++ lambda whose introducer starts at the current "[".
///
/// Returns false after consuming a single token when the language is not C++
/// or when the previous token (identifier, "operator", "new", "delete", a
/// scope-closing token or a simple type specifier) shows that the "[" cannot
/// start a lambda. Also returns false when the lambda introducer cannot be
/// parsed. Otherwise returns true; the "[" is only marked as
/// TT_LambdaLSquare, and the body only parsed as a child block, when a "{" is
/// reached through the accepted declarator tokens.
bool UnwrappedLineParser::tryToParseLambda() {
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  const FormatToken* Previous = getPreviousToken();
  if (Previous &&
      (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
                         tok::kw_delete) ||
       Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
    nextToken();
    return false;
  }
  assert(FormatTok->is(tok::l_square));
  // Remember the "[" so that it can be annotated once the lambda is confirmed.
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  // Skip over the declarator between the introducer and the body.
  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens();
      break;
    case
tok::amp: 1239 case tok::star: 1240 case tok::kw_const: 1241 case tok::comma: 1242 case tok::less: 1243 case tok::greater: 1244 case tok::identifier: 1245 case tok::numeric_constant: 1246 case tok::coloncolon: 1247 case tok::kw_mutable: 1248 nextToken(); 1249 break; 1250 case tok::arrow: 1251 FormatTok->Type = TT_LambdaArrow; 1252 nextToken(); 1253 break; 1254 default: 1255 return true; 1256 } 1257 } 1258 LSquare.Type = TT_LambdaLSquare; 1259 parseChildBlock(); 1260 return true; 1261 } 1262 1263 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1264 nextToken(); 1265 if (FormatTok->is(tok::equal)) { 1266 nextToken(); 1267 if (FormatTok->is(tok::r_square)) { 1268 nextToken(); 1269 return true; 1270 } 1271 if (FormatTok->isNot(tok::comma)) 1272 return false; 1273 nextToken(); 1274 } else if (FormatTok->is(tok::amp)) { 1275 nextToken(); 1276 if (FormatTok->is(tok::r_square)) { 1277 nextToken(); 1278 return true; 1279 } 1280 if (!FormatTok->isOneOf(tok::comma, tok::identifier)) { 1281 return false; 1282 } 1283 if (FormatTok->is(tok::comma)) 1284 nextToken(); 1285 } else if (FormatTok->is(tok::r_square)) { 1286 nextToken(); 1287 return true; 1288 } 1289 do { 1290 if (FormatTok->is(tok::amp)) 1291 nextToken(); 1292 if (!FormatTok->isOneOf(tok::identifier, tok::kw_this)) 1293 return false; 1294 nextToken(); 1295 if (FormatTok->is(tok::ellipsis)) 1296 nextToken(); 1297 if (FormatTok->is(tok::comma)) { 1298 nextToken(); 1299 } else if (FormatTok->is(tok::r_square)) { 1300 nextToken(); 1301 return true; 1302 } else { 1303 return false; 1304 } 1305 } while (!eof()); 1306 return false; 1307 } 1308 1309 void UnwrappedLineParser::tryToParseJSFunction() { 1310 assert(FormatTok->is(Keywords.kw_function) || 1311 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1312 if (FormatTok->is(Keywords.kw_async)) 1313 nextToken(); 1314 // Consume "function". 1315 nextToken(); 1316 1317 // Consume * (generator function). Treat it like C++'s overloaded operators. 
1318 if (FormatTok->is(tok::star)) { 1319 FormatTok->Type = TT_OverloadedOperator; 1320 nextToken(); 1321 } 1322 1323 // Consume function name. 1324 if (FormatTok->is(tok::identifier)) 1325 nextToken(); 1326 1327 if (FormatTok->isNot(tok::l_paren)) 1328 return; 1329 1330 // Parse formal parameter list. 1331 parseParens(); 1332 1333 if (FormatTok->is(tok::colon)) { 1334 // Parse a type definition. 1335 nextToken(); 1336 1337 // Eat the type declaration. For braced inline object types, balance braces, 1338 // otherwise just parse until finding an l_brace for the function body. 1339 if (FormatTok->is(tok::l_brace)) 1340 tryToParseBracedList(); 1341 else 1342 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1343 nextToken(); 1344 } 1345 1346 if (FormatTok->is(tok::semi)) 1347 return; 1348 1349 parseChildBlock(); 1350 } 1351 1352 bool UnwrappedLineParser::tryToParseBracedList() { 1353 if (FormatTok->BlockKind == BK_Unknown) 1354 calculateBraceTypes(); 1355 assert(FormatTok->BlockKind != BK_Unknown); 1356 if (FormatTok->BlockKind == BK_Block) 1357 return false; 1358 nextToken(); 1359 parseBracedList(); 1360 return true; 1361 } 1362 1363 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1364 tok::TokenKind ClosingBraceKind) { 1365 bool HasError = false; 1366 1367 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1368 // replace this by using parseAssigmentExpression() inside. 1369 do { 1370 if (Style.Language == FormatStyle::LK_JavaScript) { 1371 if (FormatTok->is(Keywords.kw_function) || 1372 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1373 tryToParseJSFunction(); 1374 continue; 1375 } 1376 if (FormatTok->is(TT_JsFatArrow)) { 1377 nextToken(); 1378 // Fat arrows can be followed by simple expressions or by child blocks 1379 // in curly braces. 
if (FormatTok->is(tok::l_brace)) {
          parseChildBlock();
          // Note: "continue" in a do-while jumps to the "!eof()" check.
          continue;
        }
      }
      if (FormatTok->is(tok::l_brace)) {
        // Could be a method inside of a braced list `{a() { return 1; }}`.
        if (tryToParseBracedList())
          continue;
        parseChildBlock();
      }
    }
    // The closing token ends the list; report whether an error was recorded.
    if (FormatTok->Tok.getKind() == ClosingBraceKind) {
      nextToken();
      return !HasError;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::caret:
      // "^" optionally followed by a "{...}" body, parsed as a child block.
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        parseChildBlock();
      }
      break;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_paren:
      parseParens();
      // JavaScript can just have free standing methods and getters/setters in
      // object literals. Detect them by a "{" following ")".
      if (Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      break;
    case tok::l_brace:
      // Assume there are no blocks inside a braced init list apart
      // from the ones we explicitly parse out (like lambdas).
      FormatTok->BlockKind = BK_BracedInit;
      nextToken();
      parseBracedList();
      break;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
1428 if (Style.Language == FormatStyle::LK_JavaScript) { 1429 nextToken(); 1430 break; 1431 } 1432 HasError = true; 1433 if (!ContinueOnSemicolons) 1434 return !HasError; 1435 nextToken(); 1436 break; 1437 case tok::comma: 1438 nextToken(); 1439 break; 1440 default: 1441 nextToken(); 1442 break; 1443 } 1444 } while (!eof()); 1445 return false; 1446 } 1447 1448 void UnwrappedLineParser::parseParens() { 1449 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1450 nextToken(); 1451 do { 1452 switch (FormatTok->Tok.getKind()) { 1453 case tok::l_paren: 1454 parseParens(); 1455 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1456 parseChildBlock(); 1457 break; 1458 case tok::r_paren: 1459 nextToken(); 1460 return; 1461 case tok::r_brace: 1462 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1463 return; 1464 case tok::l_square: 1465 tryToParseLambda(); 1466 break; 1467 case tok::l_brace: 1468 if (!tryToParseBracedList()) 1469 parseChildBlock(); 1470 break; 1471 case tok::at: 1472 nextToken(); 1473 if (FormatTok->Tok.is(tok::l_brace)) { 1474 nextToken(); 1475 parseBracedList(); 1476 } 1477 break; 1478 case tok::kw_class: 1479 if (Style.Language == FormatStyle::LK_JavaScript) 1480 parseRecord(/*ParseAsExpr=*/true); 1481 else 1482 nextToken(); 1483 break; 1484 case tok::identifier: 1485 if (Style.Language == FormatStyle::LK_JavaScript && 1486 (FormatTok->is(Keywords.kw_function) || 1487 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1488 tryToParseJSFunction(); 1489 else 1490 nextToken(); 1491 break; 1492 default: 1493 nextToken(); 1494 break; 1495 } 1496 } while (!eof()); 1497 } 1498 1499 void UnwrappedLineParser::parseSquare() { 1500 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1501 if (tryToParseLambda()) 1502 return; 1503 do { 1504 switch (FormatTok->Tok.getKind()) { 1505 case tok::l_paren: 1506 parseParens(); 1507 break; 1508 case tok::r_square: 1509 nextToken(); 1510 
return; 1511 case tok::r_brace: 1512 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1513 return; 1514 case tok::l_square: 1515 parseSquare(); 1516 break; 1517 case tok::l_brace: { 1518 if (!tryToParseBracedList()) 1519 parseChildBlock(); 1520 break; 1521 } 1522 case tok::at: 1523 nextToken(); 1524 if (FormatTok->Tok.is(tok::l_brace)) { 1525 nextToken(); 1526 parseBracedList(); 1527 } 1528 break; 1529 default: 1530 nextToken(); 1531 break; 1532 } 1533 } while (!eof()); 1534 } 1535 1536 void UnwrappedLineParser::parseIfThenElse() { 1537 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1538 nextToken(); 1539 if (FormatTok->Tok.is(tok::kw_constexpr)) 1540 nextToken(); 1541 if (FormatTok->Tok.is(tok::l_paren)) 1542 parseParens(); 1543 bool NeedsUnwrappedLine = false; 1544 if (FormatTok->Tok.is(tok::l_brace)) { 1545 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1546 parseBlock(/*MustBeDeclaration=*/false); 1547 if (Style.BraceWrapping.BeforeElse) 1548 addUnwrappedLine(); 1549 else 1550 NeedsUnwrappedLine = true; 1551 } else { 1552 addUnwrappedLine(); 1553 ++Line->Level; 1554 parseStructuralElement(); 1555 --Line->Level; 1556 } 1557 if (FormatTok->Tok.is(tok::kw_else)) { 1558 nextToken(); 1559 if (FormatTok->Tok.is(tok::l_brace)) { 1560 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1561 parseBlock(/*MustBeDeclaration=*/false); 1562 addUnwrappedLine(); 1563 } else if (FormatTok->Tok.is(tok::kw_if)) { 1564 parseIfThenElse(); 1565 } else { 1566 addUnwrappedLine(); 1567 ++Line->Level; 1568 parseStructuralElement(); 1569 if (FormatTok->is(tok::eof)) 1570 addUnwrappedLine(); 1571 --Line->Level; 1572 } 1573 } else if (NeedsUnwrappedLine) { 1574 addUnwrappedLine(); 1575 } 1576 } 1577 1578 void UnwrappedLineParser::parseTryCatch() { 1579 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1580 nextToken(); 1581 bool NeedsUnwrappedLine = false; 1582 if (FormatTok->is(tok::colon)) { 1583 // We 
// are in a function try block, what comes is an initializer list.
    nextToken();
    // Each initializer: a name, an optional parenthesized argument list and an
    // optional separating comma.
    while (FormatTok->is(tok::identifier)) {
      nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::comma))
        nextToken();
    }
  }
  // Parse try with resource.
  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
    parseParens();
  }
  if (FormatTok->is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    // Either end the line right after "}" or keep it open so that a following
    // handler keyword can share the line.
    if (Style.BraceWrapping.BeforeCatch) {
      addUnwrappedLine();
    } else {
      NeedsUnwrappedLine = true;
    }
  } else if (!FormatTok->is(tok::kw_catch)) {
    // The C++ standard requires a compound-statement after a try.
    // If there's none, we try to assume there's a structuralElement
    // and try to continue.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  // Handlers: one per iteration.
  while (1) {
    // Skip the "@" of Objective-C's @catch / @finally.
    if (FormatTok->is(tok::at))
      nextToken();
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
                             tok::kw___finally) ||
          ((Style.Language == FormatStyle::LK_Java ||
            Style.Language == FormatStyle::LK_JavaScript) &&
           FormatTok->is(Keywords.kw_finally)) ||
          (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
           FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
      break;
    nextToken();
    // Skip everything up to the handler's "{"; give up on the whole statement
    // if ";", "}" or eof shows up first.
    while (FormatTok->isNot(tok::l_brace)) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        continue;
      }
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
        return;
      nextToken();
    }
    NeedsUnwrappedLine = false;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  }
  // Finish the line left open after the last block, if any.
  if (NeedsUnwrappedLine)
addUnwrappedLine(); 1645 } 1646 1647 void UnwrappedLineParser::parseNamespace() { 1648 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1649 1650 const FormatToken &InitialToken = *FormatTok; 1651 nextToken(); 1652 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1653 nextToken(); 1654 if (FormatTok->Tok.is(tok::l_brace)) { 1655 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1656 addUnwrappedLine(); 1657 1658 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1659 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1660 DeclarationScopeStack.size() > 1); 1661 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1662 // Munch the semicolon after a namespace. This is more common than one would 1663 // think. Puttin the semicolon into its own line is very ugly. 1664 if (FormatTok->Tok.is(tok::semi)) 1665 nextToken(); 1666 addUnwrappedLine(); 1667 } 1668 // FIXME: Add error handling. 1669 } 1670 1671 void UnwrappedLineParser::parseNew() { 1672 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1673 nextToken(); 1674 if (Style.Language != FormatStyle::LK_Java) 1675 return; 1676 1677 // In Java, we can parse everything up to the parens, which aren't optional. 1678 do { 1679 // There should not be a ;, { or } before the new's open paren. 1680 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1681 return; 1682 1683 // Consume the parens. 1684 if (FormatTok->is(tok::l_paren)) { 1685 parseParens(); 1686 1687 // If there is a class body of an anonymous class, consume that as child. 1688 if (FormatTok->is(tok::l_brace)) 1689 parseChildBlock(); 1690 return; 1691 } 1692 nextToken(); 1693 } while (!eof()); 1694 } 1695 1696 void UnwrappedLineParser::parseForOrWhileLoop() { 1697 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1698 "'for', 'while' or foreach macro expected"); 1699 nextToken(); 1700 // JS' for await ( ... 
1701 if (Style.Language == FormatStyle::LK_JavaScript && 1702 FormatTok->is(Keywords.kw_await)) 1703 nextToken(); 1704 if (FormatTok->Tok.is(tok::l_paren)) 1705 parseParens(); 1706 if (FormatTok->Tok.is(tok::l_brace)) { 1707 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1708 parseBlock(/*MustBeDeclaration=*/false); 1709 addUnwrappedLine(); 1710 } else { 1711 addUnwrappedLine(); 1712 ++Line->Level; 1713 parseStructuralElement(); 1714 --Line->Level; 1715 } 1716 } 1717 1718 void UnwrappedLineParser::parseDoWhile() { 1719 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1720 nextToken(); 1721 if (FormatTok->Tok.is(tok::l_brace)) { 1722 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1723 parseBlock(/*MustBeDeclaration=*/false); 1724 if (Style.BraceWrapping.IndentBraces) 1725 addUnwrappedLine(); 1726 } else { 1727 addUnwrappedLine(); 1728 ++Line->Level; 1729 parseStructuralElement(); 1730 --Line->Level; 1731 } 1732 1733 // FIXME: Add error handling. 1734 if (!FormatTok->Tok.is(tok::kw_while)) { 1735 addUnwrappedLine(); 1736 return; 1737 } 1738 1739 nextToken(); 1740 parseStructuralElement(); 1741 } 1742 1743 void UnwrappedLineParser::parseLabel() { 1744 nextToken(); 1745 unsigned OldLineLevel = Line->Level; 1746 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1747 --Line->Level; 1748 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1749 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1750 parseBlock(/*MustBeDeclaration=*/false); 1751 if (FormatTok->Tok.is(tok::kw_break)) { 1752 if (Style.BraceWrapping.AfterControlStatement) 1753 addUnwrappedLine(); 1754 parseStructuralElement(); 1755 } 1756 addUnwrappedLine(); 1757 } else { 1758 if (FormatTok->is(tok::semi)) 1759 nextToken(); 1760 addUnwrappedLine(); 1761 } 1762 Line->Level = OldLineLevel; 1763 if (FormatTok->isNot(tok::l_brace)) { 1764 parseStructuralElement(); 1765 addUnwrappedLine(); 1766 } 1767 } 1768 1769 void 
UnwrappedLineParser::parseCaseLabel() { 1770 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1771 // FIXME: fix handling of complex expressions here. 1772 do { 1773 nextToken(); 1774 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1775 parseLabel(); 1776 } 1777 1778 void UnwrappedLineParser::parseSwitch() { 1779 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1780 nextToken(); 1781 if (FormatTok->Tok.is(tok::l_paren)) 1782 parseParens(); 1783 if (FormatTok->Tok.is(tok::l_brace)) { 1784 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1785 parseBlock(/*MustBeDeclaration=*/false); 1786 addUnwrappedLine(); 1787 } else { 1788 addUnwrappedLine(); 1789 ++Line->Level; 1790 parseStructuralElement(); 1791 --Line->Level; 1792 } 1793 } 1794 1795 void UnwrappedLineParser::parseAccessSpecifier() { 1796 nextToken(); 1797 // Understand Qt's slots. 1798 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 1799 nextToken(); 1800 // Otherwise, we don't know what it is, and we'd better keep the next token. 1801 if (FormatTok->Tok.is(tok::colon)) 1802 nextToken(); 1803 addUnwrappedLine(); 1804 } 1805 1806 bool UnwrappedLineParser::parseEnum() { 1807 // Won't be 'enum' for NS_ENUMs. 1808 if (FormatTok->Tok.is(tok::kw_enum)) 1809 nextToken(); 1810 1811 // In TypeScript, "enum" can also be used as property name, e.g. in interface 1812 // declarations. An "enum" keyword followed by a colon would be a syntax 1813 // error and thus assume it is just an identifier. 1814 if (Style.Language == FormatStyle::LK_JavaScript && 1815 FormatTok->isOneOf(tok::colon, tok::question)) 1816 return false; 1817 1818 // Eat up enum class ... 
if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
    nextToken();

  // Skip the enum name and anything else that may precede the body: nested
  // name specifiers, an underlying type after ":", template arguments, ...
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question)) {
    nextToken();
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->BlockKind = BK_Block;

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  // Parse enum body.
  nextToken();
  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
  if (HasError) {
    // The body could not be parsed cleanly: end the line here (after an
    // optional ";").
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  return true;

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}

/// Parses the body of a Java enum. parseEnum() calls this with the current
/// token being the "{" that opens the body.
void UnwrappedLineParser::parseJavaEnumBody() {
  // Determine whether the enum is simple, i.e. does not have a semicolon or
  // constants with class bodies. Simple enums can be formatted like braced
  // lists, contracted to a single line, etc.
1872 unsigned StoredPosition = Tokens->getPosition(); 1873 bool IsSimple = true; 1874 FormatToken *Tok = Tokens->getNextToken(); 1875 while (Tok) { 1876 if (Tok->is(tok::r_brace)) 1877 break; 1878 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 1879 IsSimple = false; 1880 break; 1881 } 1882 // FIXME: This will also mark enums with braces in the arguments to enum 1883 // constants as "not simple". This is probably fine in practice, though. 1884 Tok = Tokens->getNextToken(); 1885 } 1886 FormatTok = Tokens->setPosition(StoredPosition); 1887 1888 if (IsSimple) { 1889 nextToken(); 1890 parseBracedList(); 1891 addUnwrappedLine(); 1892 return; 1893 } 1894 1895 // Parse the body of a more complex enum. 1896 // First add a line for everything up to the "{". 1897 nextToken(); 1898 addUnwrappedLine(); 1899 ++Line->Level; 1900 1901 // Parse the enum constants. 1902 while (FormatTok) { 1903 if (FormatTok->is(tok::l_brace)) { 1904 // Parse the constant's class body. 1905 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1906 /*MunchSemi=*/false); 1907 } else if (FormatTok->is(tok::l_paren)) { 1908 parseParens(); 1909 } else if (FormatTok->is(tok::comma)) { 1910 nextToken(); 1911 addUnwrappedLine(); 1912 } else if (FormatTok->is(tok::semi)) { 1913 nextToken(); 1914 addUnwrappedLine(); 1915 break; 1916 } else if (FormatTok->is(tok::r_brace)) { 1917 addUnwrappedLine(); 1918 break; 1919 } else { 1920 nextToken(); 1921 } 1922 } 1923 1924 // Parse the class body after the enum's ";" if any. 1925 parseLevel(/*HasOpeningBrace=*/true); 1926 nextToken(); 1927 --Line->Level; 1928 addUnwrappedLine(); 1929 } 1930 1931 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 1932 const FormatToken &InitialToken = *FormatTok; 1933 nextToken(); 1934 1935 // The actual identifier can be a nested name specifier, and in macros 1936 // it is often token-pasted. 
while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas) ||
         ((Style.Language == FormatStyle::LK_Java ||
           Style.Language == FormatStyle::LK_JavaScript) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    // An identifier that is not written entirely in upper case is assumed not
    // to be a macro, so a "(" after it is not parsed as macro arguments here.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
      parseParens();
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
1962 if (FormatTok->isOneOf(tok::colon, tok::less)) { 1963 while (!eof()) { 1964 if (FormatTok->is(tok::l_brace)) { 1965 calculateBraceTypes(/*ExpectClassBody=*/true); 1966 if (!tryToParseBracedList()) 1967 break; 1968 } 1969 if (FormatTok->Tok.is(tok::semi)) 1970 return; 1971 nextToken(); 1972 } 1973 } 1974 if (FormatTok->Tok.is(tok::l_brace)) { 1975 if (ParseAsExpr) { 1976 parseChildBlock(); 1977 } else { 1978 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1979 addUnwrappedLine(); 1980 1981 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1982 /*MunchSemi=*/false); 1983 } 1984 } 1985 // There is no addUnwrappedLine() here so that we fall through to parsing a 1986 // structural element afterwards. Thus, in "class A {} n, m;", 1987 // "} n, m;" will end up in one unwrapped line. 1988 } 1989 1990 void UnwrappedLineParser::parseObjCProtocolList() { 1991 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 1992 do 1993 nextToken(); 1994 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 1995 nextToken(); // Skip '>'. 1996 } 1997 1998 void UnwrappedLineParser::parseObjCUntilAtEnd() { 1999 do { 2000 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2001 nextToken(); 2002 addUnwrappedLine(); 2003 break; 2004 } 2005 if (FormatTok->is(tok::l_brace)) { 2006 parseBlock(/*MustBeDeclaration=*/false); 2007 // In ObjC interfaces, nothing should be following the "}". 2008 addUnwrappedLine(); 2009 } else if (FormatTok->is(tok::r_brace)) { 2010 // Ignore stray "}". parseStructuralElement doesn't consume them. 2011 nextToken(); 2012 addUnwrappedLine(); 2013 } else { 2014 parseStructuralElement(); 2015 } 2016 } while (!eof()); 2017 } 2018 2019 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2020 nextToken(); 2021 nextToken(); // interface name 2022 2023 // @interface can be followed by either a base class, or a category. 
2024 if (FormatTok->Tok.is(tok::colon)) { 2025 nextToken(); 2026 nextToken(); // base class name 2027 } else if (FormatTok->Tok.is(tok::l_paren)) 2028 // Skip category, if present. 2029 parseParens(); 2030 2031 if (FormatTok->Tok.is(tok::less)) 2032 parseObjCProtocolList(); 2033 2034 if (FormatTok->Tok.is(tok::l_brace)) { 2035 if (Style.BraceWrapping.AfterObjCDeclaration) 2036 addUnwrappedLine(); 2037 parseBlock(/*MustBeDeclaration=*/true); 2038 } 2039 2040 // With instance variables, this puts '}' on its own line. Without instance 2041 // variables, this ends the @interface line. 2042 addUnwrappedLine(); 2043 2044 parseObjCUntilAtEnd(); 2045 } 2046 2047 void UnwrappedLineParser::parseObjCProtocol() { 2048 nextToken(); 2049 nextToken(); // protocol name 2050 2051 if (FormatTok->Tok.is(tok::less)) 2052 parseObjCProtocolList(); 2053 2054 // Check for protocol declaration. 2055 if (FormatTok->Tok.is(tok::semi)) { 2056 nextToken(); 2057 return addUnwrappedLine(); 2058 } 2059 2060 addUnwrappedLine(); 2061 parseObjCUntilAtEnd(); 2062 } 2063 2064 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2065 bool IsImport = FormatTok->is(Keywords.kw_import); 2066 assert(IsImport || FormatTok->is(tok::kw_export)); 2067 nextToken(); 2068 2069 // Consume the "default" in "export default class/function". 2070 if (FormatTok->is(tok::kw_default)) 2071 nextToken(); 2072 2073 // Consume "async function", "function" and "default function", so that these 2074 // get parsed as free-standing JS functions, i.e. do not require a trailing 2075 // semicolon. 2076 if (FormatTok->is(Keywords.kw_async)) 2077 nextToken(); 2078 if (FormatTok->is(Keywords.kw_function)) { 2079 nextToken(); 2080 return; 2081 } 2082 2083 // For imports, `export *`, `export {...}`, consume the rest of the line up 2084 // to the terminating `;`. For everything else, just return and continue 2085 // parsing the structural element, i.e. the declaration or expression for 2086 // `export default`. 
2087 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2088 !FormatTok->isStringLiteral()) 2089 return; 2090 2091 while (!eof()) { 2092 if (FormatTok->is(tok::semi)) 2093 return; 2094 if (Line->Tokens.size() == 0) { 2095 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2096 // import statement should terminate. 2097 return; 2098 } 2099 if (FormatTok->is(tok::l_brace)) { 2100 FormatTok->BlockKind = BK_Block; 2101 nextToken(); 2102 parseBracedList(); 2103 } else { 2104 nextToken(); 2105 } 2106 } 2107 } 2108 2109 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2110 StringRef Prefix = "") { 2111 llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" 2112 << (Line.InPPDirective ? " MACRO" : "") << ": "; 2113 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2114 E = Line.Tokens.end(); 2115 I != E; ++I) { 2116 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2117 << "T=" << I->Tok->Type 2118 << ", OC=" << I->Tok->OriginalColumn << "] "; 2119 } 2120 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2121 E = Line.Tokens.end(); 2122 I != E; ++I) { 2123 const UnwrappedLineNode &Node = *I; 2124 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2125 I = Node.Children.begin(), 2126 E = Node.Children.end(); 2127 I != E; ++I) { 2128 printDebugInfo(*I, "\nChild: "); 2129 } 2130 } 2131 llvm::dbgs() << "\n"; 2132 } 2133 2134 void UnwrappedLineParser::addUnwrappedLine() { 2135 if (Line->Tokens.empty()) 2136 return; 2137 DEBUG({ 2138 if (CurrentLines == &Lines) 2139 printDebugInfo(*Line); 2140 }); 2141 CurrentLines->push_back(std::move(*Line)); 2142 Line->Tokens.clear(); 2143 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2144 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2145 CurrentLines->append( 2146 std::make_move_iterator(PreprocessorDirectives.begin()), 2147 std::make_move_iterator(PreprocessorDirectives.end())); 2148 
PreprocessorDirectives.clear(); 2149 } 2150 } 2151 2152 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2153 2154 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2155 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2156 FormatTok.NewlinesBefore > 0; 2157 } 2158 2159 // Checks if \p FormatTok is a line comment that continues the line comment 2160 // section on \p Line. 2161 static bool continuesLineCommentSection(const FormatToken &FormatTok, 2162 const UnwrappedLine &Line, 2163 llvm::Regex &CommentPragmasRegex) { 2164 if (Line.Tokens.empty()) 2165 return false; 2166 2167 StringRef IndentContent = FormatTok.TokenText; 2168 if (FormatTok.TokenText.startswith("//") || 2169 FormatTok.TokenText.startswith("/*")) 2170 IndentContent = FormatTok.TokenText.substr(2); 2171 if (CommentPragmasRegex.match(IndentContent)) 2172 return false; 2173 2174 // If Line starts with a line comment, then FormatTok continues the comment 2175 // section if its original column is greater or equal to the original start 2176 // column of the line. 2177 // 2178 // Define the min column token of a line as follows: if a line ends in '{' or 2179 // contains a '{' followed by a line comment, then the min column token is 2180 // that '{'. Otherwise, the min column token of the line is the first token of 2181 // the line. 2182 // 2183 // If Line starts with a token other than a line comment, then FormatTok 2184 // continues the comment section if its original column is greater than the 2185 // original start column of the min column token of the line. 
2186 // 2187 // For example, the second line comment continues the first in these cases: 2188 // 2189 // // first line 2190 // // second line 2191 // 2192 // and: 2193 // 2194 // // first line 2195 // // second line 2196 // 2197 // and: 2198 // 2199 // int i; // first line 2200 // // second line 2201 // 2202 // and: 2203 // 2204 // do { // first line 2205 // // second line 2206 // int i; 2207 // } while (true); 2208 // 2209 // and: 2210 // 2211 // enum { 2212 // a, // first line 2213 // // second line 2214 // b 2215 // }; 2216 // 2217 // The second line comment doesn't continue the first in these cases: 2218 // 2219 // // first line 2220 // // second line 2221 // 2222 // and: 2223 // 2224 // int i; // first line 2225 // // second line 2226 // 2227 // and: 2228 // 2229 // do { // first line 2230 // // second line 2231 // int i; 2232 // } while (true); 2233 // 2234 // and: 2235 // 2236 // enum { 2237 // a, // first line 2238 // // second line 2239 // }; 2240 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2241 2242 // Scan for '{//'. If found, use the column of '{' as a min column for line 2243 // comment section continuation. 2244 const FormatToken *PreviousToken = nullptr; 2245 for (const UnwrappedLineNode &Node : Line.Tokens) { 2246 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2247 isLineComment(*Node.Tok)) { 2248 MinColumnToken = PreviousToken; 2249 break; 2250 } 2251 PreviousToken = Node.Tok; 2252 2253 // Grab the last newline preceding a token in this unwrapped line. 
2254 if (Node.Tok->NewlinesBefore > 0) { 2255 MinColumnToken = Node.Tok; 2256 } 2257 } 2258 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2259 MinColumnToken = PreviousToken; 2260 } 2261 2262 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2263 MinColumnToken); 2264 } 2265 2266 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2267 bool JustComments = Line->Tokens.empty(); 2268 for (SmallVectorImpl<FormatToken *>::const_iterator 2269 I = CommentsBeforeNextToken.begin(), 2270 E = CommentsBeforeNextToken.end(); 2271 I != E; ++I) { 2272 // Line comments that belong to the same line comment section are put on the 2273 // same line since later we might want to reflow content between them. 2274 // Additional fine-grained breaking of line comment sections is controlled 2275 // by the class BreakableLineCommentSection in case it is desirable to keep 2276 // several line comment sections in the same unwrapped line. 2277 // 2278 // FIXME: Consider putting separate line comment sections as children to the 2279 // unwrapped line instead. 2280 (*I)->ContinuesLineCommentSection = 2281 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2282 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2283 addUnwrappedLine(); 2284 pushToken(*I); 2285 } 2286 if (NewlineBeforeNext && JustComments) 2287 addUnwrappedLine(); 2288 CommentsBeforeNextToken.clear(); 2289 } 2290 2291 void UnwrappedLineParser::nextToken(int LevelDifference) { 2292 if (eof()) 2293 return; 2294 flushComments(isOnNewLine(*FormatTok)); 2295 pushToken(FormatTok); 2296 if (Style.Language != FormatStyle::LK_JavaScript) 2297 readToken(LevelDifference); 2298 else 2299 readTokenWithJavaScriptASI(); 2300 } 2301 2302 const FormatToken *UnwrappedLineParser::getPreviousToken() { 2303 // FIXME: This is a dirty way to access the previous token. Find a better 2304 // solution. 
2305 if (!Line || Line->Tokens.empty()) 2306 return nullptr; 2307 return Line->Tokens.back().Tok; 2308 } 2309 2310 void UnwrappedLineParser::distributeComments( 2311 const SmallVectorImpl<FormatToken *> &Comments, 2312 const FormatToken *NextTok) { 2313 // Whether or not a line comment token continues a line is controlled by 2314 // the method continuesLineCommentSection, with the following caveat: 2315 // 2316 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2317 // that each comment line from the trail is aligned with the next token, if 2318 // the next token exists. If a trail exists, the beginning of the maximal 2319 // trail is marked as a start of a new comment section. 2320 // 2321 // For example in this code: 2322 // 2323 // int a; // line about a 2324 // // line 1 about b 2325 // // line 2 about b 2326 // int b; 2327 // 2328 // the two lines about b form a maximal trail, so there are two sections, the 2329 // first one consisting of the single comment "// line about a" and the 2330 // second one consisting of the next two comments. 2331 if (Comments.empty()) 2332 return; 2333 bool ShouldPushCommentsInCurrentLine = true; 2334 bool HasTrailAlignedWithNextToken = false; 2335 unsigned StartOfTrailAlignedWithNextToken = 0; 2336 if (NextTok) { 2337 // We are skipping the first element intentionally. 
2338 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2339 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2340 HasTrailAlignedWithNextToken = true; 2341 StartOfTrailAlignedWithNextToken = i; 2342 } 2343 } 2344 } 2345 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2346 FormatToken *FormatTok = Comments[i]; 2347 if (HasTrailAlignedWithNextToken && 2348 i == StartOfTrailAlignedWithNextToken) { 2349 FormatTok->ContinuesLineCommentSection = false; 2350 } else { 2351 FormatTok->ContinuesLineCommentSection = 2352 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2353 } 2354 if (!FormatTok->ContinuesLineCommentSection && 2355 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2356 ShouldPushCommentsInCurrentLine = false; 2357 } 2358 if (ShouldPushCommentsInCurrentLine) { 2359 pushToken(FormatTok); 2360 } else { 2361 CommentsBeforeNextToken.push_back(FormatTok); 2362 } 2363 } 2364 } 2365 2366 void UnwrappedLineParser::readToken(int LevelDifference) { 2367 SmallVector<FormatToken *, 1> Comments; 2368 do { 2369 FormatTok = Tokens->getNextToken(); 2370 assert(FormatTok); 2371 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2372 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2373 distributeComments(Comments, FormatTok); 2374 Comments.clear(); 2375 // If there is an unfinished unwrapped line, we flush the preprocessor 2376 // directives only after that unwrapped line was finished later. 
2377 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2378 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2379 assert((LevelDifference >= 0 || 2380 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 2381 "LevelDifference makes Line->Level negative"); 2382 Line->Level += LevelDifference; 2383 // Comments stored before the preprocessor directive need to be output 2384 // before the preprocessor directive, at the same level as the 2385 // preprocessor directive, as we consider them to apply to the directive. 2386 flushComments(isOnNewLine(*FormatTok)); 2387 parsePPDirective(); 2388 } 2389 while (FormatTok->Type == TT_ConflictStart || 2390 FormatTok->Type == TT_ConflictEnd || 2391 FormatTok->Type == TT_ConflictAlternative) { 2392 if (FormatTok->Type == TT_ConflictStart) { 2393 conditionalCompilationStart(/*Unreachable=*/false); 2394 } else if (FormatTok->Type == TT_ConflictAlternative) { 2395 conditionalCompilationAlternative(); 2396 } else if (FormatTok->Type == TT_ConflictEnd) { 2397 conditionalCompilationEnd(); 2398 } 2399 FormatTok = Tokens->getNextToken(); 2400 FormatTok->MustBreakBefore = true; 2401 } 2402 2403 if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && 2404 !Line->InPPDirective) { 2405 continue; 2406 } 2407 2408 if (!FormatTok->Tok.is(tok::comment)) { 2409 distributeComments(Comments, FormatTok); 2410 Comments.clear(); 2411 return; 2412 } 2413 2414 Comments.push_back(FormatTok); 2415 } while (!eof()); 2416 2417 distributeComments(Comments, nullptr); 2418 Comments.clear(); 2419 } 2420 2421 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2422 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2423 if (MustBreakBeforeNextToken) { 2424 Line->Tokens.back().Tok->MustBreakBefore = true; 2425 MustBreakBeforeNextToken = false; 2426 } 2427 } 2428 2429 } // end namespace format 2430 } // end namespace clang 2431