1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "UnwrappedLineParser.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/Support/Debug.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #define DEBUG_TYPE "format-parser" 22 23 namespace clang { 24 namespace format { 25 26 class FormatTokenSource { 27 public: 28 virtual ~FormatTokenSource() {} 29 virtual FormatToken *getNextToken() = 0; 30 31 virtual unsigned getPosition() = 0; 32 virtual FormatToken *setPosition(unsigned Position) = 0; 33 }; 34 35 namespace { 36 37 class ScopedDeclarationState { 38 public: 39 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 40 bool MustBeDeclaration) 41 : Line(Line), Stack(Stack) { 42 Line.MustBeDeclaration = MustBeDeclaration; 43 Stack.push_back(MustBeDeclaration); 44 } 45 ~ScopedDeclarationState() { 46 Stack.pop_back(); 47 if (!Stack.empty()) 48 Line.MustBeDeclaration = Stack.back(); 49 else 50 Line.MustBeDeclaration = true; 51 } 52 53 private: 54 UnwrappedLine &Line; 55 std::vector<bool> &Stack; 56 }; 57 58 static bool isLineComment(const FormatToken &FormatTok) { 59 return FormatTok.is(tok::comment) && 60 FormatTok.TokenText.startswith("//"); 61 } 62 63 // Checks if \p FormatTok is a line comment that continues the line comment 64 // \p Previous. The original column of \p MinColumnToken is used to determine 65 // whether \p FormatTok is indented enough to the right to continue \p Previous. 
66 static bool continuesLineComment(const FormatToken &FormatTok, 67 const FormatToken *Previous, 68 const FormatToken *MinColumnToken) { 69 if (!Previous || !MinColumnToken) 70 return false; 71 unsigned MinContinueColumn = 72 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 73 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 74 isLineComment(*Previous) && 75 FormatTok.OriginalColumn >= MinContinueColumn; 76 } 77 78 class ScopedMacroState : public FormatTokenSource { 79 public: 80 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 81 FormatToken *&ResetToken) 82 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 83 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 84 Token(nullptr), PreviousToken(nullptr) { 85 TokenSource = this; 86 Line.Level = 0; 87 Line.InPPDirective = true; 88 } 89 90 ~ScopedMacroState() override { 91 TokenSource = PreviousTokenSource; 92 ResetToken = Token; 93 Line.InPPDirective = false; 94 Line.Level = PreviousLineLevel; 95 } 96 97 FormatToken *getNextToken() override { 98 // The \c UnwrappedLineParser guards against this by never calling 99 // \c getNextToken() after it has encountered the first eof token. 
100 assert(!eof()); 101 PreviousToken = Token; 102 Token = PreviousTokenSource->getNextToken(); 103 if (eof()) 104 return getFakeEOF(); 105 return Token; 106 } 107 108 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 109 110 FormatToken *setPosition(unsigned Position) override { 111 PreviousToken = nullptr; 112 Token = PreviousTokenSource->setPosition(Position); 113 return Token; 114 } 115 116 private: 117 bool eof() { 118 return Token && Token->HasUnescapedNewline && 119 !continuesLineComment(*Token, PreviousToken, 120 /*MinColumnToken=*/PreviousToken); 121 } 122 123 FormatToken *getFakeEOF() { 124 static bool EOFInitialized = false; 125 static FormatToken FormatTok; 126 if (!EOFInitialized) { 127 FormatTok.Tok.startToken(); 128 FormatTok.Tok.setKind(tok::eof); 129 EOFInitialized = true; 130 } 131 return &FormatTok; 132 } 133 134 UnwrappedLine &Line; 135 FormatTokenSource *&TokenSource; 136 FormatToken *&ResetToken; 137 unsigned PreviousLineLevel; 138 FormatTokenSource *PreviousTokenSource; 139 140 FormatToken *Token; 141 FormatToken *PreviousToken; 142 }; 143 144 } // end anonymous namespace 145 146 class ScopedLineState { 147 public: 148 ScopedLineState(UnwrappedLineParser &Parser, 149 bool SwitchToPreprocessorLines = false) 150 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 151 if (SwitchToPreprocessorLines) 152 Parser.CurrentLines = &Parser.PreprocessorDirectives; 153 else if (!Parser.Line->Tokens.empty()) 154 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 155 PreBlockLine = std::move(Parser.Line); 156 Parser.Line = llvm::make_unique<UnwrappedLine>(); 157 Parser.Line->Level = PreBlockLine->Level; 158 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 159 } 160 161 ~ScopedLineState() { 162 if (!Parser.Line->Tokens.empty()) { 163 Parser.addUnwrappedLine(); 164 } 165 assert(Parser.Line->Tokens.empty()); 166 Parser.Line = std::move(PreBlockLine); 167 if (Parser.CurrentLines == 
&Parser.PreprocessorDirectives) 168 Parser.MustBreakBeforeNextToken = true; 169 Parser.CurrentLines = OriginalLines; 170 } 171 172 private: 173 UnwrappedLineParser &Parser; 174 175 std::unique_ptr<UnwrappedLine> PreBlockLine; 176 SmallVectorImpl<UnwrappedLine> *OriginalLines; 177 }; 178 179 class CompoundStatementIndenter { 180 public: 181 CompoundStatementIndenter(UnwrappedLineParser *Parser, 182 const FormatStyle &Style, unsigned &LineLevel) 183 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 184 if (Style.BraceWrapping.AfterControlStatement) 185 Parser->addUnwrappedLine(); 186 if (Style.BraceWrapping.IndentBraces) 187 ++LineLevel; 188 } 189 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 190 191 private: 192 unsigned &LineLevel; 193 unsigned OldLineLevel; 194 }; 195 196 namespace { 197 198 class IndexedTokenSource : public FormatTokenSource { 199 public: 200 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 201 : Tokens(Tokens), Position(-1) {} 202 203 FormatToken *getNextToken() override { 204 ++Position; 205 return Tokens[Position]; 206 } 207 208 unsigned getPosition() override { 209 assert(Position >= 0); 210 return Position; 211 } 212 213 FormatToken *setPosition(unsigned P) override { 214 Position = P; 215 return Tokens[Position]; 216 } 217 218 void reset() { Position = -1; } 219 220 private: 221 ArrayRef<FormatToken *> Tokens; 222 int Position; 223 }; 224 225 } // end anonymous namespace 226 227 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 228 const AdditionalKeywords &Keywords, 229 ArrayRef<FormatToken *> Tokens, 230 UnwrappedLineConsumer &Callback) 231 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 232 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 233 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 234 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {} 235 236 void UnwrappedLineParser::reset() { 237 PPBranchLevel = -1; 238 Line.reset(new UnwrappedLine); 239 
CommentsBeforeNextToken.clear(); 240 FormatTok = nullptr; 241 MustBreakBeforeNextToken = false; 242 PreprocessorDirectives.clear(); 243 CurrentLines = &Lines; 244 DeclarationScopeStack.clear(); 245 PPStack.clear(); 246 } 247 248 void UnwrappedLineParser::parse() { 249 IndexedTokenSource TokenSource(AllTokens); 250 do { 251 DEBUG(llvm::dbgs() << "----\n"); 252 reset(); 253 Tokens = &TokenSource; 254 TokenSource.reset(); 255 256 readToken(); 257 parseFile(); 258 // Create line with eof token. 259 pushToken(FormatTok); 260 addUnwrappedLine(); 261 262 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 263 E = Lines.end(); 264 I != E; ++I) { 265 Callback.consumeUnwrappedLine(*I); 266 } 267 Callback.finishRun(); 268 Lines.clear(); 269 while (!PPLevelBranchIndex.empty() && 270 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 271 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 272 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 273 } 274 if (!PPLevelBranchIndex.empty()) { 275 ++PPLevelBranchIndex.back(); 276 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 277 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 278 } 279 } while (!PPLevelBranchIndex.empty()); 280 } 281 282 void UnwrappedLineParser::parseFile() { 283 // The top-level context in a file always has declarations, except for pre- 284 // processor directives and JavaScript files. 285 bool MustBeDeclaration = 286 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 287 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 288 MustBeDeclaration); 289 if (Style.Language == FormatStyle::LK_TextProto) 290 parseBracedList(); 291 else 292 parseLevel(/*HasOpeningBrace=*/false); 293 // Make sure to format the remaining tokens. 
294 flushComments(true); 295 addUnwrappedLine(); 296 } 297 298 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 299 bool SwitchLabelEncountered = false; 300 do { 301 tok::TokenKind kind = FormatTok->Tok.getKind(); 302 if (FormatTok->Type == TT_MacroBlockBegin) { 303 kind = tok::l_brace; 304 } else if (FormatTok->Type == TT_MacroBlockEnd) { 305 kind = tok::r_brace; 306 } 307 308 switch (kind) { 309 case tok::comment: 310 nextToken(); 311 addUnwrappedLine(); 312 break; 313 case tok::l_brace: 314 // FIXME: Add parameter whether this can happen - if this happens, we must 315 // be in a non-declaration context. 316 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 317 continue; 318 parseBlock(/*MustBeDeclaration=*/false); 319 addUnwrappedLine(); 320 break; 321 case tok::r_brace: 322 if (HasOpeningBrace) 323 return; 324 nextToken(); 325 addUnwrappedLine(); 326 break; 327 case tok::kw_default: 328 case tok::kw_case: 329 if (!SwitchLabelEncountered && 330 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 331 ++Line->Level; 332 SwitchLabelEncountered = true; 333 parseStructuralElement(); 334 break; 335 default: 336 parseStructuralElement(); 337 break; 338 } 339 } while (!eof()); 340 } 341 342 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 343 // We'll parse forward through the tokens until we hit 344 // a closing brace or eof - note that getNextToken() will 345 // parse macros, so this will magically work inside macro 346 // definitions, too. 347 unsigned StoredPosition = Tokens->getPosition(); 348 FormatToken *Tok = FormatTok; 349 const FormatToken *PrevTok = getPreviousToken(); 350 // Keep a stack of positions of lbrace tokens. We will 351 // update information about whether an lbrace starts a 352 // braced init list or a different block during the loop. 353 SmallVector<FormatToken *, 8> LBraceStack; 354 assert(Tok->Tok.is(tok::l_brace)); 355 do { 356 // Get next non-comment token. 
357 FormatToken *NextTok; 358 unsigned ReadTokens = 0; 359 do { 360 NextTok = Tokens->getNextToken(); 361 ++ReadTokens; 362 } while (NextTok->is(tok::comment)); 363 364 switch (Tok->Tok.getKind()) { 365 case tok::l_brace: 366 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 367 if (PrevTok->is(tok::colon)) 368 // A colon indicates this code is in a type, or a braced list 369 // following a label in an object literal ({a: {b: 1}}). The code 370 // below could be confused by semicolons between the individual 371 // members in a type member list, which would normally trigger 372 // BK_Block. In both cases, this must be parsed as an inline braced 373 // init. 374 Tok->BlockKind = BK_BracedInit; 375 else if (PrevTok->is(tok::r_paren)) 376 // `) { }` can only occur in function or method declarations in JS. 377 Tok->BlockKind = BK_Block; 378 } else { 379 Tok->BlockKind = BK_Unknown; 380 } 381 LBraceStack.push_back(Tok); 382 break; 383 case tok::r_brace: 384 if (LBraceStack.empty()) 385 break; 386 if (LBraceStack.back()->BlockKind == BK_Unknown) { 387 bool ProbablyBracedList = false; 388 if (Style.Language == FormatStyle::LK_Proto) { 389 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 390 } else { 391 // Using OriginalColumn to distinguish between ObjC methods and 392 // binary operators is a bit hacky. 393 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 394 NextTok->OriginalColumn == 0; 395 396 // If there is a comma, semicolon or right paren after the closing 397 // brace, we assume this is a braced initializer list. Note that 398 // regardless how we mark inner braces here, we will overwrite the 399 // BlockKind later if we parse a braced list (where all blocks 400 // inside are by default braced lists), or when we explicitly detect 401 // blocks (for example while parsing lambdas). 402 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 403 // braced list in JS. 
404 ProbablyBracedList = 405 (Style.Language == FormatStyle::LK_JavaScript && 406 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 407 Keywords.kw_as)) || 408 (Style.isCpp() && NextTok->is(tok::l_paren)) || 409 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 410 tok::r_paren, tok::r_square, tok::l_brace, 411 tok::l_square, tok::ellipsis) || 412 (NextTok->is(tok::identifier) && 413 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 414 (NextTok->is(tok::semi) && 415 (!ExpectClassBody || LBraceStack.size() != 1)) || 416 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 417 } 418 if (ProbablyBracedList) { 419 Tok->BlockKind = BK_BracedInit; 420 LBraceStack.back()->BlockKind = BK_BracedInit; 421 } else { 422 Tok->BlockKind = BK_Block; 423 LBraceStack.back()->BlockKind = BK_Block; 424 } 425 } 426 LBraceStack.pop_back(); 427 break; 428 case tok::at: 429 case tok::semi: 430 case tok::kw_if: 431 case tok::kw_while: 432 case tok::kw_for: 433 case tok::kw_switch: 434 case tok::kw_try: 435 case tok::kw___try: 436 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 437 LBraceStack.back()->BlockKind = BK_Block; 438 break; 439 default: 440 break; 441 } 442 PrevTok = Tok; 443 Tok = NextTok; 444 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 445 446 // Assume other blocks for all unclosed opening braces. 
447 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 448 if (LBraceStack[i]->BlockKind == BK_Unknown) 449 LBraceStack[i]->BlockKind = BK_Block; 450 } 451 452 FormatTok = Tokens->setPosition(StoredPosition); 453 } 454 455 template <class T> 456 static inline void hash_combine(std::size_t &seed, const T &v) { 457 std::hash<T> hasher; 458 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 459 } 460 461 size_t UnwrappedLineParser::computePPHash() const { 462 size_t h = 0; 463 for (const auto &i : PPStack) { 464 hash_combine(h, size_t(i.Kind)); 465 hash_combine(h, i.Line); 466 } 467 return h; 468 } 469 470 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 471 bool MunchSemi) { 472 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 473 "'{' or macro block token expected"); 474 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 475 FormatTok->BlockKind = BK_Block; 476 477 size_t PPStartHash = computePPHash(); 478 479 unsigned InitialLevel = Line->Level; 480 nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); 481 482 if (MacroBlock && FormatTok->is(tok::l_paren)) 483 parseParens(); 484 485 size_t NbPreprocessorDirectives = 486 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 487 addUnwrappedLine(); 488 size_t OpeningLineIndex = 489 CurrentLines->empty() 490 ? (UnwrappedLine::kInvalidIndex) 491 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 492 493 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 494 MustBeDeclaration); 495 if (AddLevel) 496 ++Line->Level; 497 parseLevel(/*HasOpeningBrace=*/true); 498 499 if (eof()) 500 return; 501 502 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 503 : !FormatTok->is(tok::r_brace)) { 504 Line->Level = InitialLevel; 505 FormatTok->BlockKind = BK_Block; 506 return; 507 } 508 509 size_t PPEndHash = computePPHash(); 510 511 // Munch the closing brace. 512 nextToken(/*LevelDifference=*/AddLevel ? 
-1 : 0); 513 514 if (MacroBlock && FormatTok->is(tok::l_paren)) 515 parseParens(); 516 517 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 518 nextToken(); 519 Line->Level = InitialLevel; 520 521 if (PPStartHash == PPEndHash) { 522 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 523 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 524 // Update the opening line to add the forward reference as well 525 (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex = 526 CurrentLines->size() - 1; 527 } 528 } 529 } 530 531 static bool isGoogScope(const UnwrappedLine &Line) { 532 // FIXME: Closure-library specific stuff should not be hard-coded but be 533 // configurable. 534 if (Line.Tokens.size() < 4) 535 return false; 536 auto I = Line.Tokens.begin(); 537 if (I->Tok->TokenText != "goog") 538 return false; 539 ++I; 540 if (I->Tok->isNot(tok::period)) 541 return false; 542 ++I; 543 if (I->Tok->TokenText != "scope") 544 return false; 545 ++I; 546 return I->Tok->is(tok::l_paren); 547 } 548 549 static bool isIIFE(const UnwrappedLine &Line, 550 const AdditionalKeywords &Keywords) { 551 // Look for the start of an immediately invoked anonymous function. 552 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 553 // This is commonly done in JavaScript to create a new, anonymous scope. 554 // Example: (function() { ... 
})() 555 if (Line.Tokens.size() < 3) 556 return false; 557 auto I = Line.Tokens.begin(); 558 if (I->Tok->isNot(tok::l_paren)) 559 return false; 560 ++I; 561 if (I->Tok->isNot(Keywords.kw_function)) 562 return false; 563 ++I; 564 return I->Tok->is(tok::l_paren); 565 } 566 567 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 568 const FormatToken &InitialToken) { 569 if (InitialToken.is(tok::kw_namespace)) 570 return Style.BraceWrapping.AfterNamespace; 571 if (InitialToken.is(tok::kw_class)) 572 return Style.BraceWrapping.AfterClass; 573 if (InitialToken.is(tok::kw_union)) 574 return Style.BraceWrapping.AfterUnion; 575 if (InitialToken.is(tok::kw_struct)) 576 return Style.BraceWrapping.AfterStruct; 577 return false; 578 } 579 580 void UnwrappedLineParser::parseChildBlock() { 581 FormatTok->BlockKind = BK_Block; 582 nextToken(); 583 { 584 bool SkipIndent = 585 (Style.Language == FormatStyle::LK_JavaScript && 586 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 587 ScopedLineState LineState(*this); 588 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 589 /*MustBeDeclaration=*/false); 590 Line->Level += SkipIndent ? 0 : 1; 591 parseLevel(/*HasOpeningBrace=*/true); 592 flushComments(isOnNewLine(*FormatTok)); 593 Line->Level -= SkipIndent ? 
0 : 1; 594 } 595 nextToken(); 596 } 597 598 void UnwrappedLineParser::parsePPDirective() { 599 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 600 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 601 nextToken(); 602 603 if (!FormatTok->Tok.getIdentifierInfo()) { 604 parsePPUnknown(); 605 return; 606 } 607 608 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 609 case tok::pp_define: 610 parsePPDefine(); 611 return; 612 case tok::pp_if: 613 parsePPIf(/*IfDef=*/false); 614 break; 615 case tok::pp_ifdef: 616 case tok::pp_ifndef: 617 parsePPIf(/*IfDef=*/true); 618 break; 619 case tok::pp_else: 620 parsePPElse(); 621 break; 622 case tok::pp_elif: 623 parsePPElIf(); 624 break; 625 case tok::pp_endif: 626 parsePPEndIf(); 627 break; 628 default: 629 parsePPUnknown(); 630 break; 631 } 632 } 633 634 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 635 size_t Line = CurrentLines->size(); 636 if (CurrentLines == &PreprocessorDirectives) 637 Line += Lines.size(); 638 639 if (Unreachable || 640 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 641 PPStack.push_back({PP_Unreachable, Line}); 642 else 643 PPStack.push_back({PP_Conditional, Line}); 644 } 645 646 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 647 ++PPBranchLevel; 648 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 649 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 650 PPLevelBranchIndex.push_back(0); 651 PPLevelBranchCount.push_back(0); 652 } 653 PPChainBranchIndex.push(0); 654 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 655 conditionalCompilationCondition(Unreachable || Skip); 656 } 657 658 void UnwrappedLineParser::conditionalCompilationAlternative() { 659 if (!PPStack.empty()) 660 PPStack.pop_back(); 661 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 662 if (!PPChainBranchIndex.empty()) 663 ++PPChainBranchIndex.top(); 664 conditionalCompilationCondition( 665 
PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 666 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 667 } 668 669 void UnwrappedLineParser::conditionalCompilationEnd() { 670 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 671 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 672 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 673 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 674 } 675 } 676 // Guard against #endif's without #if. 677 if (PPBranchLevel > 0) 678 --PPBranchLevel; 679 if (!PPChainBranchIndex.empty()) 680 PPChainBranchIndex.pop(); 681 if (!PPStack.empty()) 682 PPStack.pop_back(); 683 } 684 685 void UnwrappedLineParser::parsePPIf(bool IfDef) { 686 bool IfNDef = FormatTok->is(tok::pp_ifndef); 687 nextToken(); 688 bool Unreachable = false; 689 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 690 Unreachable = true; 691 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 692 Unreachable = true; 693 conditionalCompilationStart(Unreachable); 694 parsePPUnknown(); 695 } 696 697 void UnwrappedLineParser::parsePPElse() { 698 conditionalCompilationAlternative(); 699 parsePPUnknown(); 700 } 701 702 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 703 704 void UnwrappedLineParser::parsePPEndIf() { 705 conditionalCompilationEnd(); 706 parsePPUnknown(); 707 } 708 709 void UnwrappedLineParser::parsePPDefine() { 710 nextToken(); 711 712 if (FormatTok->Tok.getKind() != tok::identifier) { 713 parsePPUnknown(); 714 return; 715 } 716 nextToken(); 717 if (FormatTok->Tok.getKind() == tok::l_paren && 718 FormatTok->WhitespaceRange.getBegin() == 719 FormatTok->WhitespaceRange.getEnd()) { 720 parseParens(); 721 } 722 addUnwrappedLine(); 723 Line->Level = 1; 724 725 // Errors during a preprocessor directive can only affect the layout of the 726 // preprocessor directive, and thus we ignore them. 
An alternative approach 727 // would be to use the same approach we use on the file level (no 728 // re-indentation if there was a structural error) within the macro 729 // definition. 730 parseFile(); 731 } 732 733 void UnwrappedLineParser::parsePPUnknown() { 734 do { 735 nextToken(); 736 } while (!eof()); 737 addUnwrappedLine(); 738 } 739 740 // Here we blacklist certain tokens that are not usually the first token in an 741 // unwrapped line. This is used in attempt to distinguish macro calls without 742 // trailing semicolons from other constructs split to several lines. 743 static bool tokenCanStartNewLine(const clang::Token &Tok) { 744 // Semicolon can be a null-statement, l_square can be a start of a macro or 745 // a C++11 attribute, but this doesn't seem to be common. 746 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 747 Tok.isNot(tok::l_square) && 748 // Tokens that can only be used as binary operators and a part of 749 // overloaded operator names. 750 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 751 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 752 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 753 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 754 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 755 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 756 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 757 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 758 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 759 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 760 Tok.isNot(tok::lesslessequal) && 761 // Colon is used in labels, base class lists, initializer lists, 762 // range-based for loops, ternary operator, but should never be the 763 // first token in an unwrapped line. 764 Tok.isNot(tok::colon) && 765 // 'noexcept' is a trailing annotation. 
766 Tok.isNot(tok::kw_noexcept); 767 } 768 769 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 770 const FormatToken *FormatTok) { 771 // FIXME: This returns true for C/C++ keywords like 'struct'. 772 return FormatTok->is(tok::identifier) && 773 (FormatTok->Tok.getIdentifierInfo() == nullptr || 774 !FormatTok->isOneOf( 775 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 776 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 777 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 778 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 779 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 780 Keywords.kw_instanceof, Keywords.kw_interface, 781 Keywords.kw_throws, Keywords.kw_from)); 782 } 783 784 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 785 const FormatToken *FormatTok) { 786 return FormatTok->Tok.isLiteral() || 787 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 788 mustBeJSIdent(Keywords, FormatTok); 789 } 790 791 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 792 // when encountered after a value (see mustBeJSIdentOrValue). 
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
                           const FormatToken *FormatTok) {
  return FormatTok->isOneOf(
      tok::kw_return, Keywords.kw_yield,
      // conditionals
      tok::kw_if, tok::kw_else,
      // loops
      tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
      // switch/case
      tok::kw_switch, tok::kw_case,
      // exceptions
      tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
      // declaration
      tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
      Keywords.kw_async, Keywords.kw_function,
      // import/export
      Keywords.kw_import, tok::kw_export);
}

// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// terminate the line in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // ASI can only apply across a line break. If comments were collected before
  // the next token, the first of them decides whether there was a break.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
    // If the token before the previous one is an '@', the previous token is an
    // annotation and can precede another identifier/value.
    const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
    if (PrePrevious->is(tok::at))
      return;
  }
  // A value followed by '!' on the next line: end the line before the '!'.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // A value on the next line following a value, ']', ')', '++' or '--', unless
  // the line break is inside a template string expression.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus)))
    return addUnwrappedLine();
  // A value followed by a token that starts a declaration or statement.
  if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
    return addUnwrappedLine();
}

// Parses a single structural element beginning at the current token; the
// first switch below dispatches on the kind of that token.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include followed by an optional string literal forms a line
  // of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::at:
    nextToken();
    // "@{" is parsed as a braced list.
    if (FormatTok->Tok.is(tok::l_brace)) {
      nextToken();
      parseBracedList();
      break;
    }
    switch (FormatTok->Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
886 case tok::objc_optional: 887 case tok::objc_required: 888 nextToken(); 889 addUnwrappedLine(); 890 return; 891 case tok::objc_autoreleasepool: 892 nextToken(); 893 if (FormatTok->Tok.is(tok::l_brace)) { 894 if (Style.BraceWrapping.AfterObjCDeclaration) 895 addUnwrappedLine(); 896 parseBlock(/*MustBeDeclaration=*/false); 897 } 898 addUnwrappedLine(); 899 return; 900 case tok::objc_try: 901 // This branch isn't strictly necessary (the kw_try case below would 902 // do this too after the tok::at is parsed above). But be explicit. 903 parseTryCatch(); 904 return; 905 default: 906 break; 907 } 908 break; 909 case tok::kw_asm: 910 nextToken(); 911 if (FormatTok->is(tok::l_brace)) { 912 FormatTok->Type = TT_InlineASMBrace; 913 nextToken(); 914 while (FormatTok && FormatTok->isNot(tok::eof)) { 915 if (FormatTok->is(tok::r_brace)) { 916 FormatTok->Type = TT_InlineASMBrace; 917 nextToken(); 918 addUnwrappedLine(); 919 break; 920 } 921 FormatTok->Finalized = true; 922 nextToken(); 923 } 924 } 925 break; 926 case tok::kw_namespace: 927 parseNamespace(); 928 return; 929 case tok::kw_inline: 930 nextToken(); 931 if (FormatTok->Tok.is(tok::kw_namespace)) { 932 parseNamespace(); 933 return; 934 } 935 break; 936 case tok::kw_public: 937 case tok::kw_protected: 938 case tok::kw_private: 939 if (Style.Language == FormatStyle::LK_Java || 940 Style.Language == FormatStyle::LK_JavaScript) 941 nextToken(); 942 else 943 parseAccessSpecifier(); 944 return; 945 case tok::kw_if: 946 parseIfThenElse(); 947 return; 948 case tok::kw_for: 949 case tok::kw_while: 950 parseForOrWhileLoop(); 951 return; 952 case tok::kw_do: 953 parseDoWhile(); 954 return; 955 case tok::kw_switch: 956 parseSwitch(); 957 return; 958 case tok::kw_default: 959 nextToken(); 960 parseLabel(); 961 return; 962 case tok::kw_case: 963 parseCaseLabel(); 964 return; 965 case tok::kw_try: 966 case tok::kw___try: 967 parseTryCatch(); 968 return; 969 case tok::kw_extern: 970 nextToken(); 971 if 
(FormatTok->Tok.is(tok::string_literal)) { 972 nextToken(); 973 if (FormatTok->Tok.is(tok::l_brace)) { 974 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 975 addUnwrappedLine(); 976 return; 977 } 978 } 979 break; 980 case tok::kw_export: 981 if (Style.Language == FormatStyle::LK_JavaScript) { 982 parseJavaScriptEs6ImportExport(); 983 return; 984 } 985 break; 986 case tok::identifier: 987 if (FormatTok->is(TT_ForEachMacro)) { 988 parseForOrWhileLoop(); 989 return; 990 } 991 if (FormatTok->is(TT_MacroBlockBegin)) { 992 parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, 993 /*MunchSemi=*/false); 994 return; 995 } 996 if (FormatTok->is(Keywords.kw_import)) { 997 if (Style.Language == FormatStyle::LK_JavaScript) { 998 parseJavaScriptEs6ImportExport(); 999 return; 1000 } 1001 if (Style.Language == FormatStyle::LK_Proto) { 1002 nextToken(); 1003 if (FormatTok->is(tok::kw_public)) 1004 nextToken(); 1005 if (!FormatTok->is(tok::string_literal)) 1006 return; 1007 nextToken(); 1008 if (FormatTok->is(tok::semi)) 1009 nextToken(); 1010 addUnwrappedLine(); 1011 return; 1012 } 1013 } 1014 if (Style.isCpp() && 1015 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1016 Keywords.kw_slots, Keywords.kw_qslots)) { 1017 nextToken(); 1018 if (FormatTok->is(tok::colon)) { 1019 nextToken(); 1020 addUnwrappedLine(); 1021 return; 1022 } 1023 } 1024 // In all other cases, parse the declaration. 1025 break; 1026 default: 1027 break; 1028 } 1029 do { 1030 const FormatToken *Previous = getPreviousToken(); 1031 switch (FormatTok->Tok.getKind()) { 1032 case tok::at: 1033 nextToken(); 1034 if (FormatTok->Tok.is(tok::l_brace)) { 1035 nextToken(); 1036 parseBracedList(); 1037 } 1038 break; 1039 case tok::kw_enum: 1040 // Ignore if this is part of "template <enum ...". 
1041 if (Previous && Previous->is(tok::less)) { 1042 nextToken(); 1043 break; 1044 } 1045 1046 // parseEnum falls through and does not yet add an unwrapped line as an 1047 // enum definition can start a structural element. 1048 if (!parseEnum()) 1049 break; 1050 // This only applies for C++. 1051 if (!Style.isCpp()) { 1052 addUnwrappedLine(); 1053 return; 1054 } 1055 break; 1056 case tok::kw_typedef: 1057 nextToken(); 1058 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1059 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 1060 parseEnum(); 1061 break; 1062 case tok::kw_struct: 1063 case tok::kw_union: 1064 case tok::kw_class: 1065 // parseRecord falls through and does not yet add an unwrapped line as a 1066 // record declaration or definition can start a structural element. 1067 parseRecord(); 1068 // This does not apply for Java and JavaScript. 1069 if (Style.Language == FormatStyle::LK_Java || 1070 Style.Language == FormatStyle::LK_JavaScript) { 1071 if (FormatTok->is(tok::semi)) 1072 nextToken(); 1073 addUnwrappedLine(); 1074 return; 1075 } 1076 break; 1077 case tok::period: 1078 nextToken(); 1079 // In Java, classes have an implicit static member "class". 1080 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1081 FormatTok->is(tok::kw_class)) 1082 nextToken(); 1083 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 1084 FormatTok->Tok.getIdentifierInfo()) 1085 // JavaScript only has pseudo keywords, all keywords are allowed to 1086 // appear in "IdentifierName" positions. 
See http://es5.github.io/#x7.6 1087 nextToken(); 1088 break; 1089 case tok::semi: 1090 nextToken(); 1091 addUnwrappedLine(); 1092 return; 1093 case tok::r_brace: 1094 addUnwrappedLine(); 1095 return; 1096 case tok::l_paren: 1097 parseParens(); 1098 break; 1099 case tok::kw_operator: 1100 nextToken(); 1101 if (FormatTok->isBinaryOperator()) 1102 nextToken(); 1103 break; 1104 case tok::caret: 1105 nextToken(); 1106 if (FormatTok->Tok.isAnyIdentifier() || 1107 FormatTok->isSimpleTypeSpecifier()) 1108 nextToken(); 1109 if (FormatTok->is(tok::l_paren)) 1110 parseParens(); 1111 if (FormatTok->is(tok::l_brace)) 1112 parseChildBlock(); 1113 break; 1114 case tok::l_brace: 1115 if (!tryToParseBracedList()) { 1116 // A block outside of parentheses must be the last part of a 1117 // structural element. 1118 // FIXME: Figure out cases where this is not true, and add projections 1119 // for them (the one we know is missing are lambdas). 1120 if (Style.BraceWrapping.AfterFunction) 1121 addUnwrappedLine(); 1122 FormatTok->Type = TT_FunctionLBrace; 1123 parseBlock(/*MustBeDeclaration=*/false); 1124 addUnwrappedLine(); 1125 return; 1126 } 1127 // Otherwise this was a braced init list, and the structural 1128 // element continues. 1129 break; 1130 case tok::kw_try: 1131 // We arrive here when parsing function-try blocks. 1132 parseTryCatch(); 1133 return; 1134 case tok::identifier: { 1135 if (FormatTok->is(TT_MacroBlockEnd)) { 1136 addUnwrappedLine(); 1137 return; 1138 } 1139 1140 // Function declarations (as opposed to function expressions) are parsed 1141 // on their own unwrapped line by continuing this loop. Function 1142 // expressions (functions that are not on their own line) must not create 1143 // a new unwrapped line, so they are special cased below. 
1144 size_t TokenCount = Line->Tokens.size(); 1145 if (Style.Language == FormatStyle::LK_JavaScript && 1146 FormatTok->is(Keywords.kw_function) && 1147 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1148 Keywords.kw_async)))) { 1149 tryToParseJSFunction(); 1150 break; 1151 } 1152 if ((Style.Language == FormatStyle::LK_JavaScript || 1153 Style.Language == FormatStyle::LK_Java) && 1154 FormatTok->is(Keywords.kw_interface)) { 1155 if (Style.Language == FormatStyle::LK_JavaScript) { 1156 // In JavaScript/TypeScript, "interface" can be used as a standalone 1157 // identifier, e.g. in `var interface = 1;`. If "interface" is 1158 // followed by another identifier, it is very like to be an actual 1159 // interface declaration. 1160 unsigned StoredPosition = Tokens->getPosition(); 1161 FormatToken *Next = Tokens->getNextToken(); 1162 FormatTok = Tokens->setPosition(StoredPosition); 1163 if (Next && !mustBeJSIdent(Keywords, Next)) { 1164 nextToken(); 1165 break; 1166 } 1167 } 1168 parseRecord(); 1169 addUnwrappedLine(); 1170 return; 1171 } 1172 1173 // See if the following token should start a new unwrapped line. 1174 StringRef Text = FormatTok->TokenText; 1175 nextToken(); 1176 if (Line->Tokens.size() == 1 && 1177 // JS doesn't have macros, and within classes colons indicate fields, 1178 // not labels. 1179 Style.Language != FormatStyle::LK_JavaScript) { 1180 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1181 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1182 parseLabel(); 1183 return; 1184 } 1185 // Recognize function-like macro usages without trailing semicolon as 1186 // well as free-standing macros like Q_OBJECT. 1187 bool FunctionLike = FormatTok->is(tok::l_paren); 1188 if (FunctionLike) 1189 parseParens(); 1190 1191 bool FollowedByNewline = 1192 CommentsBeforeNextToken.empty() 1193 ? 
FormatTok->NewlinesBefore > 0 1194 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1195 1196 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1197 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 1198 addUnwrappedLine(); 1199 return; 1200 } 1201 } 1202 break; 1203 } 1204 case tok::equal: 1205 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 1206 // TT_JsFatArrow. The always start an expression or a child block if 1207 // followed by a curly. 1208 if (FormatTok->is(TT_JsFatArrow)) { 1209 nextToken(); 1210 if (FormatTok->is(tok::l_brace)) 1211 parseChildBlock(); 1212 break; 1213 } 1214 1215 nextToken(); 1216 if (FormatTok->Tok.is(tok::l_brace)) { 1217 nextToken(); 1218 parseBracedList(); 1219 } else if (Style.Language == FormatStyle::LK_Proto && 1220 FormatTok->Tok.is(tok::less)) { 1221 nextToken(); 1222 parseBracedList(/*ContinueOnSemicolons=*/false, 1223 /*ClosingBraceKind=*/tok::greater); 1224 } 1225 break; 1226 case tok::l_square: 1227 parseSquare(); 1228 break; 1229 case tok::kw_new: 1230 parseNew(); 1231 break; 1232 default: 1233 nextToken(); 1234 break; 1235 } 1236 } while (!eof()); 1237 } 1238 1239 bool UnwrappedLineParser::tryToParseLambda() { 1240 if (!Style.isCpp()) { 1241 nextToken(); 1242 return false; 1243 } 1244 const FormatToken* Previous = getPreviousToken(); 1245 if (Previous && 1246 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1247 tok::kw_delete) || 1248 Previous->closesScope() || Previous->isSimpleTypeSpecifier())) { 1249 nextToken(); 1250 return false; 1251 } 1252 assert(FormatTok->is(tok::l_square)); 1253 FormatToken &LSquare = *FormatTok; 1254 if (!tryToParseLambdaIntroducer()) 1255 return false; 1256 1257 while (FormatTok->isNot(tok::l_brace)) { 1258 if (FormatTok->isSimpleTypeSpecifier()) { 1259 nextToken(); 1260 continue; 1261 } 1262 switch (FormatTok->Tok.getKind()) { 1263 case tok::l_brace: 1264 break; 1265 case tok::l_paren: 1266 parseParens(); 1267 break; 1268 case 
tok::amp: 1269 case tok::star: 1270 case tok::kw_const: 1271 case tok::comma: 1272 case tok::less: 1273 case tok::greater: 1274 case tok::identifier: 1275 case tok::numeric_constant: 1276 case tok::coloncolon: 1277 case tok::kw_mutable: 1278 nextToken(); 1279 break; 1280 case tok::arrow: 1281 FormatTok->Type = TT_LambdaArrow; 1282 nextToken(); 1283 break; 1284 default: 1285 return true; 1286 } 1287 } 1288 LSquare.Type = TT_LambdaLSquare; 1289 parseChildBlock(); 1290 return true; 1291 } 1292 1293 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1294 nextToken(); 1295 if (FormatTok->is(tok::equal)) { 1296 nextToken(); 1297 if (FormatTok->is(tok::r_square)) { 1298 nextToken(); 1299 return true; 1300 } 1301 if (FormatTok->isNot(tok::comma)) 1302 return false; 1303 nextToken(); 1304 } else if (FormatTok->is(tok::amp)) { 1305 nextToken(); 1306 if (FormatTok->is(tok::r_square)) { 1307 nextToken(); 1308 return true; 1309 } 1310 if (!FormatTok->isOneOf(tok::comma, tok::identifier)) { 1311 return false; 1312 } 1313 if (FormatTok->is(tok::comma)) 1314 nextToken(); 1315 } else if (FormatTok->is(tok::r_square)) { 1316 nextToken(); 1317 return true; 1318 } 1319 do { 1320 if (FormatTok->is(tok::amp)) 1321 nextToken(); 1322 if (!FormatTok->isOneOf(tok::identifier, tok::kw_this)) 1323 return false; 1324 nextToken(); 1325 if (FormatTok->is(tok::ellipsis)) 1326 nextToken(); 1327 if (FormatTok->is(tok::comma)) { 1328 nextToken(); 1329 } else if (FormatTok->is(tok::r_square)) { 1330 nextToken(); 1331 return true; 1332 } else { 1333 return false; 1334 } 1335 } while (!eof()); 1336 return false; 1337 } 1338 1339 void UnwrappedLineParser::tryToParseJSFunction() { 1340 assert(FormatTok->is(Keywords.kw_function) || 1341 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1342 if (FormatTok->is(Keywords.kw_async)) 1343 nextToken(); 1344 // Consume "function". 1345 nextToken(); 1346 1347 // Consume * (generator function). Treat it like C++'s overloaded operators. 
1348 if (FormatTok->is(tok::star)) { 1349 FormatTok->Type = TT_OverloadedOperator; 1350 nextToken(); 1351 } 1352 1353 // Consume function name. 1354 if (FormatTok->is(tok::identifier)) 1355 nextToken(); 1356 1357 if (FormatTok->isNot(tok::l_paren)) 1358 return; 1359 1360 // Parse formal parameter list. 1361 parseParens(); 1362 1363 if (FormatTok->is(tok::colon)) { 1364 // Parse a type definition. 1365 nextToken(); 1366 1367 // Eat the type declaration. For braced inline object types, balance braces, 1368 // otherwise just parse until finding an l_brace for the function body. 1369 if (FormatTok->is(tok::l_brace)) 1370 tryToParseBracedList(); 1371 else 1372 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1373 nextToken(); 1374 } 1375 1376 if (FormatTok->is(tok::semi)) 1377 return; 1378 1379 parseChildBlock(); 1380 } 1381 1382 bool UnwrappedLineParser::tryToParseBracedList() { 1383 if (FormatTok->BlockKind == BK_Unknown) 1384 calculateBraceTypes(); 1385 assert(FormatTok->BlockKind != BK_Unknown); 1386 if (FormatTok->BlockKind == BK_Block) 1387 return false; 1388 nextToken(); 1389 parseBracedList(); 1390 return true; 1391 } 1392 1393 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1394 tok::TokenKind ClosingBraceKind) { 1395 bool HasError = false; 1396 1397 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1398 // replace this by using parseAssigmentExpression() inside. 1399 do { 1400 if (Style.Language == FormatStyle::LK_JavaScript) { 1401 if (FormatTok->is(Keywords.kw_function) || 1402 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1403 tryToParseJSFunction(); 1404 continue; 1405 } 1406 if (FormatTok->is(TT_JsFatArrow)) { 1407 nextToken(); 1408 // Fat arrows can be followed by simple expressions or by child blocks 1409 // in curly braces. 
1410 if (FormatTok->is(tok::l_brace)) { 1411 parseChildBlock(); 1412 continue; 1413 } 1414 } 1415 if (FormatTok->is(tok::l_brace)) { 1416 // Could be a method inside of a braced list `{a() { return 1; }}`. 1417 if (tryToParseBracedList()) 1418 continue; 1419 parseChildBlock(); 1420 } 1421 } 1422 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1423 nextToken(); 1424 return !HasError; 1425 } 1426 switch (FormatTok->Tok.getKind()) { 1427 case tok::caret: 1428 nextToken(); 1429 if (FormatTok->is(tok::l_brace)) { 1430 parseChildBlock(); 1431 } 1432 break; 1433 case tok::l_square: 1434 tryToParseLambda(); 1435 break; 1436 case tok::l_paren: 1437 parseParens(); 1438 // JavaScript can just have free standing methods and getters/setters in 1439 // object literals. Detect them by a "{" following ")". 1440 if (Style.Language == FormatStyle::LK_JavaScript) { 1441 if (FormatTok->is(tok::l_brace)) 1442 parseChildBlock(); 1443 break; 1444 } 1445 break; 1446 case tok::l_brace: 1447 // Assume there are no blocks inside a braced init list apart 1448 // from the ones we explicitly parse out (like lambdas). 1449 FormatTok->BlockKind = BK_BracedInit; 1450 nextToken(); 1451 parseBracedList(); 1452 break; 1453 case tok::semi: 1454 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1455 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1456 // used for error recovery if we have otherwise determined that this is 1457 // a braced list. 
1458 if (Style.Language == FormatStyle::LK_JavaScript) { 1459 nextToken(); 1460 break; 1461 } 1462 HasError = true; 1463 if (!ContinueOnSemicolons) 1464 return !HasError; 1465 nextToken(); 1466 break; 1467 case tok::comma: 1468 nextToken(); 1469 break; 1470 default: 1471 nextToken(); 1472 break; 1473 } 1474 } while (!eof()); 1475 return false; 1476 } 1477 1478 void UnwrappedLineParser::parseParens() { 1479 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1480 nextToken(); 1481 do { 1482 switch (FormatTok->Tok.getKind()) { 1483 case tok::l_paren: 1484 parseParens(); 1485 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1486 parseChildBlock(); 1487 break; 1488 case tok::r_paren: 1489 nextToken(); 1490 return; 1491 case tok::r_brace: 1492 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1493 return; 1494 case tok::l_square: 1495 tryToParseLambda(); 1496 break; 1497 case tok::l_brace: 1498 if (!tryToParseBracedList()) 1499 parseChildBlock(); 1500 break; 1501 case tok::at: 1502 nextToken(); 1503 if (FormatTok->Tok.is(tok::l_brace)) { 1504 nextToken(); 1505 parseBracedList(); 1506 } 1507 break; 1508 case tok::kw_class: 1509 if (Style.Language == FormatStyle::LK_JavaScript) 1510 parseRecord(/*ParseAsExpr=*/true); 1511 else 1512 nextToken(); 1513 break; 1514 case tok::identifier: 1515 if (Style.Language == FormatStyle::LK_JavaScript && 1516 (FormatTok->is(Keywords.kw_function) || 1517 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1518 tryToParseJSFunction(); 1519 else 1520 nextToken(); 1521 break; 1522 default: 1523 nextToken(); 1524 break; 1525 } 1526 } while (!eof()); 1527 } 1528 1529 void UnwrappedLineParser::parseSquare() { 1530 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1531 if (tryToParseLambda()) 1532 return; 1533 do { 1534 switch (FormatTok->Tok.getKind()) { 1535 case tok::l_paren: 1536 parseParens(); 1537 break; 1538 case tok::r_square: 1539 nextToken(); 1540 
return; 1541 case tok::r_brace: 1542 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1543 return; 1544 case tok::l_square: 1545 parseSquare(); 1546 break; 1547 case tok::l_brace: { 1548 if (!tryToParseBracedList()) 1549 parseChildBlock(); 1550 break; 1551 } 1552 case tok::at: 1553 nextToken(); 1554 if (FormatTok->Tok.is(tok::l_brace)) { 1555 nextToken(); 1556 parseBracedList(); 1557 } 1558 break; 1559 default: 1560 nextToken(); 1561 break; 1562 } 1563 } while (!eof()); 1564 } 1565 1566 void UnwrappedLineParser::parseIfThenElse() { 1567 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1568 nextToken(); 1569 if (FormatTok->Tok.is(tok::kw_constexpr)) 1570 nextToken(); 1571 if (FormatTok->Tok.is(tok::l_paren)) 1572 parseParens(); 1573 bool NeedsUnwrappedLine = false; 1574 if (FormatTok->Tok.is(tok::l_brace)) { 1575 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1576 parseBlock(/*MustBeDeclaration=*/false); 1577 if (Style.BraceWrapping.BeforeElse) 1578 addUnwrappedLine(); 1579 else 1580 NeedsUnwrappedLine = true; 1581 } else { 1582 addUnwrappedLine(); 1583 ++Line->Level; 1584 parseStructuralElement(); 1585 --Line->Level; 1586 } 1587 if (FormatTok->Tok.is(tok::kw_else)) { 1588 nextToken(); 1589 if (FormatTok->Tok.is(tok::l_brace)) { 1590 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1591 parseBlock(/*MustBeDeclaration=*/false); 1592 addUnwrappedLine(); 1593 } else if (FormatTok->Tok.is(tok::kw_if)) { 1594 parseIfThenElse(); 1595 } else { 1596 addUnwrappedLine(); 1597 ++Line->Level; 1598 parseStructuralElement(); 1599 if (FormatTok->is(tok::eof)) 1600 addUnwrappedLine(); 1601 --Line->Level; 1602 } 1603 } else if (NeedsUnwrappedLine) { 1604 addUnwrappedLine(); 1605 } 1606 } 1607 1608 void UnwrappedLineParser::parseTryCatch() { 1609 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1610 nextToken(); 1611 bool NeedsUnwrappedLine = false; 1612 if (FormatTok->is(tok::colon)) { 1613 // We 
are in a function try block, what comes is an initializer list. 1614 nextToken(); 1615 while (FormatTok->is(tok::identifier)) { 1616 nextToken(); 1617 if (FormatTok->is(tok::l_paren)) 1618 parseParens(); 1619 if (FormatTok->is(tok::comma)) 1620 nextToken(); 1621 } 1622 } 1623 // Parse try with resource. 1624 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1625 parseParens(); 1626 } 1627 if (FormatTok->is(tok::l_brace)) { 1628 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1629 parseBlock(/*MustBeDeclaration=*/false); 1630 if (Style.BraceWrapping.BeforeCatch) { 1631 addUnwrappedLine(); 1632 } else { 1633 NeedsUnwrappedLine = true; 1634 } 1635 } else if (!FormatTok->is(tok::kw_catch)) { 1636 // The C++ standard requires a compound-statement after a try. 1637 // If there's none, we try to assume there's a structuralElement 1638 // and try to continue. 1639 addUnwrappedLine(); 1640 ++Line->Level; 1641 parseStructuralElement(); 1642 --Line->Level; 1643 } 1644 while (1) { 1645 if (FormatTok->is(tok::at)) 1646 nextToken(); 1647 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1648 tok::kw___finally) || 1649 ((Style.Language == FormatStyle::LK_Java || 1650 Style.Language == FormatStyle::LK_JavaScript) && 1651 FormatTok->is(Keywords.kw_finally)) || 1652 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1653 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1654 break; 1655 nextToken(); 1656 while (FormatTok->isNot(tok::l_brace)) { 1657 if (FormatTok->is(tok::l_paren)) { 1658 parseParens(); 1659 continue; 1660 } 1661 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1662 return; 1663 nextToken(); 1664 } 1665 NeedsUnwrappedLine = false; 1666 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1667 parseBlock(/*MustBeDeclaration=*/false); 1668 if (Style.BraceWrapping.BeforeCatch) 1669 addUnwrappedLine(); 1670 else 1671 NeedsUnwrappedLine = true; 1672 } 1673 if (NeedsUnwrappedLine) 1674 
addUnwrappedLine(); 1675 } 1676 1677 void UnwrappedLineParser::parseNamespace() { 1678 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1679 1680 const FormatToken &InitialToken = *FormatTok; 1681 nextToken(); 1682 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1683 nextToken(); 1684 if (FormatTok->Tok.is(tok::l_brace)) { 1685 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1686 addUnwrappedLine(); 1687 1688 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1689 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1690 DeclarationScopeStack.size() > 1); 1691 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1692 // Munch the semicolon after a namespace. This is more common than one would 1693 // think. Puttin the semicolon into its own line is very ugly. 1694 if (FormatTok->Tok.is(tok::semi)) 1695 nextToken(); 1696 addUnwrappedLine(); 1697 } 1698 // FIXME: Add error handling. 1699 } 1700 1701 void UnwrappedLineParser::parseNew() { 1702 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1703 nextToken(); 1704 if (Style.Language != FormatStyle::LK_Java) 1705 return; 1706 1707 // In Java, we can parse everything up to the parens, which aren't optional. 1708 do { 1709 // There should not be a ;, { or } before the new's open paren. 1710 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1711 return; 1712 1713 // Consume the parens. 1714 if (FormatTok->is(tok::l_paren)) { 1715 parseParens(); 1716 1717 // If there is a class body of an anonymous class, consume that as child. 1718 if (FormatTok->is(tok::l_brace)) 1719 parseChildBlock(); 1720 return; 1721 } 1722 nextToken(); 1723 } while (!eof()); 1724 } 1725 1726 void UnwrappedLineParser::parseForOrWhileLoop() { 1727 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1728 "'for', 'while' or foreach macro expected"); 1729 nextToken(); 1730 // JS' for await ( ... 
1731 if (Style.Language == FormatStyle::LK_JavaScript && 1732 FormatTok->is(Keywords.kw_await)) 1733 nextToken(); 1734 if (FormatTok->Tok.is(tok::l_paren)) 1735 parseParens(); 1736 if (FormatTok->Tok.is(tok::l_brace)) { 1737 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1738 parseBlock(/*MustBeDeclaration=*/false); 1739 addUnwrappedLine(); 1740 } else { 1741 addUnwrappedLine(); 1742 ++Line->Level; 1743 parseStructuralElement(); 1744 --Line->Level; 1745 } 1746 } 1747 1748 void UnwrappedLineParser::parseDoWhile() { 1749 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1750 nextToken(); 1751 if (FormatTok->Tok.is(tok::l_brace)) { 1752 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1753 parseBlock(/*MustBeDeclaration=*/false); 1754 if (Style.BraceWrapping.IndentBraces) 1755 addUnwrappedLine(); 1756 } else { 1757 addUnwrappedLine(); 1758 ++Line->Level; 1759 parseStructuralElement(); 1760 --Line->Level; 1761 } 1762 1763 // FIXME: Add error handling. 1764 if (!FormatTok->Tok.is(tok::kw_while)) { 1765 addUnwrappedLine(); 1766 return; 1767 } 1768 1769 nextToken(); 1770 parseStructuralElement(); 1771 } 1772 1773 void UnwrappedLineParser::parseLabel() { 1774 nextToken(); 1775 unsigned OldLineLevel = Line->Level; 1776 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1777 --Line->Level; 1778 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1779 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1780 parseBlock(/*MustBeDeclaration=*/false); 1781 if (FormatTok->Tok.is(tok::kw_break)) { 1782 if (Style.BraceWrapping.AfterControlStatement) 1783 addUnwrappedLine(); 1784 parseStructuralElement(); 1785 } 1786 addUnwrappedLine(); 1787 } else { 1788 if (FormatTok->is(tok::semi)) 1789 nextToken(); 1790 addUnwrappedLine(); 1791 } 1792 Line->Level = OldLineLevel; 1793 if (FormatTok->isNot(tok::l_brace)) { 1794 parseStructuralElement(); 1795 addUnwrappedLine(); 1796 } 1797 } 1798 1799 void 
UnwrappedLineParser::parseCaseLabel() { 1800 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1801 // FIXME: fix handling of complex expressions here. 1802 do { 1803 nextToken(); 1804 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1805 parseLabel(); 1806 } 1807 1808 void UnwrappedLineParser::parseSwitch() { 1809 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1810 nextToken(); 1811 if (FormatTok->Tok.is(tok::l_paren)) 1812 parseParens(); 1813 if (FormatTok->Tok.is(tok::l_brace)) { 1814 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1815 parseBlock(/*MustBeDeclaration=*/false); 1816 addUnwrappedLine(); 1817 } else { 1818 addUnwrappedLine(); 1819 ++Line->Level; 1820 parseStructuralElement(); 1821 --Line->Level; 1822 } 1823 } 1824 1825 void UnwrappedLineParser::parseAccessSpecifier() { 1826 nextToken(); 1827 // Understand Qt's slots. 1828 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 1829 nextToken(); 1830 // Otherwise, we don't know what it is, and we'd better keep the next token. 1831 if (FormatTok->Tok.is(tok::colon)) 1832 nextToken(); 1833 addUnwrappedLine(); 1834 } 1835 1836 bool UnwrappedLineParser::parseEnum() { 1837 // Won't be 'enum' for NS_ENUMs. 1838 if (FormatTok->Tok.is(tok::kw_enum)) 1839 nextToken(); 1840 1841 // In TypeScript, "enum" can also be used as property name, e.g. in interface 1842 // declarations. An "enum" keyword followed by a colon would be a syntax 1843 // error and thus assume it is just an identifier. 1844 if (Style.Language == FormatStyle::LK_JavaScript && 1845 FormatTok->isOneOf(tok::colon, tok::question)) 1846 return false; 1847 1848 // Eat up enum class ... 
1849 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 1850 nextToken(); 1851 1852 while (FormatTok->Tok.getIdentifierInfo() || 1853 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 1854 tok::greater, tok::comma, tok::question)) { 1855 nextToken(); 1856 // We can have macros or attributes in between 'enum' and the enum name. 1857 if (FormatTok->is(tok::l_paren)) 1858 parseParens(); 1859 if (FormatTok->is(tok::identifier)) { 1860 nextToken(); 1861 // If there are two identifiers in a row, this is likely an elaborate 1862 // return type. In Java, this can be "implements", etc. 1863 if (Style.isCpp() && FormatTok->is(tok::identifier)) 1864 return false; 1865 } 1866 } 1867 1868 // Just a declaration or something is wrong. 1869 if (FormatTok->isNot(tok::l_brace)) 1870 return true; 1871 FormatTok->BlockKind = BK_Block; 1872 1873 if (Style.Language == FormatStyle::LK_Java) { 1874 // Java enums are different. 1875 parseJavaEnumBody(); 1876 return true; 1877 } 1878 if (Style.Language == FormatStyle::LK_Proto) { 1879 parseBlock(/*MustBeDeclaration=*/true); 1880 return true; 1881 } 1882 1883 // Parse enum body. 1884 nextToken(); 1885 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 1886 if (HasError) { 1887 if (FormatTok->is(tok::semi)) 1888 nextToken(); 1889 addUnwrappedLine(); 1890 } 1891 return true; 1892 1893 // There is no addUnwrappedLine() here so that we fall through to parsing a 1894 // structural element afterwards. Thus, in "enum A {} n, m;", 1895 // "} n, m;" will end up in one unwrapped line. 1896 } 1897 1898 void UnwrappedLineParser::parseJavaEnumBody() { 1899 // Determine whether the enum is simple, i.e. does not have a semicolon or 1900 // constants with class bodies. Simple enums can be formatted like braced 1901 // lists, contracted to a single line, etc. 
1902 unsigned StoredPosition = Tokens->getPosition(); 1903 bool IsSimple = true; 1904 FormatToken *Tok = Tokens->getNextToken(); 1905 while (Tok) { 1906 if (Tok->is(tok::r_brace)) 1907 break; 1908 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 1909 IsSimple = false; 1910 break; 1911 } 1912 // FIXME: This will also mark enums with braces in the arguments to enum 1913 // constants as "not simple". This is probably fine in practice, though. 1914 Tok = Tokens->getNextToken(); 1915 } 1916 FormatTok = Tokens->setPosition(StoredPosition); 1917 1918 if (IsSimple) { 1919 nextToken(); 1920 parseBracedList(); 1921 addUnwrappedLine(); 1922 return; 1923 } 1924 1925 // Parse the body of a more complex enum. 1926 // First add a line for everything up to the "{". 1927 nextToken(); 1928 addUnwrappedLine(); 1929 ++Line->Level; 1930 1931 // Parse the enum constants. 1932 while (FormatTok) { 1933 if (FormatTok->is(tok::l_brace)) { 1934 // Parse the constant's class body. 1935 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1936 /*MunchSemi=*/false); 1937 } else if (FormatTok->is(tok::l_paren)) { 1938 parseParens(); 1939 } else if (FormatTok->is(tok::comma)) { 1940 nextToken(); 1941 addUnwrappedLine(); 1942 } else if (FormatTok->is(tok::semi)) { 1943 nextToken(); 1944 addUnwrappedLine(); 1945 break; 1946 } else if (FormatTok->is(tok::r_brace)) { 1947 addUnwrappedLine(); 1948 break; 1949 } else { 1950 nextToken(); 1951 } 1952 } 1953 1954 // Parse the class body after the enum's ";" if any. 1955 parseLevel(/*HasOpeningBrace=*/true); 1956 nextToken(); 1957 --Line->Level; 1958 addUnwrappedLine(); 1959 } 1960 1961 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 1962 const FormatToken &InitialToken = *FormatTok; 1963 nextToken(); 1964 1965 // The actual identifier can be a nested name specifier, and in macros 1966 // it is often token-pasted. 
1967 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 1968 tok::kw___attribute, tok::kw___declspec, 1969 tok::kw_alignas) || 1970 ((Style.Language == FormatStyle::LK_Java || 1971 Style.Language == FormatStyle::LK_JavaScript) && 1972 FormatTok->isOneOf(tok::period, tok::comma))) { 1973 bool IsNonMacroIdentifier = 1974 FormatTok->is(tok::identifier) && 1975 FormatTok->TokenText != FormatTok->TokenText.upper(); 1976 nextToken(); 1977 // We can have macros or attributes in between 'class' and the class name. 1978 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 1979 parseParens(); 1980 } 1981 1982 // Note that parsing away template declarations here leads to incorrectly 1983 // accepting function declarations as record declarations. 1984 // In general, we cannot solve this problem. Consider: 1985 // class A<int> B() {} 1986 // which can be a function definition or a class definition when B() is a 1987 // macro. If we find enough real-world cases where this is a problem, we 1988 // can parse for the 'template' keyword in the beginning of the statement, 1989 // and thus rule out the record production in case there is no template 1990 // (this would still leave us with an ambiguity between template function 1991 // and class declarations). 
1992 if (FormatTok->isOneOf(tok::colon, tok::less)) { 1993 while (!eof()) { 1994 if (FormatTok->is(tok::l_brace)) { 1995 calculateBraceTypes(/*ExpectClassBody=*/true); 1996 if (!tryToParseBracedList()) 1997 break; 1998 } 1999 if (FormatTok->Tok.is(tok::semi)) 2000 return; 2001 nextToken(); 2002 } 2003 } 2004 if (FormatTok->Tok.is(tok::l_brace)) { 2005 if (ParseAsExpr) { 2006 parseChildBlock(); 2007 } else { 2008 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2009 addUnwrappedLine(); 2010 2011 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2012 /*MunchSemi=*/false); 2013 } 2014 } 2015 // There is no addUnwrappedLine() here so that we fall through to parsing a 2016 // structural element afterwards. Thus, in "class A {} n, m;", 2017 // "} n, m;" will end up in one unwrapped line. 2018 } 2019 2020 void UnwrappedLineParser::parseObjCProtocolList() { 2021 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 2022 do 2023 nextToken(); 2024 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 2025 nextToken(); // Skip '>'. 2026 } 2027 2028 void UnwrappedLineParser::parseObjCUntilAtEnd() { 2029 do { 2030 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2031 nextToken(); 2032 addUnwrappedLine(); 2033 break; 2034 } 2035 if (FormatTok->is(tok::l_brace)) { 2036 parseBlock(/*MustBeDeclaration=*/false); 2037 // In ObjC interfaces, nothing should be following the "}". 2038 addUnwrappedLine(); 2039 } else if (FormatTok->is(tok::r_brace)) { 2040 // Ignore stray "}". parseStructuralElement doesn't consume them. 2041 nextToken(); 2042 addUnwrappedLine(); 2043 } else { 2044 parseStructuralElement(); 2045 } 2046 } while (!eof()); 2047 } 2048 2049 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2050 nextToken(); 2051 nextToken(); // interface name 2052 2053 // @interface can be followed by either a base class, or a category. 
2054 if (FormatTok->Tok.is(tok::colon)) { 2055 nextToken(); 2056 nextToken(); // base class name 2057 } else if (FormatTok->Tok.is(tok::l_paren)) 2058 // Skip category, if present. 2059 parseParens(); 2060 2061 if (FormatTok->Tok.is(tok::less)) 2062 parseObjCProtocolList(); 2063 2064 if (FormatTok->Tok.is(tok::l_brace)) { 2065 if (Style.BraceWrapping.AfterObjCDeclaration) 2066 addUnwrappedLine(); 2067 parseBlock(/*MustBeDeclaration=*/true); 2068 } 2069 2070 // With instance variables, this puts '}' on its own line. Without instance 2071 // variables, this ends the @interface line. 2072 addUnwrappedLine(); 2073 2074 parseObjCUntilAtEnd(); 2075 } 2076 2077 void UnwrappedLineParser::parseObjCProtocol() { 2078 nextToken(); 2079 nextToken(); // protocol name 2080 2081 if (FormatTok->Tok.is(tok::less)) 2082 parseObjCProtocolList(); 2083 2084 // Check for protocol declaration. 2085 if (FormatTok->Tok.is(tok::semi)) { 2086 nextToken(); 2087 return addUnwrappedLine(); 2088 } 2089 2090 addUnwrappedLine(); 2091 parseObjCUntilAtEnd(); 2092 } 2093 2094 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2095 bool IsImport = FormatTok->is(Keywords.kw_import); 2096 assert(IsImport || FormatTok->is(tok::kw_export)); 2097 nextToken(); 2098 2099 // Consume the "default" in "export default class/function". 2100 if (FormatTok->is(tok::kw_default)) 2101 nextToken(); 2102 2103 // Consume "async function", "function" and "default function", so that these 2104 // get parsed as free-standing JS functions, i.e. do not require a trailing 2105 // semicolon. 2106 if (FormatTok->is(Keywords.kw_async)) 2107 nextToken(); 2108 if (FormatTok->is(Keywords.kw_function)) { 2109 nextToken(); 2110 return; 2111 } 2112 2113 // For imports, `export *`, `export {...}`, consume the rest of the line up 2114 // to the terminating `;`. For everything else, just return and continue 2115 // parsing the structural element, i.e. the declaration or expression for 2116 // `export default`. 
2117 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2118 !FormatTok->isStringLiteral()) 2119 return; 2120 2121 while (!eof()) { 2122 if (FormatTok->is(tok::semi)) 2123 return; 2124 if (Line->Tokens.size() == 0) { 2125 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2126 // import statement should terminate. 2127 return; 2128 } 2129 if (FormatTok->is(tok::l_brace)) { 2130 FormatTok->BlockKind = BK_Block; 2131 nextToken(); 2132 parseBracedList(); 2133 } else { 2134 nextToken(); 2135 } 2136 } 2137 } 2138 2139 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2140 StringRef Prefix = "") { 2141 llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" 2142 << (Line.InPPDirective ? " MACRO" : "") << ": "; 2143 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2144 E = Line.Tokens.end(); 2145 I != E; ++I) { 2146 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2147 << "T=" << I->Tok->Type 2148 << ", OC=" << I->Tok->OriginalColumn << "] "; 2149 } 2150 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2151 E = Line.Tokens.end(); 2152 I != E; ++I) { 2153 const UnwrappedLineNode &Node = *I; 2154 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2155 I = Node.Children.begin(), 2156 E = Node.Children.end(); 2157 I != E; ++I) { 2158 printDebugInfo(*I, "\nChild: "); 2159 } 2160 } 2161 llvm::dbgs() << "\n"; 2162 } 2163 2164 void UnwrappedLineParser::addUnwrappedLine() { 2165 if (Line->Tokens.empty()) 2166 return; 2167 DEBUG({ 2168 if (CurrentLines == &Lines) 2169 printDebugInfo(*Line); 2170 }); 2171 CurrentLines->push_back(std::move(*Line)); 2172 Line->Tokens.clear(); 2173 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2174 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2175 CurrentLines->append( 2176 std::make_move_iterator(PreprocessorDirectives.begin()), 2177 std::make_move_iterator(PreprocessorDirectives.end())); 2178 
PreprocessorDirectives.clear(); 2179 } 2180 } 2181 2182 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2183 2184 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2185 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2186 FormatTok.NewlinesBefore > 0; 2187 } 2188 2189 // Checks if \p FormatTok is a line comment that continues the line comment 2190 // section on \p Line. 2191 static bool continuesLineCommentSection(const FormatToken &FormatTok, 2192 const UnwrappedLine &Line, 2193 llvm::Regex &CommentPragmasRegex) { 2194 if (Line.Tokens.empty()) 2195 return false; 2196 2197 StringRef IndentContent = FormatTok.TokenText; 2198 if (FormatTok.TokenText.startswith("//") || 2199 FormatTok.TokenText.startswith("/*")) 2200 IndentContent = FormatTok.TokenText.substr(2); 2201 if (CommentPragmasRegex.match(IndentContent)) 2202 return false; 2203 2204 // If Line starts with a line comment, then FormatTok continues the comment 2205 // section if its original column is greater or equal to the original start 2206 // column of the line. 2207 // 2208 // Define the min column token of a line as follows: if a line ends in '{' or 2209 // contains a '{' followed by a line comment, then the min column token is 2210 // that '{'. Otherwise, the min column token of the line is the first token of 2211 // the line. 2212 // 2213 // If Line starts with a token other than a line comment, then FormatTok 2214 // continues the comment section if its original column is greater than the 2215 // original start column of the min column token of the line. 
2216 // 2217 // For example, the second line comment continues the first in these cases: 2218 // 2219 // // first line 2220 // // second line 2221 // 2222 // and: 2223 // 2224 // // first line 2225 // // second line 2226 // 2227 // and: 2228 // 2229 // int i; // first line 2230 // // second line 2231 // 2232 // and: 2233 // 2234 // do { // first line 2235 // // second line 2236 // int i; 2237 // } while (true); 2238 // 2239 // and: 2240 // 2241 // enum { 2242 // a, // first line 2243 // // second line 2244 // b 2245 // }; 2246 // 2247 // The second line comment doesn't continue the first in these cases: 2248 // 2249 // // first line 2250 // // second line 2251 // 2252 // and: 2253 // 2254 // int i; // first line 2255 // // second line 2256 // 2257 // and: 2258 // 2259 // do { // first line 2260 // // second line 2261 // int i; 2262 // } while (true); 2263 // 2264 // and: 2265 // 2266 // enum { 2267 // a, // first line 2268 // // second line 2269 // }; 2270 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2271 2272 // Scan for '{//'. If found, use the column of '{' as a min column for line 2273 // comment section continuation. 2274 const FormatToken *PreviousToken = nullptr; 2275 for (const UnwrappedLineNode &Node : Line.Tokens) { 2276 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2277 isLineComment(*Node.Tok)) { 2278 MinColumnToken = PreviousToken; 2279 break; 2280 } 2281 PreviousToken = Node.Tok; 2282 2283 // Grab the last newline preceding a token in this unwrapped line. 
2284 if (Node.Tok->NewlinesBefore > 0) { 2285 MinColumnToken = Node.Tok; 2286 } 2287 } 2288 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2289 MinColumnToken = PreviousToken; 2290 } 2291 2292 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2293 MinColumnToken); 2294 } 2295 2296 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2297 bool JustComments = Line->Tokens.empty(); 2298 for (SmallVectorImpl<FormatToken *>::const_iterator 2299 I = CommentsBeforeNextToken.begin(), 2300 E = CommentsBeforeNextToken.end(); 2301 I != E; ++I) { 2302 // Line comments that belong to the same line comment section are put on the 2303 // same line since later we might want to reflow content between them. 2304 // Additional fine-grained breaking of line comment sections is controlled 2305 // by the class BreakableLineCommentSection in case it is desirable to keep 2306 // several line comment sections in the same unwrapped line. 2307 // 2308 // FIXME: Consider putting separate line comment sections as children to the 2309 // unwrapped line instead. 2310 (*I)->ContinuesLineCommentSection = 2311 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2312 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2313 addUnwrappedLine(); 2314 pushToken(*I); 2315 } 2316 if (NewlineBeforeNext && JustComments) 2317 addUnwrappedLine(); 2318 CommentsBeforeNextToken.clear(); 2319 } 2320 2321 void UnwrappedLineParser::nextToken(int LevelDifference) { 2322 if (eof()) 2323 return; 2324 flushComments(isOnNewLine(*FormatTok)); 2325 pushToken(FormatTok); 2326 if (Style.Language != FormatStyle::LK_JavaScript) 2327 readToken(LevelDifference); 2328 else 2329 readTokenWithJavaScriptASI(); 2330 } 2331 2332 const FormatToken *UnwrappedLineParser::getPreviousToken() { 2333 // FIXME: This is a dirty way to access the previous token. Find a better 2334 // solution. 
2335 if (!Line || Line->Tokens.empty()) 2336 return nullptr; 2337 return Line->Tokens.back().Tok; 2338 } 2339 2340 void UnwrappedLineParser::distributeComments( 2341 const SmallVectorImpl<FormatToken *> &Comments, 2342 const FormatToken *NextTok) { 2343 // Whether or not a line comment token continues a line is controlled by 2344 // the method continuesLineCommentSection, with the following caveat: 2345 // 2346 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2347 // that each comment line from the trail is aligned with the next token, if 2348 // the next token exists. If a trail exists, the beginning of the maximal 2349 // trail is marked as a start of a new comment section. 2350 // 2351 // For example in this code: 2352 // 2353 // int a; // line about a 2354 // // line 1 about b 2355 // // line 2 about b 2356 // int b; 2357 // 2358 // the two lines about b form a maximal trail, so there are two sections, the 2359 // first one consisting of the single comment "// line about a" and the 2360 // second one consisting of the next two comments. 2361 if (Comments.empty()) 2362 return; 2363 bool ShouldPushCommentsInCurrentLine = true; 2364 bool HasTrailAlignedWithNextToken = false; 2365 unsigned StartOfTrailAlignedWithNextToken = 0; 2366 if (NextTok) { 2367 // We are skipping the first element intentionally. 
2368 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2369 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2370 HasTrailAlignedWithNextToken = true; 2371 StartOfTrailAlignedWithNextToken = i; 2372 } 2373 } 2374 } 2375 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2376 FormatToken *FormatTok = Comments[i]; 2377 if (HasTrailAlignedWithNextToken && 2378 i == StartOfTrailAlignedWithNextToken) { 2379 FormatTok->ContinuesLineCommentSection = false; 2380 } else { 2381 FormatTok->ContinuesLineCommentSection = 2382 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2383 } 2384 if (!FormatTok->ContinuesLineCommentSection && 2385 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2386 ShouldPushCommentsInCurrentLine = false; 2387 } 2388 if (ShouldPushCommentsInCurrentLine) { 2389 pushToken(FormatTok); 2390 } else { 2391 CommentsBeforeNextToken.push_back(FormatTok); 2392 } 2393 } 2394 } 2395 2396 void UnwrappedLineParser::readToken(int LevelDifference) { 2397 SmallVector<FormatToken *, 1> Comments; 2398 do { 2399 FormatTok = Tokens->getNextToken(); 2400 assert(FormatTok); 2401 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2402 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2403 distributeComments(Comments, FormatTok); 2404 Comments.clear(); 2405 // If there is an unfinished unwrapped line, we flush the preprocessor 2406 // directives only after that unwrapped line was finished later. 
2407 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2408 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2409 assert((LevelDifference >= 0 || 2410 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 2411 "LevelDifference makes Line->Level negative"); 2412 Line->Level += LevelDifference; 2413 // Comments stored before the preprocessor directive need to be output 2414 // before the preprocessor directive, at the same level as the 2415 // preprocessor directive, as we consider them to apply to the directive. 2416 flushComments(isOnNewLine(*FormatTok)); 2417 parsePPDirective(); 2418 } 2419 while (FormatTok->Type == TT_ConflictStart || 2420 FormatTok->Type == TT_ConflictEnd || 2421 FormatTok->Type == TT_ConflictAlternative) { 2422 if (FormatTok->Type == TT_ConflictStart) { 2423 conditionalCompilationStart(/*Unreachable=*/false); 2424 } else if (FormatTok->Type == TT_ConflictAlternative) { 2425 conditionalCompilationAlternative(); 2426 } else if (FormatTok->Type == TT_ConflictEnd) { 2427 conditionalCompilationEnd(); 2428 } 2429 FormatTok = Tokens->getNextToken(); 2430 FormatTok->MustBreakBefore = true; 2431 } 2432 2433 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 2434 !Line->InPPDirective) { 2435 continue; 2436 } 2437 2438 if (!FormatTok->Tok.is(tok::comment)) { 2439 distributeComments(Comments, FormatTok); 2440 Comments.clear(); 2441 return; 2442 } 2443 2444 Comments.push_back(FormatTok); 2445 } while (!eof()); 2446 2447 distributeComments(Comments, nullptr); 2448 Comments.clear(); 2449 } 2450 2451 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2452 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2453 if (MustBreakBeforeNextToken) { 2454 Line->Tokens.back().Tok->MustBreakBefore = true; 2455 MustBreakBeforeNextToken = false; 2456 } 2457 } 2458 2459 } // end namespace format 2460 } // end namespace clang 2461