1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "UnwrappedLineParser.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/Support/Debug.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #define DEBUG_TYPE "format-parser" 22 23 namespace clang { 24 namespace format { 25 26 class FormatTokenSource { 27 public: 28 virtual ~FormatTokenSource() {} 29 virtual FormatToken *getNextToken() = 0; 30 31 virtual unsigned getPosition() = 0; 32 virtual FormatToken *setPosition(unsigned Position) = 0; 33 }; 34 35 namespace { 36 37 class ScopedDeclarationState { 38 public: 39 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 40 bool MustBeDeclaration) 41 : Line(Line), Stack(Stack) { 42 Line.MustBeDeclaration = MustBeDeclaration; 43 Stack.push_back(MustBeDeclaration); 44 } 45 ~ScopedDeclarationState() { 46 Stack.pop_back(); 47 if (!Stack.empty()) 48 Line.MustBeDeclaration = Stack.back(); 49 else 50 Line.MustBeDeclaration = true; 51 } 52 53 private: 54 UnwrappedLine &Line; 55 std::vector<bool> &Stack; 56 }; 57 58 class ScopedMacroState : public FormatTokenSource { 59 public: 60 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 61 FormatToken *&ResetToken) 62 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 63 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 64 Token(nullptr) { 65 TokenSource = this; 66 Line.Level = 0; 67 Line.InPPDirective = true; 68 } 69 70 
~ScopedMacroState() override { 71 TokenSource = PreviousTokenSource; 72 ResetToken = Token; 73 Line.InPPDirective = false; 74 Line.Level = PreviousLineLevel; 75 } 76 77 FormatToken *getNextToken() override { 78 // The \c UnwrappedLineParser guards against this by never calling 79 // \c getNextToken() after it has encountered the first eof token. 80 assert(!eof()); 81 Token = PreviousTokenSource->getNextToken(); 82 if (eof()) 83 return getFakeEOF(); 84 return Token; 85 } 86 87 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 88 89 FormatToken *setPosition(unsigned Position) override { 90 Token = PreviousTokenSource->setPosition(Position); 91 return Token; 92 } 93 94 private: 95 bool eof() { return Token && Token->HasUnescapedNewline; } 96 97 FormatToken *getFakeEOF() { 98 static bool EOFInitialized = false; 99 static FormatToken FormatTok; 100 if (!EOFInitialized) { 101 FormatTok.Tok.startToken(); 102 FormatTok.Tok.setKind(tok::eof); 103 EOFInitialized = true; 104 } 105 return &FormatTok; 106 } 107 108 UnwrappedLine &Line; 109 FormatTokenSource *&TokenSource; 110 FormatToken *&ResetToken; 111 unsigned PreviousLineLevel; 112 FormatTokenSource *PreviousTokenSource; 113 114 FormatToken *Token; 115 }; 116 117 } // end anonymous namespace 118 119 class ScopedLineState { 120 public: 121 ScopedLineState(UnwrappedLineParser &Parser, 122 bool SwitchToPreprocessorLines = false) 123 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 124 if (SwitchToPreprocessorLines) 125 Parser.CurrentLines = &Parser.PreprocessorDirectives; 126 else if (!Parser.Line->Tokens.empty()) 127 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 128 PreBlockLine = std::move(Parser.Line); 129 Parser.Line = llvm::make_unique<UnwrappedLine>(); 130 Parser.Line->Level = PreBlockLine->Level; 131 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 132 } 133 134 ~ScopedLineState() { 135 if (!Parser.Line->Tokens.empty()) { 136 Parser.addUnwrappedLine(); 137 } 138 
assert(Parser.Line->Tokens.empty()); 139 Parser.Line = std::move(PreBlockLine); 140 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 141 Parser.MustBreakBeforeNextToken = true; 142 Parser.CurrentLines = OriginalLines; 143 } 144 145 private: 146 UnwrappedLineParser &Parser; 147 148 std::unique_ptr<UnwrappedLine> PreBlockLine; 149 SmallVectorImpl<UnwrappedLine> *OriginalLines; 150 }; 151 152 class CompoundStatementIndenter { 153 public: 154 CompoundStatementIndenter(UnwrappedLineParser *Parser, 155 const FormatStyle &Style, unsigned &LineLevel) 156 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 157 if (Style.BraceWrapping.AfterControlStatement) 158 Parser->addUnwrappedLine(); 159 if (Style.BraceWrapping.IndentBraces) 160 ++LineLevel; 161 } 162 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 163 164 private: 165 unsigned &LineLevel; 166 unsigned OldLineLevel; 167 }; 168 169 namespace { 170 171 class IndexedTokenSource : public FormatTokenSource { 172 public: 173 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 174 : Tokens(Tokens), Position(-1) {} 175 176 FormatToken *getNextToken() override { 177 ++Position; 178 return Tokens[Position]; 179 } 180 181 unsigned getPosition() override { 182 assert(Position >= 0); 183 return Position; 184 } 185 186 FormatToken *setPosition(unsigned P) override { 187 Position = P; 188 return Tokens[Position]; 189 } 190 191 void reset() { Position = -1; } 192 193 private: 194 ArrayRef<FormatToken *> Tokens; 195 int Position; 196 }; 197 198 } // end anonymous namespace 199 200 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 201 const AdditionalKeywords &Keywords, 202 ArrayRef<FormatToken *> Tokens, 203 UnwrappedLineConsumer &Callback) 204 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 205 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 206 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 207 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {} 208 209 void 
UnwrappedLineParser::reset() { 210 PPBranchLevel = -1; 211 Line.reset(new UnwrappedLine); 212 CommentsBeforeNextToken.clear(); 213 FormatTok = nullptr; 214 MustBreakBeforeNextToken = false; 215 PreprocessorDirectives.clear(); 216 CurrentLines = &Lines; 217 DeclarationScopeStack.clear(); 218 PPStack.clear(); 219 } 220 221 void UnwrappedLineParser::parse() { 222 IndexedTokenSource TokenSource(AllTokens); 223 do { 224 DEBUG(llvm::dbgs() << "----\n"); 225 reset(); 226 Tokens = &TokenSource; 227 TokenSource.reset(); 228 229 readToken(); 230 parseFile(); 231 // Create line with eof token. 232 pushToken(FormatTok); 233 addUnwrappedLine(); 234 235 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 236 E = Lines.end(); 237 I != E; ++I) { 238 Callback.consumeUnwrappedLine(*I); 239 } 240 Callback.finishRun(); 241 Lines.clear(); 242 while (!PPLevelBranchIndex.empty() && 243 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 244 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 245 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 246 } 247 if (!PPLevelBranchIndex.empty()) { 248 ++PPLevelBranchIndex.back(); 249 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 250 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 251 } 252 } while (!PPLevelBranchIndex.empty()); 253 } 254 255 void UnwrappedLineParser::parseFile() { 256 // The top-level context in a file always has declarations, except for pre- 257 // processor directives and JavaScript files. 258 bool MustBeDeclaration = 259 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 260 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 261 MustBeDeclaration); 262 parseLevel(/*HasOpeningBrace=*/false); 263 // Make sure to format the remaining tokens. 
264 flushComments(true); 265 addUnwrappedLine(); 266 } 267 268 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 269 bool SwitchLabelEncountered = false; 270 do { 271 tok::TokenKind kind = FormatTok->Tok.getKind(); 272 if (FormatTok->Type == TT_MacroBlockBegin) { 273 kind = tok::l_brace; 274 } else if (FormatTok->Type == TT_MacroBlockEnd) { 275 kind = tok::r_brace; 276 } 277 278 switch (kind) { 279 case tok::comment: 280 nextToken(); 281 addUnwrappedLine(); 282 break; 283 case tok::l_brace: 284 // FIXME: Add parameter whether this can happen - if this happens, we must 285 // be in a non-declaration context. 286 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 287 continue; 288 parseBlock(/*MustBeDeclaration=*/false); 289 addUnwrappedLine(); 290 break; 291 case tok::r_brace: 292 if (HasOpeningBrace) 293 return; 294 nextToken(); 295 addUnwrappedLine(); 296 break; 297 case tok::kw_default: 298 case tok::kw_case: 299 if (!SwitchLabelEncountered && 300 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 301 ++Line->Level; 302 SwitchLabelEncountered = true; 303 parseStructuralElement(); 304 break; 305 default: 306 parseStructuralElement(); 307 break; 308 } 309 } while (!eof()); 310 } 311 312 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 313 // We'll parse forward through the tokens until we hit 314 // a closing brace or eof - note that getNextToken() will 315 // parse macros, so this will magically work inside macro 316 // definitions, too. 317 unsigned StoredPosition = Tokens->getPosition(); 318 FormatToken *Tok = FormatTok; 319 const FormatToken *PrevTok = getPreviousToken(); 320 // Keep a stack of positions of lbrace tokens. We will 321 // update information about whether an lbrace starts a 322 // braced init list or a different block during the loop. 323 SmallVector<FormatToken *, 8> LBraceStack; 324 assert(Tok->Tok.is(tok::l_brace)); 325 do { 326 // Get next non-comment token. 
327 FormatToken *NextTok; 328 unsigned ReadTokens = 0; 329 do { 330 NextTok = Tokens->getNextToken(); 331 ++ReadTokens; 332 } while (NextTok->is(tok::comment)); 333 334 switch (Tok->Tok.getKind()) { 335 case tok::l_brace: 336 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok && 337 PrevTok->is(tok::colon)) 338 // A colon indicates this code is in a type, or a braced list following 339 // a label in an object literal ({a: {b: 1}}). 340 // The code below could be confused by semicolons between the individual 341 // members in a type member list, which would normally trigger BK_Block. 342 // In both cases, this must be parsed as an inline braced init. 343 Tok->BlockKind = BK_BracedInit; 344 else 345 Tok->BlockKind = BK_Unknown; 346 LBraceStack.push_back(Tok); 347 break; 348 case tok::r_brace: 349 if (LBraceStack.empty()) 350 break; 351 if (LBraceStack.back()->BlockKind == BK_Unknown) { 352 bool ProbablyBracedList = false; 353 if (Style.Language == FormatStyle::LK_Proto) { 354 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 355 } else { 356 // Using OriginalColumn to distinguish between ObjC methods and 357 // binary operators is a bit hacky. 358 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 359 NextTok->OriginalColumn == 0; 360 361 // If there is a comma, semicolon or right paren after the closing 362 // brace, we assume this is a braced initializer list. Note that 363 // regardless how we mark inner braces here, we will overwrite the 364 // BlockKind later if we parse a braced list (where all blocks 365 // inside are by default braced lists), or when we explicitly detect 366 // blocks (for example while parsing lambdas). 
367 ProbablyBracedList = 368 (Style.Language == FormatStyle::LK_JavaScript && 369 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 370 Keywords.kw_as)) || 371 (Style.isCpp() && NextTok->is(tok::l_paren)) || 372 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 373 tok::r_paren, tok::r_square, tok::l_brace, 374 tok::l_square, tok::ellipsis) || 375 (NextTok->is(tok::identifier) && 376 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 377 (NextTok->is(tok::semi) && 378 (!ExpectClassBody || LBraceStack.size() != 1)) || 379 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 380 } 381 if (ProbablyBracedList) { 382 Tok->BlockKind = BK_BracedInit; 383 LBraceStack.back()->BlockKind = BK_BracedInit; 384 } else { 385 Tok->BlockKind = BK_Block; 386 LBraceStack.back()->BlockKind = BK_Block; 387 } 388 } 389 LBraceStack.pop_back(); 390 break; 391 case tok::at: 392 case tok::semi: 393 case tok::kw_if: 394 case tok::kw_while: 395 case tok::kw_for: 396 case tok::kw_switch: 397 case tok::kw_try: 398 case tok::kw___try: 399 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 400 LBraceStack.back()->BlockKind = BK_Block; 401 break; 402 default: 403 break; 404 } 405 PrevTok = Tok; 406 Tok = NextTok; 407 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 408 409 // Assume other blocks for all unclosed opening braces. 
410 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 411 if (LBraceStack[i]->BlockKind == BK_Unknown) 412 LBraceStack[i]->BlockKind = BK_Block; 413 } 414 415 FormatTok = Tokens->setPosition(StoredPosition); 416 } 417 418 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 419 bool MunchSemi) { 420 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 421 "'{' or macro block token expected"); 422 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 423 FormatTok->BlockKind = BK_Block; 424 425 unsigned InitialLevel = Line->Level; 426 nextToken(); 427 428 if (MacroBlock && FormatTok->is(tok::l_paren)) 429 parseParens(); 430 431 addUnwrappedLine(); 432 size_t OpeningLineIndex = 433 Lines.empty() ? (UnwrappedLine::kInvalidIndex) : (Lines.size() - 1); 434 435 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 436 MustBeDeclaration); 437 if (AddLevel) 438 ++Line->Level; 439 parseLevel(/*HasOpeningBrace=*/true); 440 441 if (eof()) 442 return; 443 444 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 445 : !FormatTok->is(tok::r_brace)) { 446 Line->Level = InitialLevel; 447 FormatTok->BlockKind = BK_Block; 448 return; 449 } 450 451 nextToken(); // Munch the closing brace. 452 453 if (MacroBlock && FormatTok->is(tok::l_paren)) 454 parseParens(); 455 456 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 457 nextToken(); 458 Line->Level = InitialLevel; 459 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 460 } 461 462 static bool isGoogScope(const UnwrappedLine &Line) { 463 // FIXME: Closure-library specific stuff should not be hard-coded but be 464 // configurable. 
465 if (Line.Tokens.size() < 4) 466 return false; 467 auto I = Line.Tokens.begin(); 468 if (I->Tok->TokenText != "goog") 469 return false; 470 ++I; 471 if (I->Tok->isNot(tok::period)) 472 return false; 473 ++I; 474 if (I->Tok->TokenText != "scope") 475 return false; 476 ++I; 477 return I->Tok->is(tok::l_paren); 478 } 479 480 static bool isIIFE(const UnwrappedLine &Line, 481 const AdditionalKeywords &Keywords) { 482 // Look for the start of an immediately invoked anonymous function. 483 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 484 // This is commonly done in JavaScript to create a new, anonymous scope. 485 // Example: (function() { ... })() 486 if (Line.Tokens.size() < 3) 487 return false; 488 auto I = Line.Tokens.begin(); 489 if (I->Tok->isNot(tok::l_paren)) 490 return false; 491 ++I; 492 if (I->Tok->isNot(Keywords.kw_function)) 493 return false; 494 ++I; 495 return I->Tok->is(tok::l_paren); 496 } 497 498 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 499 const FormatToken &InitialToken) { 500 if (InitialToken.is(tok::kw_namespace)) 501 return Style.BraceWrapping.AfterNamespace; 502 if (InitialToken.is(tok::kw_class)) 503 return Style.BraceWrapping.AfterClass; 504 if (InitialToken.is(tok::kw_union)) 505 return Style.BraceWrapping.AfterUnion; 506 if (InitialToken.is(tok::kw_struct)) 507 return Style.BraceWrapping.AfterStruct; 508 return false; 509 } 510 511 void UnwrappedLineParser::parseChildBlock() { 512 FormatTok->BlockKind = BK_Block; 513 nextToken(); 514 { 515 bool SkipIndent = 516 (Style.Language == FormatStyle::LK_JavaScript && 517 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 518 ScopedLineState LineState(*this); 519 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 520 /*MustBeDeclaration=*/false); 521 Line->Level += SkipIndent ? 0 : 1; 522 parseLevel(/*HasOpeningBrace=*/true); 523 flushComments(isOnNewLine(*FormatTok)); 524 Line->Level -= SkipIndent ? 
0 : 1; 525 } 526 nextToken(); 527 } 528 529 void UnwrappedLineParser::parsePPDirective() { 530 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 531 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 532 nextToken(); 533 534 if (!FormatTok->Tok.getIdentifierInfo()) { 535 parsePPUnknown(); 536 return; 537 } 538 539 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 540 case tok::pp_define: 541 parsePPDefine(); 542 return; 543 case tok::pp_if: 544 parsePPIf(/*IfDef=*/false); 545 break; 546 case tok::pp_ifdef: 547 case tok::pp_ifndef: 548 parsePPIf(/*IfDef=*/true); 549 break; 550 case tok::pp_else: 551 parsePPElse(); 552 break; 553 case tok::pp_elif: 554 parsePPElIf(); 555 break; 556 case tok::pp_endif: 557 parsePPEndIf(); 558 break; 559 default: 560 parsePPUnknown(); 561 break; 562 } 563 } 564 565 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 566 if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable)) 567 PPStack.push_back(PP_Unreachable); 568 else 569 PPStack.push_back(PP_Conditional); 570 } 571 572 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 573 ++PPBranchLevel; 574 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 575 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 576 PPLevelBranchIndex.push_back(0); 577 PPLevelBranchCount.push_back(0); 578 } 579 PPChainBranchIndex.push(0); 580 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 581 conditionalCompilationCondition(Unreachable || Skip); 582 } 583 584 void UnwrappedLineParser::conditionalCompilationAlternative() { 585 if (!PPStack.empty()) 586 PPStack.pop_back(); 587 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 588 if (!PPChainBranchIndex.empty()) 589 ++PPChainBranchIndex.top(); 590 conditionalCompilationCondition( 591 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 592 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 593 } 594 595 void 
UnwrappedLineParser::conditionalCompilationEnd() { 596 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 597 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 598 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 599 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 600 } 601 } 602 // Guard against #endif's without #if. 603 if (PPBranchLevel > 0) 604 --PPBranchLevel; 605 if (!PPChainBranchIndex.empty()) 606 PPChainBranchIndex.pop(); 607 if (!PPStack.empty()) 608 PPStack.pop_back(); 609 } 610 611 void UnwrappedLineParser::parsePPIf(bool IfDef) { 612 bool IfNDef = FormatTok->is(tok::pp_ifndef); 613 nextToken(); 614 bool Unreachable = false; 615 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 616 Unreachable = true; 617 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 618 Unreachable = true; 619 conditionalCompilationStart(Unreachable); 620 parsePPUnknown(); 621 } 622 623 void UnwrappedLineParser::parsePPElse() { 624 conditionalCompilationAlternative(); 625 parsePPUnknown(); 626 } 627 628 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 629 630 void UnwrappedLineParser::parsePPEndIf() { 631 conditionalCompilationEnd(); 632 parsePPUnknown(); 633 } 634 635 void UnwrappedLineParser::parsePPDefine() { 636 nextToken(); 637 638 if (FormatTok->Tok.getKind() != tok::identifier) { 639 parsePPUnknown(); 640 return; 641 } 642 nextToken(); 643 if (FormatTok->Tok.getKind() == tok::l_paren && 644 FormatTok->WhitespaceRange.getBegin() == 645 FormatTok->WhitespaceRange.getEnd()) { 646 parseParens(); 647 } 648 addUnwrappedLine(); 649 Line->Level = 1; 650 651 // Errors during a preprocessor directive can only affect the layout of the 652 // preprocessor directive, and thus we ignore them. An alternative approach 653 // would be to use the same approach we use on the file level (no 654 // re-indentation if there was a structural error) within the macro 655 // definition. 
656 parseFile(); 657 } 658 659 void UnwrappedLineParser::parsePPUnknown() { 660 do { 661 nextToken(); 662 } while (!eof()); 663 addUnwrappedLine(); 664 } 665 666 // Here we blacklist certain tokens that are not usually the first token in an 667 // unwrapped line. This is used in attempt to distinguish macro calls without 668 // trailing semicolons from other constructs split to several lines. 669 static bool tokenCanStartNewLine(const clang::Token &Tok) { 670 // Semicolon can be a null-statement, l_square can be a start of a macro or 671 // a C++11 attribute, but this doesn't seem to be common. 672 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 673 Tok.isNot(tok::l_square) && 674 // Tokens that can only be used as binary operators and a part of 675 // overloaded operator names. 676 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 677 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 678 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 679 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 680 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 681 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 682 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 683 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 684 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 685 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 686 Tok.isNot(tok::lesslessequal) && 687 // Colon is used in labels, base class lists, initializer lists, 688 // range-based for loops, ternary operator, but should never be the 689 // first token in an unwrapped line. 690 Tok.isNot(tok::colon) && 691 // 'noexcept' is a trailing annotation. 692 Tok.isNot(tok::kw_noexcept); 693 } 694 695 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 696 const FormatToken *FormatTok) { 697 // FIXME: This returns true for C/C++ keywords like 'struct'. 
698 return FormatTok->is(tok::identifier) && 699 (FormatTok->Tok.getIdentifierInfo() == nullptr || 700 !FormatTok->isOneOf( 701 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 702 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 703 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 704 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 705 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 706 Keywords.kw_instanceof, Keywords.kw_interface, 707 Keywords.kw_throws)); 708 } 709 710 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 711 const FormatToken *FormatTok) { 712 return FormatTok->Tok.isLiteral() || 713 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 714 mustBeJSIdent(Keywords, FormatTok); 715 } 716 717 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 718 // when encountered after a value (see mustBeJSIdentOrValue). 719 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 720 const FormatToken *FormatTok) { 721 return FormatTok->isOneOf( 722 tok::kw_return, Keywords.kw_yield, 723 // conditionals 724 tok::kw_if, tok::kw_else, 725 // loops 726 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 727 // switch/case 728 tok::kw_switch, tok::kw_case, 729 // exceptions 730 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 731 // declaration 732 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 733 Keywords.kw_async, Keywords.kw_function, 734 // import/export 735 Keywords.kw_import, tok::kw_export); 736 } 737 738 // readTokenWithJavaScriptASI reads the next token and terminates the current 739 // line if JavaScript Automatic Semicolon Insertion must 740 // happen between the current token and the next token. 741 // 742 // This method is conservative - it cannot cover all edge cases of JavaScript, 743 // but only aims to correctly handle certain well known cases. 
// It *must not* terminate the line in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // Nothing to do unless a line break separates the two tokens; comments
  // collected before the next token count as its start.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
    // If the token before the previous one is an '@', the previous token is an
    // annotation and can precede another identifier/value.
    const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
    if (PrePrevious->is(tok::at))
      return;
  }
  // A value followed by '!' on the next line: end the line before the '!'.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // Two values (or a closing bracket / increment followed by a value) on
  // separate lines, unless either side belongs to a template expression.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus)))
    return addUnwrappedLine();
  // A value followed by a keyword that starts a declaration or statement.
  if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
    return addUnwrappedLine();
}

// Parses one structural element starting at the current token. The first
// switch dispatches on the leading token to specialised parsers; everything
// that falls out of it is consumed token by token in the loop below until a
// terminating construct (e.g. ';', '}' or a block) is found or eof is reached.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: 'include' followed by an optional string literal is a line of
  // its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::at:
    nextToken();
    if (FormatTok->Tok.is(tok::l_brace)) {
      parseBracedList();
      break;
    }
    switch (FormatTok->Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
    case tok::objc_optional:
    case tok::objc_required:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::objc_autoreleasepool:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterObjCDeclaration)
          addUnwrappedLine();
        parseBlock(/*MustBeDeclaration=*/false);
      }
      addUnwrappedLine();
      return;
    case tok::objc_try:
      // This branch isn't strictly necessary (the kw_try case below would
      // do this too after the tok::at is parsed above).  But be explicit.
      parseTryCatch();
      return;
    default:
      break;
    }
    break;
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Every token up to the closing brace is marked as finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java and JavaScript the keyword is simply consumed; otherwise it is
    // parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript)
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    parseSwitch();
    return;
  case tok::kw_default:
    nextToken();
    parseLabel();
    return;
  case tok::kw_case:
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // extern "..." { ... }: the contents are not nested one level deeper.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: import [public] "file" [;]
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // signals/slots keywords followed by ':' form a line of their own.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  do {
    const FormatToken *Previous = getPreviousToken();
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace))
        parseBracedList();
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java and JavaScript.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^', optionally followed by a type or identifier, a parenthesized
      // list, and a child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();
      if (Line->Tokens.size() == 1 &&
          // JS doesn't have macros, and within classes colons indicate fields,
          // not labels.
          Style.Language != FormatStyle::LK_JavaScript) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Only all-uppercase names that are function-like or at least five
        // characters long are treated as macros.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        parseBracedList();
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

bool UnwrappedLineParser::tryToParseLambda() {
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  const FormatToken* Previous = getPreviousToken();
  if (Previous &&
      (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
                         tok::kw_delete) ||
       Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
    nextToken();
    return false;
  }
  assert(FormatTok->is(tok::l_square));
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case
tok::kw_mutable: 1195 nextToken(); 1196 break; 1197 case tok::arrow: 1198 FormatTok->Type = TT_LambdaArrow; 1199 nextToken(); 1200 break; 1201 default: 1202 return true; 1203 } 1204 } 1205 LSquare.Type = TT_LambdaLSquare; 1206 parseChildBlock(); 1207 return true; 1208 } 1209 1210 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1211 nextToken(); 1212 if (FormatTok->is(tok::equal)) { 1213 nextToken(); 1214 if (FormatTok->is(tok::r_square)) { 1215 nextToken(); 1216 return true; 1217 } 1218 if (FormatTok->isNot(tok::comma)) 1219 return false; 1220 nextToken(); 1221 } else if (FormatTok->is(tok::amp)) { 1222 nextToken(); 1223 if (FormatTok->is(tok::r_square)) { 1224 nextToken(); 1225 return true; 1226 } 1227 if (!FormatTok->isOneOf(tok::comma, tok::identifier)) { 1228 return false; 1229 } 1230 if (FormatTok->is(tok::comma)) 1231 nextToken(); 1232 } else if (FormatTok->is(tok::r_square)) { 1233 nextToken(); 1234 return true; 1235 } 1236 do { 1237 if (FormatTok->is(tok::amp)) 1238 nextToken(); 1239 if (!FormatTok->isOneOf(tok::identifier, tok::kw_this)) 1240 return false; 1241 nextToken(); 1242 if (FormatTok->is(tok::ellipsis)) 1243 nextToken(); 1244 if (FormatTok->is(tok::comma)) { 1245 nextToken(); 1246 } else if (FormatTok->is(tok::r_square)) { 1247 nextToken(); 1248 return true; 1249 } else { 1250 return false; 1251 } 1252 } while (!eof()); 1253 return false; 1254 } 1255 1256 void UnwrappedLineParser::tryToParseJSFunction() { 1257 assert(FormatTok->is(Keywords.kw_function) || 1258 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1259 if (FormatTok->is(Keywords.kw_async)) 1260 nextToken(); 1261 // Consume "function". 1262 nextToken(); 1263 1264 // Consume * (generator function). Treat it like C++'s overloaded operators. 1265 if (FormatTok->is(tok::star)) { 1266 FormatTok->Type = TT_OverloadedOperator; 1267 nextToken(); 1268 } 1269 1270 // Consume function name. 
1271 if (FormatTok->is(tok::identifier)) 1272 nextToken(); 1273 1274 if (FormatTok->isNot(tok::l_paren)) 1275 return; 1276 1277 // Parse formal parameter list. 1278 parseParens(); 1279 1280 if (FormatTok->is(tok::colon)) { 1281 // Parse a type definition. 1282 nextToken(); 1283 1284 // Eat the type declaration. For braced inline object types, balance braces, 1285 // otherwise just parse until finding an l_brace for the function body. 1286 if (FormatTok->is(tok::l_brace)) 1287 tryToParseBracedList(); 1288 else 1289 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1290 nextToken(); 1291 } 1292 1293 if (FormatTok->is(tok::semi)) 1294 return; 1295 1296 parseChildBlock(); 1297 } 1298 1299 bool UnwrappedLineParser::tryToParseBracedList() { 1300 if (FormatTok->BlockKind == BK_Unknown) 1301 calculateBraceTypes(); 1302 assert(FormatTok->BlockKind != BK_Unknown); 1303 if (FormatTok->BlockKind == BK_Block) 1304 return false; 1305 parseBracedList(); 1306 return true; 1307 } 1308 1309 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { 1310 bool HasError = false; 1311 nextToken(); 1312 1313 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1314 // replace this by using parseAssigmentExpression() inside. 1315 do { 1316 if (Style.Language == FormatStyle::LK_JavaScript) { 1317 if (FormatTok->is(Keywords.kw_function) || 1318 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1319 tryToParseJSFunction(); 1320 continue; 1321 } 1322 if (FormatTok->is(TT_JsFatArrow)) { 1323 nextToken(); 1324 // Fat arrows can be followed by simple expressions or by child blocks 1325 // in curly braces. 1326 if (FormatTok->is(tok::l_brace)) { 1327 parseChildBlock(); 1328 continue; 1329 } 1330 } 1331 if (FormatTok->is(tok::l_brace)) { 1332 // Could be a method inside of a braced list `{a() { return 1; }}`. 
1333 if (tryToParseBracedList()) 1334 continue; 1335 parseChildBlock(); 1336 } 1337 } 1338 switch (FormatTok->Tok.getKind()) { 1339 case tok::caret: 1340 nextToken(); 1341 if (FormatTok->is(tok::l_brace)) { 1342 parseChildBlock(); 1343 } 1344 break; 1345 case tok::l_square: 1346 tryToParseLambda(); 1347 break; 1348 case tok::l_paren: 1349 parseParens(); 1350 // JavaScript can just have free standing methods and getters/setters in 1351 // object literals. Detect them by a "{" following ")". 1352 if (Style.Language == FormatStyle::LK_JavaScript) { 1353 if (FormatTok->is(tok::l_brace)) 1354 parseChildBlock(); 1355 break; 1356 } 1357 break; 1358 case tok::l_brace: 1359 // Assume there are no blocks inside a braced init list apart 1360 // from the ones we explicitly parse out (like lambdas). 1361 FormatTok->BlockKind = BK_BracedInit; 1362 parseBracedList(); 1363 break; 1364 case tok::r_brace: 1365 nextToken(); 1366 return !HasError; 1367 case tok::semi: 1368 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1369 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1370 // used for error recovery if we have otherwise determined that this is 1371 // a braced list. 
1372 if (Style.Language == FormatStyle::LK_JavaScript) { 1373 nextToken(); 1374 break; 1375 } 1376 HasError = true; 1377 if (!ContinueOnSemicolons) 1378 return !HasError; 1379 nextToken(); 1380 break; 1381 case tok::comma: 1382 nextToken(); 1383 break; 1384 default: 1385 nextToken(); 1386 break; 1387 } 1388 } while (!eof()); 1389 return false; 1390 } 1391 1392 void UnwrappedLineParser::parseParens() { 1393 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1394 nextToken(); 1395 do { 1396 switch (FormatTok->Tok.getKind()) { 1397 case tok::l_paren: 1398 parseParens(); 1399 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1400 parseChildBlock(); 1401 break; 1402 case tok::r_paren: 1403 nextToken(); 1404 return; 1405 case tok::r_brace: 1406 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1407 return; 1408 case tok::l_square: 1409 tryToParseLambda(); 1410 break; 1411 case tok::l_brace: 1412 if (!tryToParseBracedList()) 1413 parseChildBlock(); 1414 break; 1415 case tok::at: 1416 nextToken(); 1417 if (FormatTok->Tok.is(tok::l_brace)) 1418 parseBracedList(); 1419 break; 1420 case tok::kw_class: 1421 if (Style.Language == FormatStyle::LK_JavaScript) 1422 parseRecord(/*ParseAsExpr=*/true); 1423 else 1424 nextToken(); 1425 break; 1426 case tok::identifier: 1427 if (Style.Language == FormatStyle::LK_JavaScript && 1428 (FormatTok->is(Keywords.kw_function) || 1429 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1430 tryToParseJSFunction(); 1431 else 1432 nextToken(); 1433 break; 1434 default: 1435 nextToken(); 1436 break; 1437 } 1438 } while (!eof()); 1439 } 1440 1441 void UnwrappedLineParser::parseSquare() { 1442 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1443 if (tryToParseLambda()) 1444 return; 1445 do { 1446 switch (FormatTok->Tok.getKind()) { 1447 case tok::l_paren: 1448 parseParens(); 1449 break; 1450 case tok::r_square: 1451 nextToken(); 1452 return; 1453 case tok::r_brace: 
1454 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1455 return; 1456 case tok::l_square: 1457 parseSquare(); 1458 break; 1459 case tok::l_brace: { 1460 if (!tryToParseBracedList()) 1461 parseChildBlock(); 1462 break; 1463 } 1464 case tok::at: 1465 nextToken(); 1466 if (FormatTok->Tok.is(tok::l_brace)) 1467 parseBracedList(); 1468 break; 1469 default: 1470 nextToken(); 1471 break; 1472 } 1473 } while (!eof()); 1474 } 1475 1476 void UnwrappedLineParser::parseIfThenElse() { 1477 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1478 nextToken(); 1479 if (FormatTok->Tok.is(tok::l_paren)) 1480 parseParens(); 1481 bool NeedsUnwrappedLine = false; 1482 if (FormatTok->Tok.is(tok::l_brace)) { 1483 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1484 parseBlock(/*MustBeDeclaration=*/false); 1485 if (Style.BraceWrapping.BeforeElse) 1486 addUnwrappedLine(); 1487 else 1488 NeedsUnwrappedLine = true; 1489 } else { 1490 addUnwrappedLine(); 1491 ++Line->Level; 1492 parseStructuralElement(); 1493 --Line->Level; 1494 } 1495 if (FormatTok->Tok.is(tok::kw_else)) { 1496 nextToken(); 1497 if (FormatTok->Tok.is(tok::l_brace)) { 1498 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1499 parseBlock(/*MustBeDeclaration=*/false); 1500 addUnwrappedLine(); 1501 } else if (FormatTok->Tok.is(tok::kw_if)) { 1502 parseIfThenElse(); 1503 } else { 1504 addUnwrappedLine(); 1505 ++Line->Level; 1506 parseStructuralElement(); 1507 if (FormatTok->is(tok::eof)) 1508 addUnwrappedLine(); 1509 --Line->Level; 1510 } 1511 } else if (NeedsUnwrappedLine) { 1512 addUnwrappedLine(); 1513 } 1514 } 1515 1516 void UnwrappedLineParser::parseTryCatch() { 1517 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1518 nextToken(); 1519 bool NeedsUnwrappedLine = false; 1520 if (FormatTok->is(tok::colon)) { 1521 // We are in a function try block, what comes is an initializer list. 
1522 nextToken(); 1523 while (FormatTok->is(tok::identifier)) { 1524 nextToken(); 1525 if (FormatTok->is(tok::l_paren)) 1526 parseParens(); 1527 if (FormatTok->is(tok::comma)) 1528 nextToken(); 1529 } 1530 } 1531 // Parse try with resource. 1532 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1533 parseParens(); 1534 } 1535 if (FormatTok->is(tok::l_brace)) { 1536 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1537 parseBlock(/*MustBeDeclaration=*/false); 1538 if (Style.BraceWrapping.BeforeCatch) { 1539 addUnwrappedLine(); 1540 } else { 1541 NeedsUnwrappedLine = true; 1542 } 1543 } else if (!FormatTok->is(tok::kw_catch)) { 1544 // The C++ standard requires a compound-statement after a try. 1545 // If there's none, we try to assume there's a structuralElement 1546 // and try to continue. 1547 addUnwrappedLine(); 1548 ++Line->Level; 1549 parseStructuralElement(); 1550 --Line->Level; 1551 } 1552 while (1) { 1553 if (FormatTok->is(tok::at)) 1554 nextToken(); 1555 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1556 tok::kw___finally) || 1557 ((Style.Language == FormatStyle::LK_Java || 1558 Style.Language == FormatStyle::LK_JavaScript) && 1559 FormatTok->is(Keywords.kw_finally)) || 1560 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1561 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1562 break; 1563 nextToken(); 1564 while (FormatTok->isNot(tok::l_brace)) { 1565 if (FormatTok->is(tok::l_paren)) { 1566 parseParens(); 1567 continue; 1568 } 1569 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1570 return; 1571 nextToken(); 1572 } 1573 NeedsUnwrappedLine = false; 1574 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1575 parseBlock(/*MustBeDeclaration=*/false); 1576 if (Style.BraceWrapping.BeforeCatch) 1577 addUnwrappedLine(); 1578 else 1579 NeedsUnwrappedLine = true; 1580 } 1581 if (NeedsUnwrappedLine) 1582 addUnwrappedLine(); 1583 } 1584 1585 void 
UnwrappedLineParser::parseNamespace() { 1586 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1587 1588 const FormatToken &InitialToken = *FormatTok; 1589 nextToken(); 1590 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1591 nextToken(); 1592 if (FormatTok->Tok.is(tok::l_brace)) { 1593 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1594 addUnwrappedLine(); 1595 1596 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1597 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1598 DeclarationScopeStack.size() > 1); 1599 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1600 // Munch the semicolon after a namespace. This is more common than one would 1601 // think. Puttin the semicolon into its own line is very ugly. 1602 if (FormatTok->Tok.is(tok::semi)) 1603 nextToken(); 1604 addUnwrappedLine(); 1605 } 1606 // FIXME: Add error handling. 1607 } 1608 1609 void UnwrappedLineParser::parseNew() { 1610 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1611 nextToken(); 1612 if (Style.Language != FormatStyle::LK_Java) 1613 return; 1614 1615 // In Java, we can parse everything up to the parens, which aren't optional. 1616 do { 1617 // There should not be a ;, { or } before the new's open paren. 1618 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1619 return; 1620 1621 // Consume the parens. 1622 if (FormatTok->is(tok::l_paren)) { 1623 parseParens(); 1624 1625 // If there is a class body of an anonymous class, consume that as child. 
1626 if (FormatTok->is(tok::l_brace)) 1627 parseChildBlock(); 1628 return; 1629 } 1630 nextToken(); 1631 } while (!eof()); 1632 } 1633 1634 void UnwrappedLineParser::parseForOrWhileLoop() { 1635 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1636 "'for', 'while' or foreach macro expected"); 1637 nextToken(); 1638 if (FormatTok->Tok.is(tok::l_paren)) 1639 parseParens(); 1640 if (FormatTok->Tok.is(tok::l_brace)) { 1641 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1642 parseBlock(/*MustBeDeclaration=*/false); 1643 addUnwrappedLine(); 1644 } else { 1645 addUnwrappedLine(); 1646 ++Line->Level; 1647 parseStructuralElement(); 1648 --Line->Level; 1649 } 1650 } 1651 1652 void UnwrappedLineParser::parseDoWhile() { 1653 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1654 nextToken(); 1655 if (FormatTok->Tok.is(tok::l_brace)) { 1656 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1657 parseBlock(/*MustBeDeclaration=*/false); 1658 if (Style.BraceWrapping.IndentBraces) 1659 addUnwrappedLine(); 1660 } else { 1661 addUnwrappedLine(); 1662 ++Line->Level; 1663 parseStructuralElement(); 1664 --Line->Level; 1665 } 1666 1667 // FIXME: Add error handling. 
1668 if (!FormatTok->Tok.is(tok::kw_while)) { 1669 addUnwrappedLine(); 1670 return; 1671 } 1672 1673 nextToken(); 1674 parseStructuralElement(); 1675 } 1676 1677 void UnwrappedLineParser::parseLabel() { 1678 nextToken(); 1679 unsigned OldLineLevel = Line->Level; 1680 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1681 --Line->Level; 1682 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1683 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1684 parseBlock(/*MustBeDeclaration=*/false); 1685 if (FormatTok->Tok.is(tok::kw_break)) { 1686 if (Style.BraceWrapping.AfterControlStatement) 1687 addUnwrappedLine(); 1688 parseStructuralElement(); 1689 } 1690 addUnwrappedLine(); 1691 } else { 1692 if (FormatTok->is(tok::semi)) 1693 nextToken(); 1694 addUnwrappedLine(); 1695 } 1696 Line->Level = OldLineLevel; 1697 if (FormatTok->isNot(tok::l_brace)) { 1698 parseStructuralElement(); 1699 addUnwrappedLine(); 1700 } 1701 } 1702 1703 void UnwrappedLineParser::parseCaseLabel() { 1704 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1705 // FIXME: fix handling of complex expressions here. 1706 do { 1707 nextToken(); 1708 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1709 parseLabel(); 1710 } 1711 1712 void UnwrappedLineParser::parseSwitch() { 1713 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1714 nextToken(); 1715 if (FormatTok->Tok.is(tok::l_paren)) 1716 parseParens(); 1717 if (FormatTok->Tok.is(tok::l_brace)) { 1718 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1719 parseBlock(/*MustBeDeclaration=*/false); 1720 addUnwrappedLine(); 1721 } else { 1722 addUnwrappedLine(); 1723 ++Line->Level; 1724 parseStructuralElement(); 1725 --Line->Level; 1726 } 1727 } 1728 1729 void UnwrappedLineParser::parseAccessSpecifier() { 1730 nextToken(); 1731 // Understand Qt's slots. 
1732 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 1733 nextToken(); 1734 // Otherwise, we don't know what it is, and we'd better keep the next token. 1735 if (FormatTok->Tok.is(tok::colon)) 1736 nextToken(); 1737 addUnwrappedLine(); 1738 } 1739 1740 bool UnwrappedLineParser::parseEnum() { 1741 // Won't be 'enum' for NS_ENUMs. 1742 if (FormatTok->Tok.is(tok::kw_enum)) 1743 nextToken(); 1744 1745 // In TypeScript, "enum" can also be used as property name, e.g. in interface 1746 // declarations. An "enum" keyword followed by a colon would be a syntax 1747 // error and thus assume it is just an identifier. 1748 if (Style.Language == FormatStyle::LK_JavaScript && 1749 FormatTok->isOneOf(tok::colon, tok::question)) 1750 return false; 1751 1752 // Eat up enum class ... 1753 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 1754 nextToken(); 1755 1756 while (FormatTok->Tok.getIdentifierInfo() || 1757 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 1758 tok::greater, tok::comma, tok::question)) { 1759 nextToken(); 1760 // We can have macros or attributes in between 'enum' and the enum name. 1761 if (FormatTok->is(tok::l_paren)) 1762 parseParens(); 1763 if (FormatTok->is(tok::identifier)) { 1764 nextToken(); 1765 // If there are two identifiers in a row, this is likely an elaborate 1766 // return type. In Java, this can be "implements", etc. 1767 if (Style.isCpp() && FormatTok->is(tok::identifier)) 1768 return false; 1769 } 1770 } 1771 1772 // Just a declaration or something is wrong. 1773 if (FormatTok->isNot(tok::l_brace)) 1774 return true; 1775 FormatTok->BlockKind = BK_Block; 1776 1777 if (Style.Language == FormatStyle::LK_Java) { 1778 // Java enums are different. 1779 parseJavaEnumBody(); 1780 return true; 1781 } 1782 if (Style.Language == FormatStyle::LK_Proto) { 1783 parseBlock(/*MustBeDeclaration=*/true); 1784 return true; 1785 } 1786 1787 // Parse enum body. 
1788 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 1789 if (HasError) { 1790 if (FormatTok->is(tok::semi)) 1791 nextToken(); 1792 addUnwrappedLine(); 1793 } 1794 return true; 1795 1796 // There is no addUnwrappedLine() here so that we fall through to parsing a 1797 // structural element afterwards. Thus, in "enum A {} n, m;", 1798 // "} n, m;" will end up in one unwrapped line. 1799 } 1800 1801 void UnwrappedLineParser::parseJavaEnumBody() { 1802 // Determine whether the enum is simple, i.e. does not have a semicolon or 1803 // constants with class bodies. Simple enums can be formatted like braced 1804 // lists, contracted to a single line, etc. 1805 unsigned StoredPosition = Tokens->getPosition(); 1806 bool IsSimple = true; 1807 FormatToken *Tok = Tokens->getNextToken(); 1808 while (Tok) { 1809 if (Tok->is(tok::r_brace)) 1810 break; 1811 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 1812 IsSimple = false; 1813 break; 1814 } 1815 // FIXME: This will also mark enums with braces in the arguments to enum 1816 // constants as "not simple". This is probably fine in practice, though. 1817 Tok = Tokens->getNextToken(); 1818 } 1819 FormatTok = Tokens->setPosition(StoredPosition); 1820 1821 if (IsSimple) { 1822 parseBracedList(); 1823 addUnwrappedLine(); 1824 return; 1825 } 1826 1827 // Parse the body of a more complex enum. 1828 // First add a line for everything up to the "{". 1829 nextToken(); 1830 addUnwrappedLine(); 1831 ++Line->Level; 1832 1833 // Parse the enum constants. 1834 while (FormatTok) { 1835 if (FormatTok->is(tok::l_brace)) { 1836 // Parse the constant's class body. 
1837 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1838 /*MunchSemi=*/false); 1839 } else if (FormatTok->is(tok::l_paren)) { 1840 parseParens(); 1841 } else if (FormatTok->is(tok::comma)) { 1842 nextToken(); 1843 addUnwrappedLine(); 1844 } else if (FormatTok->is(tok::semi)) { 1845 nextToken(); 1846 addUnwrappedLine(); 1847 break; 1848 } else if (FormatTok->is(tok::r_brace)) { 1849 addUnwrappedLine(); 1850 break; 1851 } else { 1852 nextToken(); 1853 } 1854 } 1855 1856 // Parse the class body after the enum's ";" if any. 1857 parseLevel(/*HasOpeningBrace=*/true); 1858 nextToken(); 1859 --Line->Level; 1860 addUnwrappedLine(); 1861 } 1862 1863 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 1864 const FormatToken &InitialToken = *FormatTok; 1865 nextToken(); 1866 1867 // The actual identifier can be a nested name specifier, and in macros 1868 // it is often token-pasted. 1869 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 1870 tok::kw___attribute, tok::kw___declspec, 1871 tok::kw_alignas) || 1872 ((Style.Language == FormatStyle::LK_Java || 1873 Style.Language == FormatStyle::LK_JavaScript) && 1874 FormatTok->isOneOf(tok::period, tok::comma))) { 1875 bool IsNonMacroIdentifier = 1876 FormatTok->is(tok::identifier) && 1877 FormatTok->TokenText != FormatTok->TokenText.upper(); 1878 nextToken(); 1879 // We can have macros or attributes in between 'class' and the class name. 1880 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 1881 parseParens(); 1882 } 1883 1884 // Note that parsing away template declarations here leads to incorrectly 1885 // accepting function declarations as record declarations. 1886 // In general, we cannot solve this problem. Consider: 1887 // class A<int> B() {} 1888 // which can be a function definition or a class definition when B() is a 1889 // macro. 
If we find enough real-world cases where this is a problem, we 1890 // can parse for the 'template' keyword in the beginning of the statement, 1891 // and thus rule out the record production in case there is no template 1892 // (this would still leave us with an ambiguity between template function 1893 // and class declarations). 1894 if (FormatTok->isOneOf(tok::colon, tok::less)) { 1895 while (!eof()) { 1896 if (FormatTok->is(tok::l_brace)) { 1897 calculateBraceTypes(/*ExpectClassBody=*/true); 1898 if (!tryToParseBracedList()) 1899 break; 1900 } 1901 if (FormatTok->Tok.is(tok::semi)) 1902 return; 1903 nextToken(); 1904 } 1905 } 1906 if (FormatTok->Tok.is(tok::l_brace)) { 1907 if (ParseAsExpr) { 1908 parseChildBlock(); 1909 } else { 1910 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1911 addUnwrappedLine(); 1912 1913 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1914 /*MunchSemi=*/false); 1915 } 1916 } 1917 // There is no addUnwrappedLine() here so that we fall through to parsing a 1918 // structural element afterwards. Thus, in "class A {} n, m;", 1919 // "} n, m;" will end up in one unwrapped line. 1920 } 1921 1922 void UnwrappedLineParser::parseObjCProtocolList() { 1923 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 1924 do 1925 nextToken(); 1926 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 1927 nextToken(); // Skip '>'. 1928 } 1929 1930 void UnwrappedLineParser::parseObjCUntilAtEnd() { 1931 do { 1932 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 1933 nextToken(); 1934 addUnwrappedLine(); 1935 break; 1936 } 1937 if (FormatTok->is(tok::l_brace)) { 1938 parseBlock(/*MustBeDeclaration=*/false); 1939 // In ObjC interfaces, nothing should be following the "}". 1940 addUnwrappedLine(); 1941 } else if (FormatTok->is(tok::r_brace)) { 1942 // Ignore stray "}". parseStructuralElement doesn't consume them. 
1943 nextToken(); 1944 addUnwrappedLine(); 1945 } else { 1946 parseStructuralElement(); 1947 } 1948 } while (!eof()); 1949 } 1950 1951 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 1952 nextToken(); 1953 nextToken(); // interface name 1954 1955 // @interface can be followed by either a base class, or a category. 1956 if (FormatTok->Tok.is(tok::colon)) { 1957 nextToken(); 1958 nextToken(); // base class name 1959 } else if (FormatTok->Tok.is(tok::l_paren)) 1960 // Skip category, if present. 1961 parseParens(); 1962 1963 if (FormatTok->Tok.is(tok::less)) 1964 parseObjCProtocolList(); 1965 1966 if (FormatTok->Tok.is(tok::l_brace)) { 1967 if (Style.BraceWrapping.AfterObjCDeclaration) 1968 addUnwrappedLine(); 1969 parseBlock(/*MustBeDeclaration=*/true); 1970 } 1971 1972 // With instance variables, this puts '}' on its own line. Without instance 1973 // variables, this ends the @interface line. 1974 addUnwrappedLine(); 1975 1976 parseObjCUntilAtEnd(); 1977 } 1978 1979 void UnwrappedLineParser::parseObjCProtocol() { 1980 nextToken(); 1981 nextToken(); // protocol name 1982 1983 if (FormatTok->Tok.is(tok::less)) 1984 parseObjCProtocolList(); 1985 1986 // Check for protocol declaration. 1987 if (FormatTok->Tok.is(tok::semi)) { 1988 nextToken(); 1989 return addUnwrappedLine(); 1990 } 1991 1992 addUnwrappedLine(); 1993 parseObjCUntilAtEnd(); 1994 } 1995 1996 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 1997 bool IsImport = FormatTok->is(Keywords.kw_import); 1998 assert(IsImport || FormatTok->is(tok::kw_export)); 1999 nextToken(); 2000 2001 // Consume the "default" in "export default class/function". 2002 if (FormatTok->is(tok::kw_default)) 2003 nextToken(); 2004 2005 // Consume "async function", "function" and "default function", so that these 2006 // get parsed as free-standing JS functions, i.e. do not require a trailing 2007 // semicolon. 
2008 if (FormatTok->is(Keywords.kw_async)) 2009 nextToken(); 2010 if (FormatTok->is(Keywords.kw_function)) { 2011 nextToken(); 2012 return; 2013 } 2014 2015 // For imports, `export *`, `export {...}`, consume the rest of the line up 2016 // to the terminating `;`. For everything else, just return and continue 2017 // parsing the structural element, i.e. the declaration or expression for 2018 // `export default`. 2019 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2020 !FormatTok->isStringLiteral()) 2021 return; 2022 2023 while (!eof()) { 2024 if (FormatTok->is(tok::semi)) 2025 return; 2026 if (Line->Tokens.size() == 0) { 2027 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2028 // import statement should terminate. 2029 return; 2030 } 2031 if (FormatTok->is(tok::l_brace)) { 2032 FormatTok->BlockKind = BK_Block; 2033 parseBracedList(); 2034 } else { 2035 nextToken(); 2036 } 2037 } 2038 } 2039 2040 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2041 StringRef Prefix = "") { 2042 llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" 2043 << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 2044 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2045 E = Line.Tokens.end(); 2046 I != E; ++I) { 2047 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2048 << "T=" << I->Tok->Type 2049 << ", OC=" << I->Tok->OriginalColumn << "] "; 2050 } 2051 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2052 E = Line.Tokens.end(); 2053 I != E; ++I) { 2054 const UnwrappedLineNode &Node = *I; 2055 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2056 I = Node.Children.begin(), 2057 E = Node.Children.end(); 2058 I != E; ++I) { 2059 printDebugInfo(*I, "\nChild: "); 2060 } 2061 } 2062 llvm::dbgs() << "\n"; 2063 } 2064 2065 void UnwrappedLineParser::addUnwrappedLine() { 2066 if (Line->Tokens.empty()) 2067 return; 2068 DEBUG({ 2069 if (CurrentLines == &Lines) 2070 printDebugInfo(*Line); 2071 }); 2072 CurrentLines->push_back(std::move(*Line)); 2073 Line->Tokens.clear(); 2074 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2075 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2076 CurrentLines->append( 2077 std::make_move_iterator(PreprocessorDirectives.begin()), 2078 std::make_move_iterator(PreprocessorDirectives.end())); 2079 PreprocessorDirectives.clear(); 2080 } 2081 } 2082 2083 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2084 2085 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2086 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2087 FormatTok.NewlinesBefore > 0; 2088 } 2089 2090 static bool isLineComment(const FormatToken &FormatTok) { 2091 return FormatTok.is(tok::comment) && 2092 FormatTok.TokenText.startswith("//"); 2093 } 2094 2095 // Checks if \p FormatTok is a line comment that continues the line comment 2096 // section on \p Line. 
2097 static bool continuesLineComment(const FormatToken &FormatTok, 2098 const UnwrappedLine &Line, 2099 llvm::Regex &CommentPragmasRegex) { 2100 if (Line.Tokens.empty()) 2101 return false; 2102 2103 StringRef IndentContent = FormatTok.TokenText; 2104 if (FormatTok.TokenText.startswith("//") || 2105 FormatTok.TokenText.startswith("/*")) 2106 IndentContent = FormatTok.TokenText.substr(2); 2107 if (CommentPragmasRegex.match(IndentContent)) 2108 return false; 2109 2110 // If Line starts with a line comment, then FormatTok continues the comment 2111 // section if its original column is greater or equal to the original start 2112 // column of the line. 2113 // 2114 // Define the min column token of a line as follows: if a line ends in '{' or 2115 // contains a '{' followed by a line comment, then the min column token is 2116 // that '{'. Otherwise, the min column token of the line is the first token of 2117 // the line. 2118 // 2119 // If Line starts with a token other than a line comment, then FormatTok 2120 // continues the comment section if its original column is greater than the 2121 // original start column of the min column token of the line. 
2122 // 2123 // For example, the second line comment continues the first in these cases: 2124 // 2125 // // first line 2126 // // second line 2127 // 2128 // and: 2129 // 2130 // // first line 2131 // // second line 2132 // 2133 // and: 2134 // 2135 // int i; // first line 2136 // // second line 2137 // 2138 // and: 2139 // 2140 // do { // first line 2141 // // second line 2142 // int i; 2143 // } while (true); 2144 // 2145 // and: 2146 // 2147 // enum { 2148 // a, // first line 2149 // // second line 2150 // b 2151 // }; 2152 // 2153 // The second line comment doesn't continue the first in these cases: 2154 // 2155 // // first line 2156 // // second line 2157 // 2158 // and: 2159 // 2160 // int i; // first line 2161 // // second line 2162 // 2163 // and: 2164 // 2165 // do { // first line 2166 // // second line 2167 // int i; 2168 // } while (true); 2169 // 2170 // and: 2171 // 2172 // enum { 2173 // a, // first line 2174 // // second line 2175 // }; 2176 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2177 2178 // Scan for '{//'. If found, use the column of '{' as a min column for line 2179 // comment section continuation. 2180 const FormatToken *PreviousToken = nullptr; 2181 for (const UnwrappedLineNode &Node : Line.Tokens) { 2182 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2183 isLineComment(*Node.Tok)) { 2184 MinColumnToken = PreviousToken; 2185 break; 2186 } 2187 PreviousToken = Node.Tok; 2188 2189 // Grab the last newline preceding a token in this unwrapped line. 2190 if (Node.Tok->NewlinesBefore > 0) { 2191 MinColumnToken = Node.Tok; 2192 } 2193 } 2194 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2195 MinColumnToken = PreviousToken; 2196 } 2197 2198 unsigned MinContinueColumn = 2199 MinColumnToken->OriginalColumn + 2200 (isLineComment(*MinColumnToken) ? 
0 : 1); 2201 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 2202 isLineComment(*(Line.Tokens.back().Tok)) && 2203 FormatTok.OriginalColumn >= MinContinueColumn; 2204 } 2205 2206 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2207 bool JustComments = Line->Tokens.empty(); 2208 for (SmallVectorImpl<FormatToken *>::const_iterator 2209 I = CommentsBeforeNextToken.begin(), 2210 E = CommentsBeforeNextToken.end(); 2211 I != E; ++I) { 2212 // Line comments that belong to the same line comment section are put on the 2213 // same line since later we might want to reflow content between them. 2214 // Additional fine-grained breaking of line comment sections is controlled 2215 // by the class BreakableLineCommentSection in case it is desirable to keep 2216 // several line comment sections in the same unwrapped line. 2217 // 2218 // FIXME: Consider putting separate line comment sections as children to the 2219 // unwrapped line instead. 2220 (*I)->ContinuesLineCommentSection = 2221 continuesLineComment(**I, *Line, CommentPragmasRegex); 2222 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2223 addUnwrappedLine(); 2224 pushToken(*I); 2225 } 2226 if (NewlineBeforeNext && JustComments) 2227 addUnwrappedLine(); 2228 CommentsBeforeNextToken.clear(); 2229 } 2230 2231 void UnwrappedLineParser::nextToken() { 2232 if (eof()) 2233 return; 2234 flushComments(isOnNewLine(*FormatTok)); 2235 pushToken(FormatTok); 2236 if (Style.Language != FormatStyle::LK_JavaScript) 2237 readToken(); 2238 else 2239 readTokenWithJavaScriptASI(); 2240 } 2241 2242 const FormatToken *UnwrappedLineParser::getPreviousToken() { 2243 // FIXME: This is a dirty way to access the previous token. Find a better 2244 // solution. 
2245 if (!Line || Line->Tokens.empty()) 2246 return nullptr; 2247 return Line->Tokens.back().Tok; 2248 } 2249 2250 void UnwrappedLineParser::distributeComments( 2251 const SmallVectorImpl<FormatToken *> &Comments, 2252 const FormatToken *NextTok) { 2253 // Whether or not a line comment token continues a line is controlled by 2254 // the method continuesLineComment, with the following caveat: 2255 // 2256 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2257 // that each comment line from the trail is aligned with the next token, if 2258 // the next token exists. If a trail exists, the beginning of the maximal 2259 // trail is marked as a start of a new comment section. 2260 // 2261 // For example in this code: 2262 // 2263 // int a; // line about a 2264 // // line 1 about b 2265 // // line 2 about b 2266 // int b; 2267 // 2268 // the two lines about b form a maximal trail, so there are two sections, the 2269 // first one consisting of the single comment "// line about a" and the 2270 // second one consisting of the next two comments. 2271 if (Comments.empty()) 2272 return; 2273 bool ShouldPushCommentsInCurrentLine = true; 2274 bool HasTrailAlignedWithNextToken = false; 2275 unsigned StartOfTrailAlignedWithNextToken = 0; 2276 if (NextTok) { 2277 // We are skipping the first element intentionally. 
2278 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2279 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2280 HasTrailAlignedWithNextToken = true; 2281 StartOfTrailAlignedWithNextToken = i; 2282 } 2283 } 2284 } 2285 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2286 FormatToken *FormatTok = Comments[i]; 2287 if (HasTrailAlignedWithNextToken && 2288 i == StartOfTrailAlignedWithNextToken) { 2289 FormatTok->ContinuesLineCommentSection = false; 2290 } else { 2291 FormatTok->ContinuesLineCommentSection = 2292 continuesLineComment(*FormatTok, *Line, CommentPragmasRegex); 2293 } 2294 if (!FormatTok->ContinuesLineCommentSection && 2295 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2296 ShouldPushCommentsInCurrentLine = false; 2297 } 2298 if (ShouldPushCommentsInCurrentLine) { 2299 pushToken(FormatTok); 2300 } else { 2301 CommentsBeforeNextToken.push_back(FormatTok); 2302 } 2303 } 2304 } 2305 2306 void UnwrappedLineParser::readToken() { 2307 SmallVector<FormatToken *, 1> Comments; 2308 do { 2309 FormatTok = Tokens->getNextToken(); 2310 assert(FormatTok); 2311 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2312 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2313 distributeComments(Comments, FormatTok); 2314 Comments.clear(); 2315 // If there is an unfinished unwrapped line, we flush the preprocessor 2316 // directives only after that unwrapped line was finished later. 2317 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2318 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2319 // Comments stored before the preprocessor directive need to be output 2320 // before the preprocessor directive, at the same level as the 2321 // preprocessor directive, as we consider them to apply to the directive. 
2322 flushComments(isOnNewLine(*FormatTok)); 2323 parsePPDirective(); 2324 } 2325 while (FormatTok->Type == TT_ConflictStart || 2326 FormatTok->Type == TT_ConflictEnd || 2327 FormatTok->Type == TT_ConflictAlternative) { 2328 if (FormatTok->Type == TT_ConflictStart) { 2329 conditionalCompilationStart(/*Unreachable=*/false); 2330 } else if (FormatTok->Type == TT_ConflictAlternative) { 2331 conditionalCompilationAlternative(); 2332 } else if (FormatTok->Type == TT_ConflictEnd) { 2333 conditionalCompilationEnd(); 2334 } 2335 FormatTok = Tokens->getNextToken(); 2336 FormatTok->MustBreakBefore = true; 2337 } 2338 2339 if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && 2340 !Line->InPPDirective) { 2341 continue; 2342 } 2343 2344 if (!FormatTok->Tok.is(tok::comment)) { 2345 distributeComments(Comments, FormatTok); 2346 Comments.clear(); 2347 return; 2348 } 2349 2350 Comments.push_back(FormatTok); 2351 } while (!eof()); 2352 2353 distributeComments(Comments, nullptr); 2354 Comments.clear(); 2355 } 2356 2357 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2358 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2359 if (MustBreakBeforeNextToken) { 2360 Line->Tokens.back().Tok->MustBreakBefore = true; 2361 MustBreakBeforeNextToken = false; 2362 } 2363 } 2364 2365 } // end namespace format 2366 } // end namespace clang 2367