1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/Support/Debug.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 #include <algorithm> 21 22 #define DEBUG_TYPE "format-parser" 23 24 namespace clang { 25 namespace format { 26 27 class FormatTokenSource { 28 public: 29 virtual ~FormatTokenSource() {} 30 virtual FormatToken *getNextToken() = 0; 31 32 virtual unsigned getPosition() = 0; 33 virtual FormatToken *setPosition(unsigned Position) = 0; 34 }; 35 36 namespace { 37 38 class ScopedDeclarationState { 39 public: 40 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 41 bool MustBeDeclaration) 42 : Line(Line), Stack(Stack) { 43 Line.MustBeDeclaration = MustBeDeclaration; 44 Stack.push_back(MustBeDeclaration); 45 } 46 ~ScopedDeclarationState() { 47 Stack.pop_back(); 48 if (!Stack.empty()) 49 Line.MustBeDeclaration = Stack.back(); 50 else 51 Line.MustBeDeclaration = true; 52 } 53 54 private: 55 UnwrappedLine &Line; 56 std::vector<bool> &Stack; 57 }; 58 59 static bool isLineComment(const FormatToken &FormatTok) { 60 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 61 } 62 63 // Checks if \p FormatTok is a line comment that continues the line comment 64 // \p Previous. The original column of \p MinColumnToken is used to determine 65 // whether \p FormatTok is indented enough to the right to continue \p Previous. 66 static bool continuesLineComment(const FormatToken &FormatTok, 67 const FormatToken *Previous, 68 const FormatToken *MinColumnToken) { 69 if (!Previous || !MinColumnToken) 70 return false; 71 unsigned MinContinueColumn = 72 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 73 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 74 isLineComment(*Previous) && 75 FormatTok.OriginalColumn >= MinContinueColumn; 76 } 77 78 class ScopedMacroState : public FormatTokenSource { 79 public: 80 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 81 FormatToken *&ResetToken) 82 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 83 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 84 Token(nullptr), PreviousToken(nullptr) { 85 FakeEOF.Tok.startToken(); 86 FakeEOF.Tok.setKind(tok::eof); 87 TokenSource = this; 88 Line.Level = 0; 89 Line.InPPDirective = true; 90 } 91 92 ~ScopedMacroState() override { 93 TokenSource = PreviousTokenSource; 94 ResetToken = Token; 95 Line.InPPDirective = false; 96 Line.Level = PreviousLineLevel; 97 } 98 99 FormatToken *getNextToken() override { 100 // The \c UnwrappedLineParser guards against this by never calling 101 // \c getNextToken() after it has encountered the first eof token. 102 assert(!eof()); 103 PreviousToken = Token; 104 Token = PreviousTokenSource->getNextToken(); 105 if (eof()) 106 return &FakeEOF; 107 return Token; 108 } 109 110 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 111 112 FormatToken *setPosition(unsigned Position) override { 113 PreviousToken = nullptr; 114 Token = PreviousTokenSource->setPosition(Position); 115 return Token; 116 } 117 118 private: 119 bool eof() { 120 return Token && Token->HasUnescapedNewline && 121 !continuesLineComment(*Token, PreviousToken, 122 /*MinColumnToken=*/PreviousToken); 123 } 124 125 FormatToken FakeEOF; 126 UnwrappedLine &Line; 127 FormatTokenSource *&TokenSource; 128 FormatToken *&ResetToken; 129 unsigned PreviousLineLevel; 130 FormatTokenSource *PreviousTokenSource; 131 132 FormatToken *Token; 133 FormatToken *PreviousToken; 134 }; 135 136 } // end anonymous namespace 137 138 class ScopedLineState { 139 public: 140 ScopedLineState(UnwrappedLineParser &Parser, 141 bool SwitchToPreprocessorLines = false) 142 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 143 if (SwitchToPreprocessorLines) 144 Parser.CurrentLines = &Parser.PreprocessorDirectives; 145 else if (!Parser.Line->Tokens.empty()) 146 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 147 PreBlockLine = std::move(Parser.Line); 148 Parser.Line = llvm::make_unique<UnwrappedLine>(); 149 Parser.Line->Level = PreBlockLine->Level; 150 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 151 } 152 153 ~ScopedLineState() { 154 if (!Parser.Line->Tokens.empty()) { 155 Parser.addUnwrappedLine(); 156 } 157 assert(Parser.Line->Tokens.empty()); 158 Parser.Line = std::move(PreBlockLine); 159 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 160 Parser.MustBreakBeforeNextToken = true; 161 Parser.CurrentLines = OriginalLines; 162 } 163 164 private: 165 UnwrappedLineParser &Parser; 166 167 std::unique_ptr<UnwrappedLine> PreBlockLine; 168 SmallVectorImpl<UnwrappedLine> *OriginalLines; 169 }; 170 171 class CompoundStatementIndenter { 172 public: 173 CompoundStatementIndenter(UnwrappedLineParser *Parser, 174 const FormatStyle &Style, unsigned &LineLevel) 175 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 176 if (Style.BraceWrapping.AfterControlStatement) 177 Parser->addUnwrappedLine(); 178 if (Style.BraceWrapping.IndentBraces) 179 ++LineLevel; 180 } 181 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 182 183 private: 184 unsigned &LineLevel; 185 unsigned OldLineLevel; 186 }; 187 188 namespace { 189 190 class IndexedTokenSource : public FormatTokenSource { 191 public: 192 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 193 : Tokens(Tokens), Position(-1) {} 194 195 FormatToken *getNextToken() override { 196 ++Position; 197 return Tokens[Position]; 198 } 199 200 unsigned getPosition() override { 201 assert(Position >= 0); 202 return Position; 203 } 204 205 FormatToken *setPosition(unsigned P) override { 206 Position = P; 207 return Tokens[Position]; 208 } 209 210 void reset() { Position = -1; } 211 212 private: 213 ArrayRef<FormatToken *> Tokens; 214 int Position; 215 }; 216 217 } // end anonymous namespace 218 219 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 220 const AdditionalKeywords &Keywords, 221 unsigned FirstStartColumn, 222 ArrayRef<FormatToken *> Tokens, 223 UnwrappedLineConsumer &Callback) 224 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 225 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 226 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 227 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 228 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 229 ? IG_Rejected 230 : IG_Inited), 231 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 232 233 void UnwrappedLineParser::reset() { 234 PPBranchLevel = -1; 235 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 236 ? IG_Rejected 237 : IG_Inited; 238 IncludeGuardToken = nullptr; 239 Line.reset(new UnwrappedLine); 240 CommentsBeforeNextToken.clear(); 241 FormatTok = nullptr; 242 MustBreakBeforeNextToken = false; 243 PreprocessorDirectives.clear(); 244 CurrentLines = &Lines; 245 DeclarationScopeStack.clear(); 246 PPStack.clear(); 247 Line->FirstStartColumn = FirstStartColumn; 248 } 249 250 void UnwrappedLineParser::parse() { 251 IndexedTokenSource TokenSource(AllTokens); 252 Line->FirstStartColumn = FirstStartColumn; 253 do { 254 LLVM_DEBUG(llvm::dbgs() << "----\n"); 255 reset(); 256 Tokens = &TokenSource; 257 TokenSource.reset(); 258 259 readToken(); 260 parseFile(); 261 262 // If we found an include guard then all preprocessor directives (other than 263 // the guard) are over-indented by one. 264 if (IncludeGuard == IG_Found) 265 for (auto &Line : Lines) 266 if (Line.InPPDirective && Line.Level > 0) 267 --Line.Level; 268 269 // Create line with eof token. 270 pushToken(FormatTok); 271 addUnwrappedLine(); 272 273 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 274 E = Lines.end(); 275 I != E; ++I) { 276 Callback.consumeUnwrappedLine(*I); 277 } 278 Callback.finishRun(); 279 Lines.clear(); 280 while (!PPLevelBranchIndex.empty() && 281 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 282 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 283 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 284 } 285 if (!PPLevelBranchIndex.empty()) { 286 ++PPLevelBranchIndex.back(); 287 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 288 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 289 } 290 } while (!PPLevelBranchIndex.empty()); 291 } 292 293 void UnwrappedLineParser::parseFile() { 294 // The top-level context in a file always has declarations, except for pre- 295 // processor directives and JavaScript files. 296 bool MustBeDeclaration = 297 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 298 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 299 MustBeDeclaration); 300 if (Style.Language == FormatStyle::LK_TextProto) 301 parseBracedList(); 302 else 303 parseLevel(/*HasOpeningBrace=*/false); 304 // Make sure to format the remaining tokens. 305 // 306 // LK_TextProto is special since its top-level is parsed as the body of a 307 // braced list, which does not necessarily have natural line separators such 308 // as a semicolon. Comments after the last entry that have been determined to 309 // not belong to that line, as in: 310 // key: value 311 // // endfile comment 312 // do not have a chance to be put on a line of their own until this point. 313 // Here we add this newline before end-of-file comments. 314 if (Style.Language == FormatStyle::LK_TextProto && 315 !CommentsBeforeNextToken.empty()) 316 addUnwrappedLine(); 317 flushComments(true); 318 addUnwrappedLine(); 319 } 320 321 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 322 bool SwitchLabelEncountered = false; 323 do { 324 tok::TokenKind kind = FormatTok->Tok.getKind(); 325 if (FormatTok->Type == TT_MacroBlockBegin) { 326 kind = tok::l_brace; 327 } else if (FormatTok->Type == TT_MacroBlockEnd) { 328 kind = tok::r_brace; 329 } 330 331 switch (kind) { 332 case tok::comment: 333 nextToken(); 334 addUnwrappedLine(); 335 break; 336 case tok::l_brace: 337 // FIXME: Add parameter whether this can happen - if this happens, we must 338 // be in a non-declaration context. 339 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 340 continue; 341 parseBlock(/*MustBeDeclaration=*/false); 342 addUnwrappedLine(); 343 break; 344 case tok::r_brace: 345 if (HasOpeningBrace) 346 return; 347 nextToken(); 348 addUnwrappedLine(); 349 break; 350 case tok::kw_default: { 351 unsigned StoredPosition = Tokens->getPosition(); 352 FormatToken *Next; 353 do { 354 Next = Tokens->getNextToken(); 355 } while (Next && Next->is(tok::comment)); 356 FormatTok = Tokens->setPosition(StoredPosition); 357 if (Next && Next->isNot(tok::colon)) { 358 // default not followed by ':' is not a case label; treat it like 359 // an identifier. 360 parseStructuralElement(); 361 break; 362 } 363 // Else, if it is 'default:', fall through to the case handling. 364 LLVM_FALLTHROUGH; 365 } 366 case tok::kw_case: 367 if (Style.Language == FormatStyle::LK_JavaScript && 368 Line->MustBeDeclaration) { 369 // A 'case: string' style field declaration. 370 parseStructuralElement(); 371 break; 372 } 373 if (!SwitchLabelEncountered && 374 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 375 ++Line->Level; 376 SwitchLabelEncountered = true; 377 parseStructuralElement(); 378 break; 379 default: 380 parseStructuralElement(); 381 break; 382 } 383 } while (!eof()); 384 } 385 386 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 387 // We'll parse forward through the tokens until we hit 388 // a closing brace or eof - note that getNextToken() will 389 // parse macros, so this will magically work inside macro 390 // definitions, too. 391 unsigned StoredPosition = Tokens->getPosition(); 392 FormatToken *Tok = FormatTok; 393 const FormatToken *PrevTok = Tok->Previous; 394 // Keep a stack of positions of lbrace tokens. We will 395 // update information about whether an lbrace starts a 396 // braced init list or a different block during the loop. 397 SmallVector<FormatToken *, 8> LBraceStack; 398 assert(Tok->Tok.is(tok::l_brace)); 399 do { 400 // Get next non-comment token. 401 FormatToken *NextTok; 402 unsigned ReadTokens = 0; 403 do { 404 NextTok = Tokens->getNextToken(); 405 ++ReadTokens; 406 } while (NextTok->is(tok::comment)); 407 408 switch (Tok->Tok.getKind()) { 409 case tok::l_brace: 410 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 411 if (PrevTok->isOneOf(tok::colon, tok::less)) 412 // A ':' indicates this code is in a type, or a braced list 413 // following a label in an object literal ({a: {b: 1}}). 414 // A '<' could be an object used in a comparison, but that is nonsense 415 // code (can never return true), so more likely it is a generic type 416 // argument (`X<{a: string; b: number}>`). 417 // The code below could be confused by semicolons between the 418 // individual members in a type member list, which would normally 419 // trigger BK_Block. In both cases, this must be parsed as an inline 420 // braced init. 421 Tok->BlockKind = BK_BracedInit; 422 else if (PrevTok->is(tok::r_paren)) 423 // `) { }` can only occur in function or method declarations in JS. 424 Tok->BlockKind = BK_Block; 425 } else { 426 Tok->BlockKind = BK_Unknown; 427 } 428 LBraceStack.push_back(Tok); 429 break; 430 case tok::r_brace: 431 if (LBraceStack.empty()) 432 break; 433 if (LBraceStack.back()->BlockKind == BK_Unknown) { 434 bool ProbablyBracedList = false; 435 if (Style.Language == FormatStyle::LK_Proto) { 436 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 437 } else { 438 // Using OriginalColumn to distinguish between ObjC methods and 439 // binary operators is a bit hacky. 440 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 441 NextTok->OriginalColumn == 0; 442 443 // If there is a comma, semicolon or right paren after the closing 444 // brace, we assume this is a braced initializer list. Note that 445 // regardless how we mark inner braces here, we will overwrite the 446 // BlockKind later if we parse a braced list (where all blocks 447 // inside are by default braced lists), or when we explicitly detect 448 // blocks (for example while parsing lambdas). 449 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 450 // braced list in JS. 451 ProbablyBracedList = 452 (Style.Language == FormatStyle::LK_JavaScript && 453 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 454 Keywords.kw_as)) || 455 (Style.isCpp() && NextTok->is(tok::l_paren)) || 456 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 457 tok::r_paren, tok::r_square, tok::l_brace, 458 tok::ellipsis) || 459 (NextTok->is(tok::identifier) && 460 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 461 (NextTok->is(tok::semi) && 462 (!ExpectClassBody || LBraceStack.size() != 1)) || 463 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 464 if (NextTok->is(tok::l_square)) { 465 // We can have an array subscript after a braced init 466 // list, but C++11 attributes are expected after blocks. 467 NextTok = Tokens->getNextToken(); 468 ++ReadTokens; 469 ProbablyBracedList = NextTok->isNot(tok::l_square); 470 } 471 } 472 if (ProbablyBracedList) { 473 Tok->BlockKind = BK_BracedInit; 474 LBraceStack.back()->BlockKind = BK_BracedInit; 475 } else { 476 Tok->BlockKind = BK_Block; 477 LBraceStack.back()->BlockKind = BK_Block; 478 } 479 } 480 LBraceStack.pop_back(); 481 break; 482 case tok::identifier: 483 if (!Tok->is(TT_StatementMacro)) 484 break; 485 LLVM_FALLTHROUGH; 486 case tok::at: 487 case tok::semi: 488 case tok::kw_if: 489 case tok::kw_while: 490 case tok::kw_for: 491 case tok::kw_switch: 492 case tok::kw_try: 493 case tok::kw___try: 494 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 495 LBraceStack.back()->BlockKind = BK_Block; 496 break; 497 default: 498 break; 499 } 500 PrevTok = Tok; 501 Tok = NextTok; 502 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 503 504 // Assume other blocks for all unclosed opening braces. 505 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 506 if (LBraceStack[i]->BlockKind == BK_Unknown) 507 LBraceStack[i]->BlockKind = BK_Block; 508 } 509 510 FormatTok = Tokens->setPosition(StoredPosition); 511 } 512 513 template <class T> 514 static inline void hash_combine(std::size_t &seed, const T &v) { 515 std::hash<T> hasher; 516 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 517 } 518 519 size_t UnwrappedLineParser::computePPHash() const { 520 size_t h = 0; 521 for (const auto &i : PPStack) { 522 hash_combine(h, size_t(i.Kind)); 523 hash_combine(h, i.Line); 524 } 525 return h; 526 } 527 528 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 529 bool MunchSemi) { 530 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 531 "'{' or macro block token expected"); 532 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 533 FormatTok->BlockKind = BK_Block; 534 535 size_t PPStartHash = computePPHash(); 536 537 unsigned InitialLevel = Line->Level; 538 nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); 539 540 if (MacroBlock && FormatTok->is(tok::l_paren)) 541 parseParens(); 542 543 size_t NbPreprocessorDirectives = 544 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 545 addUnwrappedLine(); 546 size_t OpeningLineIndex = 547 CurrentLines->empty() 548 ? (UnwrappedLine::kInvalidIndex) 549 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 550 551 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 552 MustBeDeclaration); 553 if (AddLevel) 554 ++Line->Level; 555 parseLevel(/*HasOpeningBrace=*/true); 556 557 if (eof()) 558 return; 559 560 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 561 : !FormatTok->is(tok::r_brace)) { 562 Line->Level = InitialLevel; 563 FormatTok->BlockKind = BK_Block; 564 return; 565 } 566 567 size_t PPEndHash = computePPHash(); 568 569 // Munch the closing brace. 570 nextToken(/*LevelDifference=*/AddLevel ? -1 : 0); 571 572 if (MacroBlock && FormatTok->is(tok::l_paren)) 573 parseParens(); 574 575 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 576 nextToken(); 577 Line->Level = InitialLevel; 578 579 if (PPStartHash == PPEndHash) { 580 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 581 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 582 // Update the opening line to add the forward reference as well 583 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 584 CurrentLines->size() - 1; 585 } 586 } 587 } 588 589 static bool isGoogScope(const UnwrappedLine &Line) { 590 // FIXME: Closure-library specific stuff should not be hard-coded but be 591 // configurable. 592 if (Line.Tokens.size() < 4) 593 return false; 594 auto I = Line.Tokens.begin(); 595 if (I->Tok->TokenText != "goog") 596 return false; 597 ++I; 598 if (I->Tok->isNot(tok::period)) 599 return false; 600 ++I; 601 if (I->Tok->TokenText != "scope") 602 return false; 603 ++I; 604 return I->Tok->is(tok::l_paren); 605 } 606 607 static bool isIIFE(const UnwrappedLine &Line, 608 const AdditionalKeywords &Keywords) { 609 // Look for the start of an immediately invoked anonymous function. 610 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 611 // This is commonly done in JavaScript to create a new, anonymous scope. 612 // Example: (function() { ... })() 613 if (Line.Tokens.size() < 3) 614 return false; 615 auto I = Line.Tokens.begin(); 616 if (I->Tok->isNot(tok::l_paren)) 617 return false; 618 ++I; 619 if (I->Tok->isNot(Keywords.kw_function)) 620 return false; 621 ++I; 622 return I->Tok->is(tok::l_paren); 623 } 624 625 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 626 const FormatToken &InitialToken) { 627 if (InitialToken.is(tok::kw_namespace)) 628 return Style.BraceWrapping.AfterNamespace; 629 if (InitialToken.is(tok::kw_class)) 630 return Style.BraceWrapping.AfterClass; 631 if (InitialToken.is(tok::kw_union)) 632 return Style.BraceWrapping.AfterUnion; 633 if (InitialToken.is(tok::kw_struct)) 634 return Style.BraceWrapping.AfterStruct; 635 return false; 636 } 637 638 void UnwrappedLineParser::parseChildBlock() { 639 FormatTok->BlockKind = BK_Block; 640 nextToken(); 641 { 642 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && 643 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 644 ScopedLineState LineState(*this); 645 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 646 /*MustBeDeclaration=*/false); 647 Line->Level += SkipIndent ? 0 : 1; 648 parseLevel(/*HasOpeningBrace=*/true); 649 flushComments(isOnNewLine(*FormatTok)); 650 Line->Level -= SkipIndent ? 0 : 1; 651 } 652 nextToken(); 653 } 654 655 void UnwrappedLineParser::parsePPDirective() { 656 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 657 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 658 nextToken(); 659 660 if (!FormatTok->Tok.getIdentifierInfo()) { 661 parsePPUnknown(); 662 return; 663 } 664 665 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 666 case tok::pp_define: 667 parsePPDefine(); 668 return; 669 case tok::pp_if: 670 parsePPIf(/*IfDef=*/false); 671 break; 672 case tok::pp_ifdef: 673 case tok::pp_ifndef: 674 parsePPIf(/*IfDef=*/true); 675 break; 676 case tok::pp_else: 677 parsePPElse(); 678 break; 679 case tok::pp_elif: 680 parsePPElIf(); 681 break; 682 case tok::pp_endif: 683 parsePPEndIf(); 684 break; 685 default: 686 parsePPUnknown(); 687 break; 688 } 689 } 690 691 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 692 size_t Line = CurrentLines->size(); 693 if (CurrentLines == &PreprocessorDirectives) 694 Line += Lines.size(); 695 696 if (Unreachable || 697 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 698 PPStack.push_back({PP_Unreachable, Line}); 699 else 700 PPStack.push_back({PP_Conditional, Line}); 701 } 702 703 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 704 ++PPBranchLevel; 705 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 706 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 707 PPLevelBranchIndex.push_back(0); 708 PPLevelBranchCount.push_back(0); 709 } 710 PPChainBranchIndex.push(0); 711 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 712 conditionalCompilationCondition(Unreachable || Skip); 713 } 714 715 void UnwrappedLineParser::conditionalCompilationAlternative() { 716 if (!PPStack.empty()) 717 PPStack.pop_back(); 718 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 719 if (!PPChainBranchIndex.empty()) 720 ++PPChainBranchIndex.top(); 721 conditionalCompilationCondition( 722 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 723 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 724 } 725 726 void UnwrappedLineParser::conditionalCompilationEnd() { 727 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 728 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 729 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 730 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 731 } 732 } 733 // Guard against #endif's without #if. 734 if (PPBranchLevel > -1) 735 --PPBranchLevel; 736 if (!PPChainBranchIndex.empty()) 737 PPChainBranchIndex.pop(); 738 if (!PPStack.empty()) 739 PPStack.pop_back(); 740 } 741 742 void UnwrappedLineParser::parsePPIf(bool IfDef) { 743 bool IfNDef = FormatTok->is(tok::pp_ifndef); 744 nextToken(); 745 bool Unreachable = false; 746 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 747 Unreachable = true; 748 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 749 Unreachable = true; 750 conditionalCompilationStart(Unreachable); 751 FormatToken *IfCondition = FormatTok; 752 // If there's a #ifndef on the first line, and the only lines before it are 753 // comments, it could be an include guard. 754 bool MaybeIncludeGuard = IfNDef; 755 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 756 for (auto &Line : Lines) { 757 if (!Line.Tokens.front().Tok->is(tok::comment)) { 758 MaybeIncludeGuard = false; 759 IncludeGuard = IG_Rejected; 760 break; 761 } 762 } 763 --PPBranchLevel; 764 parsePPUnknown(); 765 ++PPBranchLevel; 766 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 767 IncludeGuard = IG_IfNdefed; 768 IncludeGuardToken = IfCondition; 769 } 770 } 771 772 void UnwrappedLineParser::parsePPElse() { 773 // If a potential include guard has an #else, it's not an include guard. 774 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 775 IncludeGuard = IG_Rejected; 776 conditionalCompilationAlternative(); 777 if (PPBranchLevel > -1) 778 --PPBranchLevel; 779 parsePPUnknown(); 780 ++PPBranchLevel; 781 } 782 783 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 784 785 void UnwrappedLineParser::parsePPEndIf() { 786 conditionalCompilationEnd(); 787 parsePPUnknown(); 788 // If the #endif of a potential include guard is the last thing in the file, 789 // then we found an include guard. 790 unsigned TokenPosition = Tokens->getPosition(); 791 FormatToken *PeekNext = AllTokens[TokenPosition]; 792 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && 793 PeekNext->is(tok::eof) && 794 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 795 IncludeGuard = IG_Found; 796 } 797 798 void UnwrappedLineParser::parsePPDefine() { 799 nextToken(); 800 801 if (FormatTok->Tok.getKind() != tok::identifier) { 802 IncludeGuard = IG_Rejected; 803 IncludeGuardToken = nullptr; 804 parsePPUnknown(); 805 return; 806 } 807 808 if (IncludeGuard == IG_IfNdefed && 809 IncludeGuardToken->TokenText == FormatTok->TokenText) { 810 IncludeGuard = IG_Defined; 811 IncludeGuardToken = nullptr; 812 for (auto &Line : Lines) { 813 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 814 IncludeGuard = IG_Rejected; 815 break; 816 } 817 } 818 } 819 820 nextToken(); 821 if (FormatTok->Tok.getKind() == tok::l_paren && 822 FormatTok->WhitespaceRange.getBegin() == 823 FormatTok->WhitespaceRange.getEnd()) { 824 parseParens(); 825 } 826 if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) 827 Line->Level += PPBranchLevel + 1; 828 addUnwrappedLine(); 829 ++Line->Level; 830 831 // Errors during a preprocessor directive can only affect the layout of the 832 // preprocessor directive, and thus we ignore them. An alternative approach 833 // would be to use the same approach we use on the file level (no 834 // re-indentation if there was a structural error) within the macro 835 // definition. 836 parseFile(); 837 } 838 839 void UnwrappedLineParser::parsePPUnknown() { 840 do { 841 nextToken(); 842 } while (!eof()); 843 if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) 844 Line->Level += PPBranchLevel + 1; 845 addUnwrappedLine(); 846 } 847 848 // Here we blacklist certain tokens that are not usually the first token in an 849 // unwrapped line. This is used in attempt to distinguish macro calls without 850 // trailing semicolons from other constructs split to several lines. 851 static bool tokenCanStartNewLine(const clang::Token &Tok) { 852 // Semicolon can be a null-statement, l_square can be a start of a macro or 853 // a C++11 attribute, but this doesn't seem to be common. 854 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 855 Tok.isNot(tok::l_square) && 856 // Tokens that can only be used as binary operators and a part of 857 // overloaded operator names. 858 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 859 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 860 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 861 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 862 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 863 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 864 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 865 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 866 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 867 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 868 Tok.isNot(tok::lesslessequal) && 869 // Colon is used in labels, base class lists, initializer lists, 870 // range-based for loops, ternary operator, but should never be the 871 // first token in an unwrapped line. 872 Tok.isNot(tok::colon) && 873 // 'noexcept' is a trailing annotation. 874 Tok.isNot(tok::kw_noexcept); 875 } 876 877 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 878 const FormatToken *FormatTok) { 879 // FIXME: This returns true for C/C++ keywords like 'struct'. 880 return FormatTok->is(tok::identifier) && 881 (FormatTok->Tok.getIdentifierInfo() == nullptr || 882 !FormatTok->isOneOf( 883 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 884 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 885 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 886 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 887 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 888 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, 889 Keywords.kw_from)); 890 } 891 892 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 893 const FormatToken *FormatTok) { 894 return FormatTok->Tok.isLiteral() || 895 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 896 mustBeJSIdent(Keywords, FormatTok); 897 } 898 899 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 900 // when encountered after a value (see mustBeJSIdentOrValue). 901 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 902 const FormatToken *FormatTok) { 903 return FormatTok->isOneOf( 904 tok::kw_return, Keywords.kw_yield, 905 // conditionals 906 tok::kw_if, tok::kw_else, 907 // loops 908 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 909 // switch/case 910 tok::kw_switch, tok::kw_case, 911 // exceptions 912 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 913 // declaration 914 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 915 Keywords.kw_async, Keywords.kw_function, 916 // import/export 917 Keywords.kw_import, tok::kw_export); 918 } 919 920 // readTokenWithJavaScriptASI reads the next token and terminates the current 921 // line if JavaScript Automatic Semicolon Insertion must 922 // happen between the current token and the next token. 923 // 924 // This method is conservative - it cannot cover all edge cases of JavaScript, 925 // but only aims to correctly handle certain well known cases. It *must not* 926 // return true in speculative cases. 927 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 928 FormatToken *Previous = FormatTok; 929 readToken(); 930 FormatToken *Next = FormatTok; 931 932 bool IsOnSameLine = 933 CommentsBeforeNextToken.empty() 934 ? Next->NewlinesBefore == 0 935 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 936 if (IsOnSameLine) 937 return; 938 939 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 940 bool PreviousStartsTemplateExpr = 941 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 942 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 943 // If the line contains an '@' sign, the previous token might be an 944 // annotation, which can precede another identifier/value. 945 bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(), 946 [](UnwrappedLineNode &LineNode) { 947 return LineNode.Tok->is(tok::at); 948 }) != Line->Tokens.end(); 949 if (HasAt) 950 return; 951 } 952 if (Next->is(tok::exclaim) && PreviousMustBeValue) 953 return addUnwrappedLine(); 954 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 955 bool NextEndsTemplateExpr = 956 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 957 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 958 (PreviousMustBeValue || 959 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 960 tok::minusminus))) 961 return addUnwrappedLine(); 962 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 963 isJSDeclOrStmt(Keywords, Next)) 964 return addUnwrappedLine(); 965 } 966 967 void UnwrappedLineParser::parseStructuralElement() { 968 assert(!FormatTok->is(tok::l_brace)); 969 if (Style.Language == FormatStyle::LK_TableGen && 970 FormatTok->is(tok::pp_include)) { 971 nextToken(); 972 if (FormatTok->is(tok::string_literal)) 973 nextToken(); 974 addUnwrappedLine(); 975 return; 976 } 977 switch (FormatTok->Tok.getKind()) { 978 case tok::kw_asm: 979 nextToken(); 980 if (FormatTok->is(tok::l_brace)) { 981 FormatTok->Type = TT_InlineASMBrace; 982 nextToken(); 983 while (FormatTok && FormatTok->isNot(tok::eof)) { 984 if (FormatTok->is(tok::r_brace)) { 985 FormatTok->Type = TT_InlineASMBrace; 986 nextToken(); 987 addUnwrappedLine(); 988 break; 989 } 990 FormatTok->Finalized = true; 991 nextToken(); 992 } 993 } 994 break; 995 case tok::kw_namespace: 996 parseNamespace(); 997 return; 998 case tok::kw_public: 999 case tok::kw_protected: 1000 case tok::kw_private: 1001 if (Style.Language == FormatStyle::LK_Java || 1002 Style.Language == FormatStyle::LK_JavaScript) 1003 nextToken(); 1004 else 1005 parseAccessSpecifier(); 1006 return; 1007 case tok::kw_if: 1008 parseIfThenElse(); 1009 return; 1010 case tok::kw_for: 1011 case tok::kw_while: 1012 parseForOrWhileLoop(); 1013 return; 1014 case tok::kw_do: 1015 parseDoWhile(); 1016 return; 1017 case tok::kw_switch: 1018 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1019 // 'switch: string' field declaration. 1020 break; 1021 parseSwitch(); 1022 return; 1023 case tok::kw_default: 1024 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1025 // 'default: string' field declaration. 1026 break; 1027 nextToken(); 1028 if (FormatTok->is(tok::colon)) { 1029 parseLabel(); 1030 return; 1031 } 1032 // e.g. "default void f() {}" in a Java interface. 1033 break; 1034 case tok::kw_case: 1035 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1036 // 'case: string' field declaration. 1037 break; 1038 parseCaseLabel(); 1039 return; 1040 case tok::kw_try: 1041 case tok::kw___try: 1042 parseTryCatch(); 1043 return; 1044 case tok::kw_extern: 1045 nextToken(); 1046 if (FormatTok->Tok.is(tok::string_literal)) { 1047 nextToken(); 1048 if (FormatTok->Tok.is(tok::l_brace)) { 1049 if (Style.BraceWrapping.AfterExternBlock) { 1050 addUnwrappedLine(); 1051 parseBlock(/*MustBeDeclaration=*/true); 1052 } else { 1053 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 1054 } 1055 addUnwrappedLine(); 1056 return; 1057 } 1058 } 1059 break; 1060 case tok::kw_export: 1061 if (Style.Language == FormatStyle::LK_JavaScript) { 1062 parseJavaScriptEs6ImportExport(); 1063 return; 1064 } 1065 if (!Style.isCpp()) 1066 break; 1067 // Handle C++ "(inline|export) namespace". 1068 LLVM_FALLTHROUGH; 1069 case tok::kw_inline: 1070 nextToken(); 1071 if (FormatTok->Tok.is(tok::kw_namespace)) { 1072 parseNamespace(); 1073 return; 1074 } 1075 break; 1076 case tok::identifier: 1077 if (FormatTok->is(TT_ForEachMacro)) { 1078 parseForOrWhileLoop(); 1079 return; 1080 } 1081 if (FormatTok->is(TT_MacroBlockBegin)) { 1082 parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, 1083 /*MunchSemi=*/false); 1084 return; 1085 } 1086 if (FormatTok->is(Keywords.kw_import)) { 1087 if (Style.Language == FormatStyle::LK_JavaScript) { 1088 parseJavaScriptEs6ImportExport(); 1089 return; 1090 } 1091 if (Style.Language == FormatStyle::LK_Proto) { 1092 nextToken(); 1093 if (FormatTok->is(tok::kw_public)) 1094 nextToken(); 1095 if (!FormatTok->is(tok::string_literal)) 1096 return; 1097 nextToken(); 1098 if (FormatTok->is(tok::semi)) 1099 nextToken(); 1100 addUnwrappedLine(); 1101 return; 1102 } 1103 } 1104 if (Style.isCpp() && 1105 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1106 Keywords.kw_slots, Keywords.kw_qslots)) { 1107 nextToken(); 1108 if (FormatTok->is(tok::colon)) { 1109 nextToken(); 1110 addUnwrappedLine(); 1111 return; 1112 } 1113 } 1114 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1115 parseStatementMacro(); 1116 return; 1117 } 1118 // In all other cases, parse the declaration. 1119 break; 1120 default: 1121 break; 1122 } 1123 do { 1124 const FormatToken *Previous = FormatTok->Previous; 1125 switch (FormatTok->Tok.getKind()) { 1126 case tok::at: 1127 nextToken(); 1128 if (FormatTok->Tok.is(tok::l_brace)) { 1129 nextToken(); 1130 parseBracedList(); 1131 break; 1132 } else if (Style.Language == FormatStyle::LK_Java && 1133 FormatTok->is(Keywords.kw_interface)) { 1134 nextToken(); 1135 break; 1136 } 1137 switch (FormatTok->Tok.getObjCKeywordID()) { 1138 case tok::objc_public: 1139 case tok::objc_protected: 1140 case tok::objc_package: 1141 case tok::objc_private: 1142 return parseAccessSpecifier(); 1143 case tok::objc_interface: 1144 case tok::objc_implementation: 1145 return parseObjCInterfaceOrImplementation(); 1146 case tok::objc_protocol: 1147 if (parseObjCProtocol()) 1148 return; 1149 break; 1150 case tok::objc_end: 1151 return; // Handled by the caller. 1152 case tok::objc_optional: 1153 case tok::objc_required: 1154 nextToken(); 1155 addUnwrappedLine(); 1156 return; 1157 case tok::objc_autoreleasepool: 1158 nextToken(); 1159 if (FormatTok->Tok.is(tok::l_brace)) { 1160 if (Style.BraceWrapping.AfterControlStatement) 1161 addUnwrappedLine(); 1162 parseBlock(/*MustBeDeclaration=*/false); 1163 } 1164 addUnwrappedLine(); 1165 return; 1166 case tok::objc_synchronized: 1167 nextToken(); 1168 if (FormatTok->Tok.is(tok::l_paren)) 1169 // Skip synchronization object 1170 parseParens(); 1171 if (FormatTok->Tok.is(tok::l_brace)) { 1172 if (Style.BraceWrapping.AfterControlStatement) 1173 addUnwrappedLine(); 1174 parseBlock(/*MustBeDeclaration=*/false); 1175 } 1176 addUnwrappedLine(); 1177 return; 1178 case tok::objc_try: 1179 // This branch isn't strictly necessary (the kw_try case below would 1180 // do this too after the tok::at is parsed above). But be explicit. 1181 parseTryCatch(); 1182 return; 1183 default: 1184 break; 1185 } 1186 break; 1187 case tok::kw_enum: 1188 // Ignore if this is part of "template <enum ...". 1189 if (Previous && Previous->is(tok::less)) { 1190 nextToken(); 1191 break; 1192 } 1193 1194 // parseEnum falls through and does not yet add an unwrapped line as an 1195 // enum definition can start a structural element. 1196 if (!parseEnum()) 1197 break; 1198 // This only applies for C++. 1199 if (!Style.isCpp()) { 1200 addUnwrappedLine(); 1201 return; 1202 } 1203 break; 1204 case tok::kw_typedef: 1205 nextToken(); 1206 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1207 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 1208 parseEnum(); 1209 break; 1210 case tok::kw_struct: 1211 case tok::kw_union: 1212 case tok::kw_class: 1213 // parseRecord falls through and does not yet add an unwrapped line as a 1214 // record declaration or definition can start a structural element. 1215 parseRecord(); 1216 // This does not apply for Java and JavaScript. 1217 if (Style.Language == FormatStyle::LK_Java || 1218 Style.Language == FormatStyle::LK_JavaScript) { 1219 if (FormatTok->is(tok::semi)) 1220 nextToken(); 1221 addUnwrappedLine(); 1222 return; 1223 } 1224 break; 1225 case tok::period: 1226 nextToken(); 1227 // In Java, classes have an implicit static member "class". 1228 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1229 FormatTok->is(tok::kw_class)) 1230 nextToken(); 1231 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 1232 FormatTok->Tok.getIdentifierInfo()) 1233 // JavaScript only has pseudo keywords, all keywords are allowed to 1234 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1235 nextToken(); 1236 break; 1237 case tok::semi: 1238 nextToken(); 1239 addUnwrappedLine(); 1240 return; 1241 case tok::r_brace: 1242 addUnwrappedLine(); 1243 return; 1244 case tok::l_paren: 1245 parseParens(); 1246 break; 1247 case tok::kw_operator: 1248 nextToken(); 1249 if (FormatTok->isBinaryOperator()) 1250 nextToken(); 1251 break; 1252 case tok::caret: 1253 nextToken(); 1254 if (FormatTok->Tok.isAnyIdentifier() || 1255 FormatTok->isSimpleTypeSpecifier()) 1256 nextToken(); 1257 if (FormatTok->is(tok::l_paren)) 1258 parseParens(); 1259 if (FormatTok->is(tok::l_brace)) 1260 parseChildBlock(); 1261 break; 1262 case tok::l_brace: 1263 if (!tryToParseBracedList()) { 1264 // A block outside of parentheses must be the last part of a 1265 // structural element. 1266 // FIXME: Figure out cases where this is not true, and add projections 1267 // for them (the one we know is missing are lambdas). 1268 if (Style.BraceWrapping.AfterFunction) 1269 addUnwrappedLine(); 1270 FormatTok->Type = TT_FunctionLBrace; 1271 parseBlock(/*MustBeDeclaration=*/false); 1272 addUnwrappedLine(); 1273 return; 1274 } 1275 // Otherwise this was a braced init list, and the structural 1276 // element continues. 1277 break; 1278 case tok::kw_try: 1279 // We arrive here when parsing function-try blocks. 1280 if (Style.BraceWrapping.AfterFunction) 1281 addUnwrappedLine(); 1282 parseTryCatch(); 1283 return; 1284 case tok::identifier: { 1285 if (FormatTok->is(TT_MacroBlockEnd)) { 1286 addUnwrappedLine(); 1287 return; 1288 } 1289 1290 // Function declarations (as opposed to function expressions) are parsed 1291 // on their own unwrapped line by continuing this loop. Function 1292 // expressions (functions that are not on their own line) must not create 1293 // a new unwrapped line, so they are special cased below. 1294 size_t TokenCount = Line->Tokens.size(); 1295 if (Style.Language == FormatStyle::LK_JavaScript && 1296 FormatTok->is(Keywords.kw_function) && 1297 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1298 Keywords.kw_async)))) { 1299 tryToParseJSFunction(); 1300 break; 1301 } 1302 if ((Style.Language == FormatStyle::LK_JavaScript || 1303 Style.Language == FormatStyle::LK_Java) && 1304 FormatTok->is(Keywords.kw_interface)) { 1305 if (Style.Language == FormatStyle::LK_JavaScript) { 1306 // In JavaScript/TypeScript, "interface" can be used as a standalone 1307 // identifier, e.g. in `var interface = 1;`. If "interface" is 1308 // followed by another identifier, it is very like to be an actual 1309 // interface declaration. 1310 unsigned StoredPosition = Tokens->getPosition(); 1311 FormatToken *Next = Tokens->getNextToken(); 1312 FormatTok = Tokens->setPosition(StoredPosition); 1313 if (Next && !mustBeJSIdent(Keywords, Next)) { 1314 nextToken(); 1315 break; 1316 } 1317 } 1318 parseRecord(); 1319 addUnwrappedLine(); 1320 return; 1321 } 1322 1323 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1324 parseStatementMacro(); 1325 return; 1326 } 1327 1328 // See if the following token should start a new unwrapped line. 1329 StringRef Text = FormatTok->TokenText; 1330 nextToken(); 1331 if (Line->Tokens.size() == 1 && 1332 // JS doesn't have macros, and within classes colons indicate fields, 1333 // not labels. 1334 Style.Language != FormatStyle::LK_JavaScript) { 1335 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1336 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1337 parseLabel(); 1338 return; 1339 } 1340 // Recognize function-like macro usages without trailing semicolon as 1341 // well as free-standing macros like Q_OBJECT. 1342 bool FunctionLike = FormatTok->is(tok::l_paren); 1343 if (FunctionLike) 1344 parseParens(); 1345 1346 bool FollowedByNewline = 1347 CommentsBeforeNextToken.empty() 1348 ? FormatTok->NewlinesBefore > 0 1349 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1350 1351 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1352 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 1353 addUnwrappedLine(); 1354 return; 1355 } 1356 } 1357 break; 1358 } 1359 case tok::equal: 1360 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 1361 // TT_JsFatArrow. The always start an expression or a child block if 1362 // followed by a curly. 1363 if (FormatTok->is(TT_JsFatArrow)) { 1364 nextToken(); 1365 if (FormatTok->is(tok::l_brace)) 1366 parseChildBlock(); 1367 break; 1368 } 1369 1370 nextToken(); 1371 if (FormatTok->Tok.is(tok::l_brace)) { 1372 nextToken(); 1373 parseBracedList(); 1374 } else if (Style.Language == FormatStyle::LK_Proto && 1375 FormatTok->Tok.is(tok::less)) { 1376 nextToken(); 1377 parseBracedList(/*ContinueOnSemicolons=*/false, 1378 /*ClosingBraceKind=*/tok::greater); 1379 } 1380 break; 1381 case tok::l_square: 1382 parseSquare(); 1383 break; 1384 case tok::kw_new: 1385 parseNew(); 1386 break; 1387 default: 1388 nextToken(); 1389 break; 1390 } 1391 } while (!eof()); 1392 } 1393 1394 bool UnwrappedLineParser::tryToParseLambda() { 1395 if (!Style.isCpp()) { 1396 nextToken(); 1397 return false; 1398 } 1399 assert(FormatTok->is(tok::l_square)); 1400 FormatToken &LSquare = *FormatTok; 1401 if (!tryToParseLambdaIntroducer()) 1402 return false; 1403 1404 bool SeenArrow = false; 1405 1406 while (FormatTok->isNot(tok::l_brace)) { 1407 if (FormatTok->isSimpleTypeSpecifier()) { 1408 nextToken(); 1409 continue; 1410 } 1411 switch (FormatTok->Tok.getKind()) { 1412 case tok::l_brace: 1413 break; 1414 case tok::l_paren: 1415 parseParens(); 1416 break; 1417 case tok::amp: 1418 case tok::star: 1419 case tok::kw_const: 1420 case tok::comma: 1421 case tok::less: 1422 case tok::greater: 1423 case tok::identifier: 1424 case tok::numeric_constant: 1425 case tok::coloncolon: 1426 case tok::kw_mutable: 1427 case tok::kw_noexcept: 1428 nextToken(); 1429 break; 1430 // Specialization of a template with an integer parameter can contain 1431 // arithmetic, logical, comparison and ternary operators. 1432 // 1433 // FIXME: This also accepts sequences of operators that are not in the scope 1434 // of a template argument list. 1435 // 1436 // In a C++ lambda a template type can only occur after an arrow. We use 1437 // this as an heuristic to distinguish between Objective-C expressions 1438 // followed by an `a->b` expression, such as: 1439 // ([obj func:arg] + a->b) 1440 // Otherwise the code below would parse as a lambda. 1441 case tok::plus: 1442 case tok::minus: 1443 case tok::exclaim: 1444 case tok::tilde: 1445 case tok::slash: 1446 case tok::percent: 1447 case tok::lessless: 1448 case tok::pipe: 1449 case tok::pipepipe: 1450 case tok::ampamp: 1451 case tok::caret: 1452 case tok::equalequal: 1453 case tok::exclaimequal: 1454 case tok::greaterequal: 1455 case tok::lessequal: 1456 case tok::question: 1457 case tok::colon: 1458 case tok::kw_true: 1459 case tok::kw_false: 1460 if (SeenArrow) { 1461 nextToken(); 1462 break; 1463 } 1464 return true; 1465 case tok::arrow: 1466 // This might or might not actually be a lambda arrow (this could be an 1467 // ObjC method invocation followed by a dereferencing arrow). We might 1468 // reset this back to TT_Unknown in TokenAnnotator. 1469 FormatTok->Type = TT_LambdaArrow; 1470 SeenArrow = true; 1471 nextToken(); 1472 break; 1473 default: 1474 return true; 1475 } 1476 } 1477 LSquare.Type = TT_LambdaLSquare; 1478 parseChildBlock(); 1479 return true; 1480 } 1481 1482 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1483 const FormatToken *Previous = FormatTok->Previous; 1484 if (Previous && 1485 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1486 tok::kw_delete, tok::l_square) || 1487 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1488 Previous->isSimpleTypeSpecifier())) { 1489 nextToken(); 1490 return false; 1491 } 1492 nextToken(); 1493 if (FormatTok->is(tok::l_square)) { 1494 return false; 1495 } 1496 parseSquare(/*LambdaIntroducer=*/true); 1497 return true; 1498 } 1499 1500 void UnwrappedLineParser::tryToParseJSFunction() { 1501 assert(FormatTok->is(Keywords.kw_function) || 1502 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1503 if (FormatTok->is(Keywords.kw_async)) 1504 nextToken(); 1505 // Consume "function". 1506 nextToken(); 1507 1508 // Consume * (generator function). Treat it like C++'s overloaded operators. 1509 if (FormatTok->is(tok::star)) { 1510 FormatTok->Type = TT_OverloadedOperator; 1511 nextToken(); 1512 } 1513 1514 // Consume function name. 1515 if (FormatTok->is(tok::identifier)) 1516 nextToken(); 1517 1518 if (FormatTok->isNot(tok::l_paren)) 1519 return; 1520 1521 // Parse formal parameter list. 1522 parseParens(); 1523 1524 if (FormatTok->is(tok::colon)) { 1525 // Parse a type definition. 1526 nextToken(); 1527 1528 // Eat the type declaration. For braced inline object types, balance braces, 1529 // otherwise just parse until finding an l_brace for the function body. 1530 if (FormatTok->is(tok::l_brace)) 1531 tryToParseBracedList(); 1532 else 1533 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1534 nextToken(); 1535 } 1536 1537 if (FormatTok->is(tok::semi)) 1538 return; 1539 1540 parseChildBlock(); 1541 } 1542 1543 bool UnwrappedLineParser::tryToParseBracedList() { 1544 if (FormatTok->BlockKind == BK_Unknown) 1545 calculateBraceTypes(); 1546 assert(FormatTok->BlockKind != BK_Unknown); 1547 if (FormatTok->BlockKind == BK_Block) 1548 return false; 1549 nextToken(); 1550 parseBracedList(); 1551 return true; 1552 } 1553 1554 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1555 tok::TokenKind ClosingBraceKind) { 1556 bool HasError = false; 1557 1558 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1559 // replace this by using parseAssigmentExpression() inside. 1560 do { 1561 if (Style.Language == FormatStyle::LK_JavaScript) { 1562 if (FormatTok->is(Keywords.kw_function) || 1563 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1564 tryToParseJSFunction(); 1565 continue; 1566 } 1567 if (FormatTok->is(TT_JsFatArrow)) { 1568 nextToken(); 1569 // Fat arrows can be followed by simple expressions or by child blocks 1570 // in curly braces. 1571 if (FormatTok->is(tok::l_brace)) { 1572 parseChildBlock(); 1573 continue; 1574 } 1575 } 1576 if (FormatTok->is(tok::l_brace)) { 1577 // Could be a method inside of a braced list `{a() { return 1; }}`. 1578 if (tryToParseBracedList()) 1579 continue; 1580 parseChildBlock(); 1581 } 1582 } 1583 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1584 nextToken(); 1585 return !HasError; 1586 } 1587 switch (FormatTok->Tok.getKind()) { 1588 case tok::caret: 1589 nextToken(); 1590 if (FormatTok->is(tok::l_brace)) { 1591 parseChildBlock(); 1592 } 1593 break; 1594 case tok::l_square: 1595 tryToParseLambda(); 1596 break; 1597 case tok::l_paren: 1598 parseParens(); 1599 // JavaScript can just have free standing methods and getters/setters in 1600 // object literals. Detect them by a "{" following ")". 1601 if (Style.Language == FormatStyle::LK_JavaScript) { 1602 if (FormatTok->is(tok::l_brace)) 1603 parseChildBlock(); 1604 break; 1605 } 1606 break; 1607 case tok::l_brace: 1608 // Assume there are no blocks inside a braced init list apart 1609 // from the ones we explicitly parse out (like lambdas). 1610 FormatTok->BlockKind = BK_BracedInit; 1611 nextToken(); 1612 parseBracedList(); 1613 break; 1614 case tok::less: 1615 if (Style.Language == FormatStyle::LK_Proto) { 1616 nextToken(); 1617 parseBracedList(/*ContinueOnSemicolons=*/false, 1618 /*ClosingBraceKind=*/tok::greater); 1619 } else { 1620 nextToken(); 1621 } 1622 break; 1623 case tok::semi: 1624 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1625 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1626 // used for error recovery if we have otherwise determined that this is 1627 // a braced list. 1628 if (Style.Language == FormatStyle::LK_JavaScript) { 1629 nextToken(); 1630 break; 1631 } 1632 HasError = true; 1633 if (!ContinueOnSemicolons) 1634 return !HasError; 1635 nextToken(); 1636 break; 1637 case tok::comma: 1638 nextToken(); 1639 break; 1640 default: 1641 nextToken(); 1642 break; 1643 } 1644 } while (!eof()); 1645 return false; 1646 } 1647 1648 void UnwrappedLineParser::parseParens() { 1649 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1650 nextToken(); 1651 do { 1652 switch (FormatTok->Tok.getKind()) { 1653 case tok::l_paren: 1654 parseParens(); 1655 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1656 parseChildBlock(); 1657 break; 1658 case tok::r_paren: 1659 nextToken(); 1660 return; 1661 case tok::r_brace: 1662 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1663 return; 1664 case tok::l_square: 1665 tryToParseLambda(); 1666 break; 1667 case tok::l_brace: 1668 if (!tryToParseBracedList()) 1669 parseChildBlock(); 1670 break; 1671 case tok::at: 1672 nextToken(); 1673 if (FormatTok->Tok.is(tok::l_brace)) { 1674 nextToken(); 1675 parseBracedList(); 1676 } 1677 break; 1678 case tok::kw_class: 1679 if (Style.Language == FormatStyle::LK_JavaScript) 1680 parseRecord(/*ParseAsExpr=*/true); 1681 else 1682 nextToken(); 1683 break; 1684 case tok::identifier: 1685 if (Style.Language == FormatStyle::LK_JavaScript && 1686 (FormatTok->is(Keywords.kw_function) || 1687 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1688 tryToParseJSFunction(); 1689 else 1690 nextToken(); 1691 break; 1692 default: 1693 nextToken(); 1694 break; 1695 } 1696 } while (!eof()); 1697 } 1698 1699 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 1700 if (!LambdaIntroducer) { 1701 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1702 if (tryToParseLambda()) 1703 return; 1704 } 1705 do { 1706 switch (FormatTok->Tok.getKind()) { 1707 case tok::l_paren: 1708 parseParens(); 1709 break; 1710 case tok::r_square: 1711 nextToken(); 1712 return; 1713 case tok::r_brace: 1714 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1715 return; 1716 case tok::l_square: 1717 parseSquare(); 1718 break; 1719 case tok::l_brace: { 1720 if (!tryToParseBracedList()) 1721 parseChildBlock(); 1722 break; 1723 } 1724 case tok::at: 1725 nextToken(); 1726 if (FormatTok->Tok.is(tok::l_brace)) { 1727 nextToken(); 1728 parseBracedList(); 1729 } 1730 break; 1731 default: 1732 nextToken(); 1733 break; 1734 } 1735 } while (!eof()); 1736 } 1737 1738 void UnwrappedLineParser::parseIfThenElse() { 1739 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1740 nextToken(); 1741 if (FormatTok->Tok.is(tok::kw_constexpr)) 1742 nextToken(); 1743 if (FormatTok->Tok.is(tok::l_paren)) 1744 parseParens(); 1745 bool NeedsUnwrappedLine = false; 1746 if (FormatTok->Tok.is(tok::l_brace)) { 1747 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1748 parseBlock(/*MustBeDeclaration=*/false); 1749 if (Style.BraceWrapping.BeforeElse) 1750 addUnwrappedLine(); 1751 else 1752 NeedsUnwrappedLine = true; 1753 } else { 1754 addUnwrappedLine(); 1755 ++Line->Level; 1756 parseStructuralElement(); 1757 --Line->Level; 1758 } 1759 if (FormatTok->Tok.is(tok::kw_else)) { 1760 nextToken(); 1761 if (FormatTok->Tok.is(tok::l_brace)) { 1762 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1763 parseBlock(/*MustBeDeclaration=*/false); 1764 addUnwrappedLine(); 1765 } else if (FormatTok->Tok.is(tok::kw_if)) { 1766 parseIfThenElse(); 1767 } else { 1768 addUnwrappedLine(); 1769 ++Line->Level; 1770 parseStructuralElement(); 1771 if (FormatTok->is(tok::eof)) 1772 addUnwrappedLine(); 1773 --Line->Level; 1774 } 1775 } else if (NeedsUnwrappedLine) { 1776 addUnwrappedLine(); 1777 } 1778 } 1779 1780 void UnwrappedLineParser::parseTryCatch() { 1781 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1782 nextToken(); 1783 bool NeedsUnwrappedLine = false; 1784 if (FormatTok->is(tok::colon)) { 1785 // We are in a function try block, what comes is an initializer list. 1786 nextToken(); 1787 while (FormatTok->is(tok::identifier)) { 1788 nextToken(); 1789 if (FormatTok->is(tok::l_paren)) 1790 parseParens(); 1791 if (FormatTok->is(tok::comma)) 1792 nextToken(); 1793 } 1794 } 1795 // Parse try with resource. 1796 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1797 parseParens(); 1798 } 1799 if (FormatTok->is(tok::l_brace)) { 1800 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1801 parseBlock(/*MustBeDeclaration=*/false); 1802 if (Style.BraceWrapping.BeforeCatch) { 1803 addUnwrappedLine(); 1804 } else { 1805 NeedsUnwrappedLine = true; 1806 } 1807 } else if (!FormatTok->is(tok::kw_catch)) { 1808 // The C++ standard requires a compound-statement after a try. 1809 // If there's none, we try to assume there's a structuralElement 1810 // and try to continue. 1811 addUnwrappedLine(); 1812 ++Line->Level; 1813 parseStructuralElement(); 1814 --Line->Level; 1815 } 1816 while (1) { 1817 if (FormatTok->is(tok::at)) 1818 nextToken(); 1819 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1820 tok::kw___finally) || 1821 ((Style.Language == FormatStyle::LK_Java || 1822 Style.Language == FormatStyle::LK_JavaScript) && 1823 FormatTok->is(Keywords.kw_finally)) || 1824 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1825 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1826 break; 1827 nextToken(); 1828 while (FormatTok->isNot(tok::l_brace)) { 1829 if (FormatTok->is(tok::l_paren)) { 1830 parseParens(); 1831 continue; 1832 } 1833 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1834 return; 1835 nextToken(); 1836 } 1837 NeedsUnwrappedLine = false; 1838 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1839 parseBlock(/*MustBeDeclaration=*/false); 1840 if (Style.BraceWrapping.BeforeCatch) 1841 addUnwrappedLine(); 1842 else 1843 NeedsUnwrappedLine = true; 1844 } 1845 if (NeedsUnwrappedLine) 1846 addUnwrappedLine(); 1847 } 1848 1849 void UnwrappedLineParser::parseNamespace() { 1850 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1851 1852 const FormatToken &InitialToken = *FormatTok; 1853 nextToken(); 1854 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1855 nextToken(); 1856 if (FormatTok->Tok.is(tok::l_brace)) { 1857 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1858 addUnwrappedLine(); 1859 1860 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1861 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1862 DeclarationScopeStack.size() > 1); 1863 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1864 // Munch the semicolon after a namespace. This is more common than one would 1865 // think. Puttin the semicolon into its own line is very ugly. 1866 if (FormatTok->Tok.is(tok::semi)) 1867 nextToken(); 1868 addUnwrappedLine(); 1869 } 1870 // FIXME: Add error handling. 1871 } 1872 1873 void UnwrappedLineParser::parseNew() { 1874 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1875 nextToken(); 1876 if (Style.Language != FormatStyle::LK_Java) 1877 return; 1878 1879 // In Java, we can parse everything up to the parens, which aren't optional. 1880 do { 1881 // There should not be a ;, { or } before the new's open paren. 1882 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1883 return; 1884 1885 // Consume the parens. 1886 if (FormatTok->is(tok::l_paren)) { 1887 parseParens(); 1888 1889 // If there is a class body of an anonymous class, consume that as child. 1890 if (FormatTok->is(tok::l_brace)) 1891 parseChildBlock(); 1892 return; 1893 } 1894 nextToken(); 1895 } while (!eof()); 1896 } 1897 1898 void UnwrappedLineParser::parseForOrWhileLoop() { 1899 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1900 "'for', 'while' or foreach macro expected"); 1901 nextToken(); 1902 // JS' for await ( ... 1903 if (Style.Language == FormatStyle::LK_JavaScript && 1904 FormatTok->is(Keywords.kw_await)) 1905 nextToken(); 1906 if (FormatTok->Tok.is(tok::l_paren)) 1907 parseParens(); 1908 if (FormatTok->Tok.is(tok::l_brace)) { 1909 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1910 parseBlock(/*MustBeDeclaration=*/false); 1911 addUnwrappedLine(); 1912 } else { 1913 addUnwrappedLine(); 1914 ++Line->Level; 1915 parseStructuralElement(); 1916 --Line->Level; 1917 } 1918 } 1919 1920 void UnwrappedLineParser::parseDoWhile() { 1921 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1922 nextToken(); 1923 if (FormatTok->Tok.is(tok::l_brace)) { 1924 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1925 parseBlock(/*MustBeDeclaration=*/false); 1926 if (Style.BraceWrapping.IndentBraces) 1927 addUnwrappedLine(); 1928 } else { 1929 addUnwrappedLine(); 1930 ++Line->Level; 1931 parseStructuralElement(); 1932 --Line->Level; 1933 } 1934 1935 // FIXME: Add error handling. 1936 if (!FormatTok->Tok.is(tok::kw_while)) { 1937 addUnwrappedLine(); 1938 return; 1939 } 1940 1941 nextToken(); 1942 parseStructuralElement(); 1943 } 1944 1945 void UnwrappedLineParser::parseLabel() { 1946 nextToken(); 1947 unsigned OldLineLevel = Line->Level; 1948 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1949 --Line->Level; 1950 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1951 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1952 parseBlock(/*MustBeDeclaration=*/false); 1953 if (FormatTok->Tok.is(tok::kw_break)) { 1954 if (Style.BraceWrapping.AfterControlStatement) 1955 addUnwrappedLine(); 1956 parseStructuralElement(); 1957 } 1958 addUnwrappedLine(); 1959 } else { 1960 if (FormatTok->is(tok::semi)) 1961 nextToken(); 1962 addUnwrappedLine(); 1963 } 1964 Line->Level = OldLineLevel; 1965 if (FormatTok->isNot(tok::l_brace)) { 1966 parseStructuralElement(); 1967 addUnwrappedLine(); 1968 } 1969 } 1970 1971 void UnwrappedLineParser::parseCaseLabel() { 1972 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1973 // FIXME: fix handling of complex expressions here. 1974 do { 1975 nextToken(); 1976 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1977 parseLabel(); 1978 } 1979 1980 void UnwrappedLineParser::parseSwitch() { 1981 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1982 nextToken(); 1983 if (FormatTok->Tok.is(tok::l_paren)) 1984 parseParens(); 1985 if (FormatTok->Tok.is(tok::l_brace)) { 1986 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1987 parseBlock(/*MustBeDeclaration=*/false); 1988 addUnwrappedLine(); 1989 } else { 1990 addUnwrappedLine(); 1991 ++Line->Level; 1992 parseStructuralElement(); 1993 --Line->Level; 1994 } 1995 } 1996 1997 void UnwrappedLineParser::parseAccessSpecifier() { 1998 nextToken(); 1999 // Understand Qt's slots. 2000 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 2001 nextToken(); 2002 // Otherwise, we don't know what it is, and we'd better keep the next token. 2003 if (FormatTok->Tok.is(tok::colon)) 2004 nextToken(); 2005 addUnwrappedLine(); 2006 } 2007 2008 bool UnwrappedLineParser::parseEnum() { 2009 // Won't be 'enum' for NS_ENUMs. 2010 if (FormatTok->Tok.is(tok::kw_enum)) 2011 nextToken(); 2012 2013 // In TypeScript, "enum" can also be used as property name, e.g. in interface 2014 // declarations. An "enum" keyword followed by a colon would be a syntax 2015 // error and thus assume it is just an identifier. 2016 if (Style.Language == FormatStyle::LK_JavaScript && 2017 FormatTok->isOneOf(tok::colon, tok::question)) 2018 return false; 2019 2020 // Eat up enum class ... 2021 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 2022 nextToken(); 2023 2024 while (FormatTok->Tok.getIdentifierInfo() || 2025 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 2026 tok::greater, tok::comma, tok::question)) { 2027 nextToken(); 2028 // We can have macros or attributes in between 'enum' and the enum name. 2029 if (FormatTok->is(tok::l_paren)) 2030 parseParens(); 2031 if (FormatTok->is(tok::identifier)) { 2032 nextToken(); 2033 // If there are two identifiers in a row, this is likely an elaborate 2034 // return type. In Java, this can be "implements", etc. 2035 if (Style.isCpp() && FormatTok->is(tok::identifier)) 2036 return false; 2037 } 2038 } 2039 2040 // Just a declaration or something is wrong. 2041 if (FormatTok->isNot(tok::l_brace)) 2042 return true; 2043 FormatTok->BlockKind = BK_Block; 2044 2045 if (Style.Language == FormatStyle::LK_Java) { 2046 // Java enums are different. 2047 parseJavaEnumBody(); 2048 return true; 2049 } 2050 if (Style.Language == FormatStyle::LK_Proto) { 2051 parseBlock(/*MustBeDeclaration=*/true); 2052 return true; 2053 } 2054 2055 // Parse enum body. 2056 nextToken(); 2057 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 2058 if (HasError) { 2059 if (FormatTok->is(tok::semi)) 2060 nextToken(); 2061 addUnwrappedLine(); 2062 } 2063 return true; 2064 2065 // There is no addUnwrappedLine() here so that we fall through to parsing a 2066 // structural element afterwards. Thus, in "enum A {} n, m;", 2067 // "} n, m;" will end up in one unwrapped line. 2068 } 2069 2070 void UnwrappedLineParser::parseJavaEnumBody() { 2071 // Determine whether the enum is simple, i.e. does not have a semicolon or 2072 // constants with class bodies. Simple enums can be formatted like braced 2073 // lists, contracted to a single line, etc. 2074 unsigned StoredPosition = Tokens->getPosition(); 2075 bool IsSimple = true; 2076 FormatToken *Tok = Tokens->getNextToken(); 2077 while (Tok) { 2078 if (Tok->is(tok::r_brace)) 2079 break; 2080 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2081 IsSimple = false; 2082 break; 2083 } 2084 // FIXME: This will also mark enums with braces in the arguments to enum 2085 // constants as "not simple". This is probably fine in practice, though. 2086 Tok = Tokens->getNextToken(); 2087 } 2088 FormatTok = Tokens->setPosition(StoredPosition); 2089 2090 if (IsSimple) { 2091 nextToken(); 2092 parseBracedList(); 2093 addUnwrappedLine(); 2094 return; 2095 } 2096 2097 // Parse the body of a more complex enum. 2098 // First add a line for everything up to the "{". 2099 nextToken(); 2100 addUnwrappedLine(); 2101 ++Line->Level; 2102 2103 // Parse the enum constants. 2104 while (FormatTok) { 2105 if (FormatTok->is(tok::l_brace)) { 2106 // Parse the constant's class body. 2107 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2108 /*MunchSemi=*/false); 2109 } else if (FormatTok->is(tok::l_paren)) { 2110 parseParens(); 2111 } else if (FormatTok->is(tok::comma)) { 2112 nextToken(); 2113 addUnwrappedLine(); 2114 } else if (FormatTok->is(tok::semi)) { 2115 nextToken(); 2116 addUnwrappedLine(); 2117 break; 2118 } else if (FormatTok->is(tok::r_brace)) { 2119 addUnwrappedLine(); 2120 break; 2121 } else { 2122 nextToken(); 2123 } 2124 } 2125 2126 // Parse the class body after the enum's ";" if any. 2127 parseLevel(/*HasOpeningBrace=*/true); 2128 nextToken(); 2129 --Line->Level; 2130 addUnwrappedLine(); 2131 } 2132 2133 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 2134 const FormatToken &InitialToken = *FormatTok; 2135 nextToken(); 2136 2137 // The actual identifier can be a nested name specifier, and in macros 2138 // it is often token-pasted. 2139 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 2140 tok::kw___attribute, tok::kw___declspec, 2141 tok::kw_alignas) || 2142 ((Style.Language == FormatStyle::LK_Java || 2143 Style.Language == FormatStyle::LK_JavaScript) && 2144 FormatTok->isOneOf(tok::period, tok::comma))) { 2145 if (Style.Language == FormatStyle::LK_JavaScript && 2146 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 2147 // JavaScript/TypeScript supports inline object types in 2148 // extends/implements positions: 2149 // class Foo implements {bar: number} { } 2150 nextToken(); 2151 if (FormatTok->is(tok::l_brace)) { 2152 tryToParseBracedList(); 2153 continue; 2154 } 2155 } 2156 bool IsNonMacroIdentifier = 2157 FormatTok->is(tok::identifier) && 2158 FormatTok->TokenText != FormatTok->TokenText.upper(); 2159 nextToken(); 2160 // We can have macros or attributes in between 'class' and the class name. 2161 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 2162 parseParens(); 2163 } 2164 2165 // Note that parsing away template declarations here leads to incorrectly 2166 // accepting function declarations as record declarations. 2167 // In general, we cannot solve this problem. Consider: 2168 // class A<int> B() {} 2169 // which can be a function definition or a class definition when B() is a 2170 // macro. If we find enough real-world cases where this is a problem, we 2171 // can parse for the 'template' keyword in the beginning of the statement, 2172 // and thus rule out the record production in case there is no template 2173 // (this would still leave us with an ambiguity between template function 2174 // and class declarations). 2175 if (FormatTok->isOneOf(tok::colon, tok::less)) { 2176 while (!eof()) { 2177 if (FormatTok->is(tok::l_brace)) { 2178 calculateBraceTypes(/*ExpectClassBody=*/true); 2179 if (!tryToParseBracedList()) 2180 break; 2181 } 2182 if (FormatTok->Tok.is(tok::semi)) 2183 return; 2184 nextToken(); 2185 } 2186 } 2187 if (FormatTok->Tok.is(tok::l_brace)) { 2188 if (ParseAsExpr) { 2189 parseChildBlock(); 2190 } else { 2191 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2192 addUnwrappedLine(); 2193 2194 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2195 /*MunchSemi=*/false); 2196 } 2197 } 2198 // There is no addUnwrappedLine() here so that we fall through to parsing a 2199 // structural element afterwards. Thus, in "class A {} n, m;", 2200 // "} n, m;" will end up in one unwrapped line. 2201 } 2202 2203 void UnwrappedLineParser::parseObjCMethod() { 2204 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && 2205 "'(' or identifier expected."); 2206 do { 2207 if (FormatTok->Tok.is(tok::semi)) { 2208 nextToken(); 2209 addUnwrappedLine(); 2210 return; 2211 } else if (FormatTok->Tok.is(tok::l_brace)) { 2212 if (Style.BraceWrapping.AfterFunction) 2213 addUnwrappedLine(); 2214 parseBlock(/*MustBeDeclaration=*/false); 2215 addUnwrappedLine(); 2216 return; 2217 } else { 2218 nextToken(); 2219 } 2220 } while (!eof()); 2221 } 2222 2223 void UnwrappedLineParser::parseObjCProtocolList() { 2224 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 2225 do { 2226 nextToken(); 2227 // Early exit in case someone forgot a close angle. 2228 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2229 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2230 return; 2231 } while (!eof() && FormatTok->Tok.isNot(tok::greater)); 2232 nextToken(); // Skip '>'. 2233 } 2234 2235 void UnwrappedLineParser::parseObjCUntilAtEnd() { 2236 do { 2237 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2238 nextToken(); 2239 addUnwrappedLine(); 2240 break; 2241 } 2242 if (FormatTok->is(tok::l_brace)) { 2243 parseBlock(/*MustBeDeclaration=*/false); 2244 // In ObjC interfaces, nothing should be following the "}". 2245 addUnwrappedLine(); 2246 } else if (FormatTok->is(tok::r_brace)) { 2247 // Ignore stray "}". parseStructuralElement doesn't consume them. 2248 nextToken(); 2249 addUnwrappedLine(); 2250 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 2251 nextToken(); 2252 parseObjCMethod(); 2253 } else { 2254 parseStructuralElement(); 2255 } 2256 } while (!eof()); 2257 } 2258 2259 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2260 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 2261 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 2262 nextToken(); 2263 nextToken(); // interface name 2264 2265 // @interface can be followed by a lightweight generic 2266 // specialization list, then either a base class or a category. 2267 if (FormatTok->Tok.is(tok::less)) { 2268 // Unlike protocol lists, generic parameterizations support 2269 // nested angles: 2270 // 2271 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 2272 // NSObject <NSCopying, NSSecureCoding> 2273 // 2274 // so we need to count how many open angles we have left. 2275 unsigned NumOpenAngles = 1; 2276 do { 2277 nextToken(); 2278 // Early exit in case someone forgot a close angle. 2279 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2280 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2281 break; 2282 if (FormatTok->Tok.is(tok::less)) 2283 ++NumOpenAngles; 2284 else if (FormatTok->Tok.is(tok::greater)) { 2285 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 2286 --NumOpenAngles; 2287 } 2288 } while (!eof() && NumOpenAngles != 0); 2289 nextToken(); // Skip '>'. 2290 } 2291 if (FormatTok->Tok.is(tok::colon)) { 2292 nextToken(); 2293 nextToken(); // base class name 2294 } else if (FormatTok->Tok.is(tok::l_paren)) 2295 // Skip category, if present. 2296 parseParens(); 2297 2298 if (FormatTok->Tok.is(tok::less)) 2299 parseObjCProtocolList(); 2300 2301 if (FormatTok->Tok.is(tok::l_brace)) { 2302 if (Style.BraceWrapping.AfterObjCDeclaration) 2303 addUnwrappedLine(); 2304 parseBlock(/*MustBeDeclaration=*/true); 2305 } 2306 2307 // With instance variables, this puts '}' on its own line. Without instance 2308 // variables, this ends the @interface line. 2309 addUnwrappedLine(); 2310 2311 parseObjCUntilAtEnd(); 2312 } 2313 2314 // Returns true for the declaration/definition form of @protocol, 2315 // false for the expression form. 2316 bool UnwrappedLineParser::parseObjCProtocol() { 2317 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 2318 nextToken(); 2319 2320 if (FormatTok->is(tok::l_paren)) 2321 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 2322 return false; 2323 2324 // The definition/declaration form, 2325 // @protocol Foo 2326 // - (int)someMethod; 2327 // @end 2328 2329 nextToken(); // protocol name 2330 2331 if (FormatTok->Tok.is(tok::less)) 2332 parseObjCProtocolList(); 2333 2334 // Check for protocol declaration. 2335 if (FormatTok->Tok.is(tok::semi)) { 2336 nextToken(); 2337 addUnwrappedLine(); 2338 return true; 2339 } 2340 2341 addUnwrappedLine(); 2342 parseObjCUntilAtEnd(); 2343 return true; 2344 } 2345 2346 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2347 bool IsImport = FormatTok->is(Keywords.kw_import); 2348 assert(IsImport || FormatTok->is(tok::kw_export)); 2349 nextToken(); 2350 2351 // Consume the "default" in "export default class/function". 2352 if (FormatTok->is(tok::kw_default)) 2353 nextToken(); 2354 2355 // Consume "async function", "function" and "default function", so that these 2356 // get parsed as free-standing JS functions, i.e. do not require a trailing 2357 // semicolon. 2358 if (FormatTok->is(Keywords.kw_async)) 2359 nextToken(); 2360 if (FormatTok->is(Keywords.kw_function)) { 2361 nextToken(); 2362 return; 2363 } 2364 2365 // For imports, `export *`, `export {...}`, consume the rest of the line up 2366 // to the terminating `;`. For everything else, just return and continue 2367 // parsing the structural element, i.e. the declaration or expression for 2368 // `export default`. 2369 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2370 !FormatTok->isStringLiteral()) 2371 return; 2372 2373 while (!eof()) { 2374 if (FormatTok->is(tok::semi)) 2375 return; 2376 if (Line->Tokens.empty()) { 2377 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2378 // import statement should terminate. 2379 return; 2380 } 2381 if (FormatTok->is(tok::l_brace)) { 2382 FormatTok->BlockKind = BK_Block; 2383 nextToken(); 2384 parseBracedList(); 2385 } else { 2386 nextToken(); 2387 } 2388 } 2389 } 2390 2391 void UnwrappedLineParser::parseStatementMacro() { 2392 nextToken(); 2393 if (FormatTok->is(tok::l_paren)) 2394 parseParens(); 2395 if (FormatTok->is(tok::semi)) 2396 nextToken(); 2397 addUnwrappedLine(); 2398 } 2399 2400 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2401 StringRef Prefix = "") { 2402 llvm::dbgs() << Prefix << "Line(" << Line.Level 2403 << ", FSC=" << Line.FirstStartColumn << ")" 2404 << (Line.InPPDirective ? " MACRO" : "") << ": "; 2405 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2406 E = Line.Tokens.end(); 2407 I != E; ++I) { 2408 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2409 << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn 2410 << "] "; 2411 } 2412 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2413 E = Line.Tokens.end(); 2414 I != E; ++I) { 2415 const UnwrappedLineNode &Node = *I; 2416 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2417 I = Node.Children.begin(), 2418 E = Node.Children.end(); 2419 I != E; ++I) { 2420 printDebugInfo(*I, "\nChild: "); 2421 } 2422 } 2423 llvm::dbgs() << "\n"; 2424 } 2425 2426 void UnwrappedLineParser::addUnwrappedLine() { 2427 if (Line->Tokens.empty()) 2428 return; 2429 LLVM_DEBUG({ 2430 if (CurrentLines == &Lines) 2431 printDebugInfo(*Line); 2432 }); 2433 CurrentLines->push_back(std::move(*Line)); 2434 Line->Tokens.clear(); 2435 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2436 Line->FirstStartColumn = 0; 2437 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2438 CurrentLines->append( 2439 std::make_move_iterator(PreprocessorDirectives.begin()), 2440 std::make_move_iterator(PreprocessorDirectives.end())); 2441 PreprocessorDirectives.clear(); 2442 } 2443 // Disconnect the current token from the last token on the previous line. 2444 FormatTok->Previous = nullptr; 2445 } 2446 2447 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2448 2449 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2450 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2451 FormatTok.NewlinesBefore > 0; 2452 } 2453 2454 // Checks if \p FormatTok is a line comment that continues the line comment 2455 // section on \p Line. 2456 static bool continuesLineCommentSection(const FormatToken &FormatTok, 2457 const UnwrappedLine &Line, 2458 llvm::Regex &CommentPragmasRegex) { 2459 if (Line.Tokens.empty()) 2460 return false; 2461 2462 StringRef IndentContent = FormatTok.TokenText; 2463 if (FormatTok.TokenText.startswith("//") || 2464 FormatTok.TokenText.startswith("/*")) 2465 IndentContent = FormatTok.TokenText.substr(2); 2466 if (CommentPragmasRegex.match(IndentContent)) 2467 return false; 2468 2469 // If Line starts with a line comment, then FormatTok continues the comment 2470 // section if its original column is greater or equal to the original start 2471 // column of the line. 2472 // 2473 // Define the min column token of a line as follows: if a line ends in '{' or 2474 // contains a '{' followed by a line comment, then the min column token is 2475 // that '{'. Otherwise, the min column token of the line is the first token of 2476 // the line. 2477 // 2478 // If Line starts with a token other than a line comment, then FormatTok 2479 // continues the comment section if its original column is greater than the 2480 // original start column of the min column token of the line. 2481 // 2482 // For example, the second line comment continues the first in these cases: 2483 // 2484 // // first line 2485 // // second line 2486 // 2487 // and: 2488 // 2489 // // first line 2490 // // second line 2491 // 2492 // and: 2493 // 2494 // int i; // first line 2495 // // second line 2496 // 2497 // and: 2498 // 2499 // do { // first line 2500 // // second line 2501 // int i; 2502 // } while (true); 2503 // 2504 // and: 2505 // 2506 // enum { 2507 // a, // first line 2508 // // second line 2509 // b 2510 // }; 2511 // 2512 // The second line comment doesn't continue the first in these cases: 2513 // 2514 // // first line 2515 // // second line 2516 // 2517 // and: 2518 // 2519 // int i; // first line 2520 // // second line 2521 // 2522 // and: 2523 // 2524 // do { // first line 2525 // // second line 2526 // int i; 2527 // } while (true); 2528 // 2529 // and: 2530 // 2531 // enum { 2532 // a, // first line 2533 // // second line 2534 // }; 2535 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2536 2537 // Scan for '{//'. If found, use the column of '{' as a min column for line 2538 // comment section continuation. 2539 const FormatToken *PreviousToken = nullptr; 2540 for (const UnwrappedLineNode &Node : Line.Tokens) { 2541 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2542 isLineComment(*Node.Tok)) { 2543 MinColumnToken = PreviousToken; 2544 break; 2545 } 2546 PreviousToken = Node.Tok; 2547 2548 // Grab the last newline preceding a token in this unwrapped line. 2549 if (Node.Tok->NewlinesBefore > 0) { 2550 MinColumnToken = Node.Tok; 2551 } 2552 } 2553 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2554 MinColumnToken = PreviousToken; 2555 } 2556 2557 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2558 MinColumnToken); 2559 } 2560 2561 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2562 bool JustComments = Line->Tokens.empty(); 2563 for (SmallVectorImpl<FormatToken *>::const_iterator 2564 I = CommentsBeforeNextToken.begin(), 2565 E = CommentsBeforeNextToken.end(); 2566 I != E; ++I) { 2567 // Line comments that belong to the same line comment section are put on the 2568 // same line since later we might want to reflow content between them. 2569 // Additional fine-grained breaking of line comment sections is controlled 2570 // by the class BreakableLineCommentSection in case it is desirable to keep 2571 // several line comment sections in the same unwrapped line. 2572 // 2573 // FIXME: Consider putting separate line comment sections as children to the 2574 // unwrapped line instead. 2575 (*I)->ContinuesLineCommentSection = 2576 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2577 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2578 addUnwrappedLine(); 2579 pushToken(*I); 2580 } 2581 if (NewlineBeforeNext && JustComments) 2582 addUnwrappedLine(); 2583 CommentsBeforeNextToken.clear(); 2584 } 2585 2586 void UnwrappedLineParser::nextToken(int LevelDifference) { 2587 if (eof()) 2588 return; 2589 flushComments(isOnNewLine(*FormatTok)); 2590 pushToken(FormatTok); 2591 FormatToken *Previous = FormatTok; 2592 if (Style.Language != FormatStyle::LK_JavaScript) 2593 readToken(LevelDifference); 2594 else 2595 readTokenWithJavaScriptASI(); 2596 FormatTok->Previous = Previous; 2597 } 2598 2599 void UnwrappedLineParser::distributeComments( 2600 const SmallVectorImpl<FormatToken *> &Comments, 2601 const FormatToken *NextTok) { 2602 // Whether or not a line comment token continues a line is controlled by 2603 // the method continuesLineCommentSection, with the following caveat: 2604 // 2605 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2606 // that each comment line from the trail is aligned with the next token, if 2607 // the next token exists. If a trail exists, the beginning of the maximal 2608 // trail is marked as a start of a new comment section. 2609 // 2610 // For example in this code: 2611 // 2612 // int a; // line about a 2613 // // line 1 about b 2614 // // line 2 about b 2615 // int b; 2616 // 2617 // the two lines about b form a maximal trail, so there are two sections, the 2618 // first one consisting of the single comment "// line about a" and the 2619 // second one consisting of the next two comments. 2620 if (Comments.empty()) 2621 return; 2622 bool ShouldPushCommentsInCurrentLine = true; 2623 bool HasTrailAlignedWithNextToken = false; 2624 unsigned StartOfTrailAlignedWithNextToken = 0; 2625 if (NextTok) { 2626 // We are skipping the first element intentionally. 2627 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2628 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2629 HasTrailAlignedWithNextToken = true; 2630 StartOfTrailAlignedWithNextToken = i; 2631 } 2632 } 2633 } 2634 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2635 FormatToken *FormatTok = Comments[i]; 2636 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 2637 FormatTok->ContinuesLineCommentSection = false; 2638 } else { 2639 FormatTok->ContinuesLineCommentSection = 2640 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2641 } 2642 if (!FormatTok->ContinuesLineCommentSection && 2643 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2644 ShouldPushCommentsInCurrentLine = false; 2645 } 2646 if (ShouldPushCommentsInCurrentLine) { 2647 pushToken(FormatTok); 2648 } else { 2649 CommentsBeforeNextToken.push_back(FormatTok); 2650 } 2651 } 2652 } 2653 2654 void UnwrappedLineParser::readToken(int LevelDifference) { 2655 SmallVector<FormatToken *, 1> Comments; 2656 do { 2657 FormatTok = Tokens->getNextToken(); 2658 assert(FormatTok); 2659 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2660 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2661 distributeComments(Comments, FormatTok); 2662 Comments.clear(); 2663 // If there is an unfinished unwrapped line, we flush the preprocessor 2664 // directives only after that unwrapped line was finished later. 2665 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2666 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2667 assert((LevelDifference >= 0 || 2668 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 2669 "LevelDifference makes Line->Level negative"); 2670 Line->Level += LevelDifference; 2671 // Comments stored before the preprocessor directive need to be output 2672 // before the preprocessor directive, at the same level as the 2673 // preprocessor directive, as we consider them to apply to the directive. 2674 flushComments(isOnNewLine(*FormatTok)); 2675 parsePPDirective(); 2676 } 2677 while (FormatTok->Type == TT_ConflictStart || 2678 FormatTok->Type == TT_ConflictEnd || 2679 FormatTok->Type == TT_ConflictAlternative) { 2680 if (FormatTok->Type == TT_ConflictStart) { 2681 conditionalCompilationStart(/*Unreachable=*/false); 2682 } else if (FormatTok->Type == TT_ConflictAlternative) { 2683 conditionalCompilationAlternative(); 2684 } else if (FormatTok->Type == TT_ConflictEnd) { 2685 conditionalCompilationEnd(); 2686 } 2687 FormatTok = Tokens->getNextToken(); 2688 FormatTok->MustBreakBefore = true; 2689 } 2690 2691 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 2692 !Line->InPPDirective) { 2693 continue; 2694 } 2695 2696 if (!FormatTok->Tok.is(tok::comment)) { 2697 distributeComments(Comments, FormatTok); 2698 Comments.clear(); 2699 return; 2700 } 2701 2702 Comments.push_back(FormatTok); 2703 } while (!eof()); 2704 2705 distributeComments(Comments, nullptr); 2706 Comments.clear(); 2707 } 2708 2709 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2710 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2711 if (MustBreakBeforeNextToken) { 2712 Line->Tokens.back().Tok->MustBreakBefore = true; 2713 MustBreakBeforeNextToken = false; 2714 } 2715 } 2716 2717 } // end namespace format 2718 } // end namespace clang 2719