1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/Support/Debug.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 #include <algorithm> 21 22 #define DEBUG_TYPE "format-parser" 23 24 namespace clang { 25 namespace format { 26 27 class FormatTokenSource { 28 public: 29 virtual ~FormatTokenSource() {} 30 virtual FormatToken *getNextToken() = 0; 31 32 virtual unsigned getPosition() = 0; 33 virtual FormatToken *setPosition(unsigned Position) = 0; 34 }; 35 36 namespace { 37 38 class ScopedDeclarationState { 39 public: 40 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 41 bool MustBeDeclaration) 42 : Line(Line), Stack(Stack) { 43 Line.MustBeDeclaration = MustBeDeclaration; 44 Stack.push_back(MustBeDeclaration); 45 } 46 ~ScopedDeclarationState() { 47 Stack.pop_back(); 48 if (!Stack.empty()) 49 Line.MustBeDeclaration = Stack.back(); 50 else 51 Line.MustBeDeclaration = true; 52 } 53 54 private: 55 UnwrappedLine &Line; 56 std::vector<bool> &Stack; 57 }; 58 59 static bool isLineComment(const FormatToken &FormatTok) { 60 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 61 } 62 63 // Checks if \p FormatTok is a line comment that continues the line comment 64 // \p Previous. The original column of \p MinColumnToken is used to determine 65 // whether \p FormatTok is indented enough to the right to continue \p Previous. 66 static bool continuesLineComment(const FormatToken &FormatTok, 67 const FormatToken *Previous, 68 const FormatToken *MinColumnToken) { 69 if (!Previous || !MinColumnToken) 70 return false; 71 unsigned MinContinueColumn = 72 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 73 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 74 isLineComment(*Previous) && 75 FormatTok.OriginalColumn >= MinContinueColumn; 76 } 77 78 class ScopedMacroState : public FormatTokenSource { 79 public: 80 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 81 FormatToken *&ResetToken) 82 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 83 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 84 Token(nullptr), PreviousToken(nullptr) { 85 FakeEOF.Tok.startToken(); 86 FakeEOF.Tok.setKind(tok::eof); 87 TokenSource = this; 88 Line.Level = 0; 89 Line.InPPDirective = true; 90 } 91 92 ~ScopedMacroState() override { 93 TokenSource = PreviousTokenSource; 94 ResetToken = Token; 95 Line.InPPDirective = false; 96 Line.Level = PreviousLineLevel; 97 } 98 99 FormatToken *getNextToken() override { 100 // The \c UnwrappedLineParser guards against this by never calling 101 // \c getNextToken() after it has encountered the first eof token. 102 assert(!eof()); 103 PreviousToken = Token; 104 Token = PreviousTokenSource->getNextToken(); 105 if (eof()) 106 return &FakeEOF; 107 return Token; 108 } 109 110 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 111 112 FormatToken *setPosition(unsigned Position) override { 113 PreviousToken = nullptr; 114 Token = PreviousTokenSource->setPosition(Position); 115 return Token; 116 } 117 118 private: 119 bool eof() { 120 return Token && Token->HasUnescapedNewline && 121 !continuesLineComment(*Token, PreviousToken, 122 /*MinColumnToken=*/PreviousToken); 123 } 124 125 FormatToken FakeEOF; 126 UnwrappedLine &Line; 127 FormatTokenSource *&TokenSource; 128 FormatToken *&ResetToken; 129 unsigned PreviousLineLevel; 130 FormatTokenSource *PreviousTokenSource; 131 132 FormatToken *Token; 133 FormatToken *PreviousToken; 134 }; 135 136 } // end anonymous namespace 137 138 class ScopedLineState { 139 public: 140 ScopedLineState(UnwrappedLineParser &Parser, 141 bool SwitchToPreprocessorLines = false) 142 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 143 if (SwitchToPreprocessorLines) 144 Parser.CurrentLines = &Parser.PreprocessorDirectives; 145 else if (!Parser.Line->Tokens.empty()) 146 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 147 PreBlockLine = std::move(Parser.Line); 148 Parser.Line = llvm::make_unique<UnwrappedLine>(); 149 Parser.Line->Level = PreBlockLine->Level; 150 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 151 } 152 153 ~ScopedLineState() { 154 if (!Parser.Line->Tokens.empty()) { 155 Parser.addUnwrappedLine(); 156 } 157 assert(Parser.Line->Tokens.empty()); 158 Parser.Line = std::move(PreBlockLine); 159 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 160 Parser.MustBreakBeforeNextToken = true; 161 Parser.CurrentLines = OriginalLines; 162 } 163 164 private: 165 UnwrappedLineParser &Parser; 166 167 std::unique_ptr<UnwrappedLine> PreBlockLine; 168 SmallVectorImpl<UnwrappedLine> *OriginalLines; 169 }; 170 171 class CompoundStatementIndenter { 172 public: 173 CompoundStatementIndenter(UnwrappedLineParser *Parser, 174 const FormatStyle &Style, unsigned &LineLevel) 175 : CompoundStatementIndenter(Parser, LineLevel, 176 Style.BraceWrapping.AfterControlStatement, 177 Style.BraceWrapping.IndentBraces) { 178 } 179 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 180 bool WrapBrace, bool IndentBrace) 181 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 182 if (WrapBrace) 183 Parser->addUnwrappedLine(); 184 if (IndentBrace) 185 ++LineLevel; 186 } 187 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 188 189 private: 190 unsigned &LineLevel; 191 unsigned OldLineLevel; 192 }; 193 194 namespace { 195 196 class IndexedTokenSource : public FormatTokenSource { 197 public: 198 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 199 : Tokens(Tokens), Position(-1) {} 200 201 FormatToken *getNextToken() override { 202 ++Position; 203 return Tokens[Position]; 204 } 205 206 unsigned getPosition() override { 207 assert(Position >= 0); 208 return Position; 209 } 210 211 FormatToken *setPosition(unsigned P) override { 212 Position = P; 213 return Tokens[Position]; 214 } 215 216 void reset() { Position = -1; } 217 218 private: 219 ArrayRef<FormatToken *> Tokens; 220 int Position; 221 }; 222 223 } // end anonymous namespace 224 225 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 226 const AdditionalKeywords &Keywords, 227 unsigned FirstStartColumn, 228 ArrayRef<FormatToken *> Tokens, 229 UnwrappedLineConsumer &Callback) 230 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 231 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 232 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 233 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 234 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 235 ? IG_Rejected 236 : IG_Inited), 237 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 238 239 void UnwrappedLineParser::reset() { 240 PPBranchLevel = -1; 241 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 242 ? IG_Rejected 243 : IG_Inited; 244 IncludeGuardToken = nullptr; 245 Line.reset(new UnwrappedLine); 246 CommentsBeforeNextToken.clear(); 247 FormatTok = nullptr; 248 MustBreakBeforeNextToken = false; 249 PreprocessorDirectives.clear(); 250 CurrentLines = &Lines; 251 DeclarationScopeStack.clear(); 252 PPStack.clear(); 253 Line->FirstStartColumn = FirstStartColumn; 254 } 255 256 void UnwrappedLineParser::parse() { 257 IndexedTokenSource TokenSource(AllTokens); 258 Line->FirstStartColumn = FirstStartColumn; 259 do { 260 LLVM_DEBUG(llvm::dbgs() << "----\n"); 261 reset(); 262 Tokens = &TokenSource; 263 TokenSource.reset(); 264 265 readToken(); 266 parseFile(); 267 268 // If we found an include guard then all preprocessor directives (other than 269 // the guard) are over-indented by one. 270 if (IncludeGuard == IG_Found) 271 for (auto &Line : Lines) 272 if (Line.InPPDirective && Line.Level > 0) 273 --Line.Level; 274 275 // Create line with eof token. 276 pushToken(FormatTok); 277 addUnwrappedLine(); 278 279 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 280 E = Lines.end(); 281 I != E; ++I) { 282 Callback.consumeUnwrappedLine(*I); 283 } 284 Callback.finishRun(); 285 Lines.clear(); 286 while (!PPLevelBranchIndex.empty() && 287 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 288 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 289 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 290 } 291 if (!PPLevelBranchIndex.empty()) { 292 ++PPLevelBranchIndex.back(); 293 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 294 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 295 } 296 } while (!PPLevelBranchIndex.empty()); 297 } 298 299 void UnwrappedLineParser::parseFile() { 300 // The top-level context in a file always has declarations, except for pre- 301 // processor directives and JavaScript files. 302 bool MustBeDeclaration = 303 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 304 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 305 MustBeDeclaration); 306 if (Style.Language == FormatStyle::LK_TextProto) 307 parseBracedList(); 308 else 309 parseLevel(/*HasOpeningBrace=*/false); 310 // Make sure to format the remaining tokens. 311 // 312 // LK_TextProto is special since its top-level is parsed as the body of a 313 // braced list, which does not necessarily have natural line separators such 314 // as a semicolon. Comments after the last entry that have been determined to 315 // not belong to that line, as in: 316 // key: value 317 // // endfile comment 318 // do not have a chance to be put on a line of their own until this point. 319 // Here we add this newline before end-of-file comments. 320 if (Style.Language == FormatStyle::LK_TextProto && 321 !CommentsBeforeNextToken.empty()) 322 addUnwrappedLine(); 323 flushComments(true); 324 addUnwrappedLine(); 325 } 326 327 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 328 bool SwitchLabelEncountered = false; 329 do { 330 tok::TokenKind kind = FormatTok->Tok.getKind(); 331 if (FormatTok->Type == TT_MacroBlockBegin) { 332 kind = tok::l_brace; 333 } else if (FormatTok->Type == TT_MacroBlockEnd) { 334 kind = tok::r_brace; 335 } 336 337 switch (kind) { 338 case tok::comment: 339 nextToken(); 340 addUnwrappedLine(); 341 break; 342 case tok::l_brace: 343 // FIXME: Add parameter whether this can happen - if this happens, we must 344 // be in a non-declaration context. 345 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 346 continue; 347 parseBlock(/*MustBeDeclaration=*/false); 348 addUnwrappedLine(); 349 break; 350 case tok::r_brace: 351 if (HasOpeningBrace) 352 return; 353 nextToken(); 354 addUnwrappedLine(); 355 break; 356 case tok::kw_default: { 357 unsigned StoredPosition = Tokens->getPosition(); 358 FormatToken *Next; 359 do { 360 Next = Tokens->getNextToken(); 361 } while (Next && Next->is(tok::comment)); 362 FormatTok = Tokens->setPosition(StoredPosition); 363 if (Next && Next->isNot(tok::colon)) { 364 // default not followed by ':' is not a case label; treat it like 365 // an identifier. 366 parseStructuralElement(); 367 break; 368 } 369 // Else, if it is 'default:', fall through to the case handling. 370 LLVM_FALLTHROUGH; 371 } 372 case tok::kw_case: 373 if (Style.Language == FormatStyle::LK_JavaScript && 374 Line->MustBeDeclaration) { 375 // A 'case: string' style field declaration. 376 parseStructuralElement(); 377 break; 378 } 379 if (!SwitchLabelEncountered && 380 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 381 ++Line->Level; 382 SwitchLabelEncountered = true; 383 parseStructuralElement(); 384 break; 385 default: 386 parseStructuralElement(); 387 break; 388 } 389 } while (!eof()); 390 } 391 392 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 393 // We'll parse forward through the tokens until we hit 394 // a closing brace or eof - note that getNextToken() will 395 // parse macros, so this will magically work inside macro 396 // definitions, too. 397 unsigned StoredPosition = Tokens->getPosition(); 398 FormatToken *Tok = FormatTok; 399 const FormatToken *PrevTok = Tok->Previous; 400 // Keep a stack of positions of lbrace tokens. We will 401 // update information about whether an lbrace starts a 402 // braced init list or a different block during the loop. 403 SmallVector<FormatToken *, 8> LBraceStack; 404 assert(Tok->Tok.is(tok::l_brace)); 405 do { 406 // Get next non-comment token. 407 FormatToken *NextTok; 408 unsigned ReadTokens = 0; 409 do { 410 NextTok = Tokens->getNextToken(); 411 ++ReadTokens; 412 } while (NextTok->is(tok::comment)); 413 414 switch (Tok->Tok.getKind()) { 415 case tok::l_brace: 416 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 417 if (PrevTok->isOneOf(tok::colon, tok::less)) 418 // A ':' indicates this code is in a type, or a braced list 419 // following a label in an object literal ({a: {b: 1}}). 420 // A '<' could be an object used in a comparison, but that is nonsense 421 // code (can never return true), so more likely it is a generic type 422 // argument (`X<{a: string; b: number}>`). 423 // The code below could be confused by semicolons between the 424 // individual members in a type member list, which would normally 425 // trigger BK_Block. In both cases, this must be parsed as an inline 426 // braced init. 427 Tok->BlockKind = BK_BracedInit; 428 else if (PrevTok->is(tok::r_paren)) 429 // `) { }` can only occur in function or method declarations in JS. 430 Tok->BlockKind = BK_Block; 431 } else { 432 Tok->BlockKind = BK_Unknown; 433 } 434 LBraceStack.push_back(Tok); 435 break; 436 case tok::r_brace: 437 if (LBraceStack.empty()) 438 break; 439 if (LBraceStack.back()->BlockKind == BK_Unknown) { 440 bool ProbablyBracedList = false; 441 if (Style.Language == FormatStyle::LK_Proto) { 442 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 443 } else { 444 // Using OriginalColumn to distinguish between ObjC methods and 445 // binary operators is a bit hacky. 446 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 447 NextTok->OriginalColumn == 0; 448 449 // If there is a comma, semicolon or right paren after the closing 450 // brace, we assume this is a braced initializer list. Note that 451 // regardless how we mark inner braces here, we will overwrite the 452 // BlockKind later if we parse a braced list (where all blocks 453 // inside are by default braced lists), or when we explicitly detect 454 // blocks (for example while parsing lambdas). 455 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 456 // braced list in JS. 457 ProbablyBracedList = 458 (Style.Language == FormatStyle::LK_JavaScript && 459 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 460 Keywords.kw_as)) || 461 (Style.isCpp() && NextTok->is(tok::l_paren)) || 462 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 463 tok::r_paren, tok::r_square, tok::l_brace, 464 tok::ellipsis) || 465 (NextTok->is(tok::identifier) && 466 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 467 (NextTok->is(tok::semi) && 468 (!ExpectClassBody || LBraceStack.size() != 1)) || 469 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 470 if (NextTok->is(tok::l_square)) { 471 // We can have an array subscript after a braced init 472 // list, but C++11 attributes are expected after blocks. 473 NextTok = Tokens->getNextToken(); 474 ++ReadTokens; 475 ProbablyBracedList = NextTok->isNot(tok::l_square); 476 } 477 } 478 if (ProbablyBracedList) { 479 Tok->BlockKind = BK_BracedInit; 480 LBraceStack.back()->BlockKind = BK_BracedInit; 481 } else { 482 Tok->BlockKind = BK_Block; 483 LBraceStack.back()->BlockKind = BK_Block; 484 } 485 } 486 LBraceStack.pop_back(); 487 break; 488 case tok::identifier: 489 if (!Tok->is(TT_StatementMacro)) 490 break; 491 LLVM_FALLTHROUGH; 492 case tok::at: 493 case tok::semi: 494 case tok::kw_if: 495 case tok::kw_while: 496 case tok::kw_for: 497 case tok::kw_switch: 498 case tok::kw_try: 499 case tok::kw___try: 500 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 501 LBraceStack.back()->BlockKind = BK_Block; 502 break; 503 default: 504 break; 505 } 506 PrevTok = Tok; 507 Tok = NextTok; 508 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 509 510 // Assume other blocks for all unclosed opening braces. 511 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 512 if (LBraceStack[i]->BlockKind == BK_Unknown) 513 LBraceStack[i]->BlockKind = BK_Block; 514 } 515 516 FormatTok = Tokens->setPosition(StoredPosition); 517 } 518 519 template <class T> 520 static inline void hash_combine(std::size_t &seed, const T &v) { 521 std::hash<T> hasher; 522 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 523 } 524 525 size_t UnwrappedLineParser::computePPHash() const { 526 size_t h = 0; 527 for (const auto &i : PPStack) { 528 hash_combine(h, size_t(i.Kind)); 529 hash_combine(h, i.Line); 530 } 531 return h; 532 } 533 534 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 535 bool MunchSemi) { 536 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 537 "'{' or macro block token expected"); 538 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 539 FormatTok->BlockKind = BK_Block; 540 541 size_t PPStartHash = computePPHash(); 542 543 unsigned InitialLevel = Line->Level; 544 nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); 545 546 if (MacroBlock && FormatTok->is(tok::l_paren)) 547 parseParens(); 548 549 size_t NbPreprocessorDirectives = 550 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 551 addUnwrappedLine(); 552 size_t OpeningLineIndex = 553 CurrentLines->empty() 554 ? (UnwrappedLine::kInvalidIndex) 555 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 556 557 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 558 MustBeDeclaration); 559 if (AddLevel) 560 ++Line->Level; 561 parseLevel(/*HasOpeningBrace=*/true); 562 563 if (eof()) 564 return; 565 566 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 567 : !FormatTok->is(tok::r_brace)) { 568 Line->Level = InitialLevel; 569 FormatTok->BlockKind = BK_Block; 570 return; 571 } 572 573 size_t PPEndHash = computePPHash(); 574 575 // Munch the closing brace. 576 nextToken(/*LevelDifference=*/AddLevel ? -1 : 0); 577 578 if (MacroBlock && FormatTok->is(tok::l_paren)) 579 parseParens(); 580 581 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 582 nextToken(); 583 Line->Level = InitialLevel; 584 585 if (PPStartHash == PPEndHash) { 586 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 587 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 588 // Update the opening line to add the forward reference as well 589 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 590 CurrentLines->size() - 1; 591 } 592 } 593 } 594 595 static bool isGoogScope(const UnwrappedLine &Line) { 596 // FIXME: Closure-library specific stuff should not be hard-coded but be 597 // configurable. 598 if (Line.Tokens.size() < 4) 599 return false; 600 auto I = Line.Tokens.begin(); 601 if (I->Tok->TokenText != "goog") 602 return false; 603 ++I; 604 if (I->Tok->isNot(tok::period)) 605 return false; 606 ++I; 607 if (I->Tok->TokenText != "scope") 608 return false; 609 ++I; 610 return I->Tok->is(tok::l_paren); 611 } 612 613 static bool isIIFE(const UnwrappedLine &Line, 614 const AdditionalKeywords &Keywords) { 615 // Look for the start of an immediately invoked anonymous function. 616 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 617 // This is commonly done in JavaScript to create a new, anonymous scope. 618 // Example: (function() { ... })() 619 if (Line.Tokens.size() < 3) 620 return false; 621 auto I = Line.Tokens.begin(); 622 if (I->Tok->isNot(tok::l_paren)) 623 return false; 624 ++I; 625 if (I->Tok->isNot(Keywords.kw_function)) 626 return false; 627 ++I; 628 return I->Tok->is(tok::l_paren); 629 } 630 631 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 632 const FormatToken &InitialToken) { 633 if (InitialToken.is(tok::kw_namespace)) 634 return Style.BraceWrapping.AfterNamespace; 635 if (InitialToken.is(tok::kw_class)) 636 return Style.BraceWrapping.AfterClass; 637 if (InitialToken.is(tok::kw_union)) 638 return Style.BraceWrapping.AfterUnion; 639 if (InitialToken.is(tok::kw_struct)) 640 return Style.BraceWrapping.AfterStruct; 641 return false; 642 } 643 644 void UnwrappedLineParser::parseChildBlock() { 645 FormatTok->BlockKind = BK_Block; 646 nextToken(); 647 { 648 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && 649 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 650 ScopedLineState LineState(*this); 651 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 652 /*MustBeDeclaration=*/false); 653 Line->Level += SkipIndent ? 0 : 1; 654 parseLevel(/*HasOpeningBrace=*/true); 655 flushComments(isOnNewLine(*FormatTok)); 656 Line->Level -= SkipIndent ? 0 : 1; 657 } 658 nextToken(); 659 } 660 661 void UnwrappedLineParser::parsePPDirective() { 662 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 663 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 664 665 nextToken(); 666 667 if (!FormatTok->Tok.getIdentifierInfo()) { 668 parsePPUnknown(); 669 return; 670 } 671 672 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 673 case tok::pp_define: 674 parsePPDefine(); 675 return; 676 case tok::pp_if: 677 parsePPIf(/*IfDef=*/false); 678 break; 679 case tok::pp_ifdef: 680 case tok::pp_ifndef: 681 parsePPIf(/*IfDef=*/true); 682 break; 683 case tok::pp_else: 684 parsePPElse(); 685 break; 686 case tok::pp_elif: 687 parsePPElIf(); 688 break; 689 case tok::pp_endif: 690 parsePPEndIf(); 691 break; 692 default: 693 parsePPUnknown(); 694 break; 695 } 696 } 697 698 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 699 size_t Line = CurrentLines->size(); 700 if (CurrentLines == &PreprocessorDirectives) 701 Line += Lines.size(); 702 703 if (Unreachable || 704 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 705 PPStack.push_back({PP_Unreachable, Line}); 706 else 707 PPStack.push_back({PP_Conditional, Line}); 708 } 709 710 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 711 ++PPBranchLevel; 712 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 713 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 714 PPLevelBranchIndex.push_back(0); 715 PPLevelBranchCount.push_back(0); 716 } 717 PPChainBranchIndex.push(0); 718 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 719 conditionalCompilationCondition(Unreachable || Skip); 720 } 721 722 void UnwrappedLineParser::conditionalCompilationAlternative() { 723 if (!PPStack.empty()) 724 PPStack.pop_back(); 725 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 726 if (!PPChainBranchIndex.empty()) 727 ++PPChainBranchIndex.top(); 728 conditionalCompilationCondition( 729 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 730 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 731 } 732 733 void UnwrappedLineParser::conditionalCompilationEnd() { 734 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 735 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 736 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 737 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 738 } 739 } 740 // Guard against #endif's without #if. 741 if (PPBranchLevel > -1) 742 --PPBranchLevel; 743 if (!PPChainBranchIndex.empty()) 744 PPChainBranchIndex.pop(); 745 if (!PPStack.empty()) 746 PPStack.pop_back(); 747 } 748 749 void UnwrappedLineParser::parsePPIf(bool IfDef) { 750 bool IfNDef = FormatTok->is(tok::pp_ifndef); 751 nextToken(); 752 bool Unreachable = false; 753 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 754 Unreachable = true; 755 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 756 Unreachable = true; 757 conditionalCompilationStart(Unreachable); 758 FormatToken *IfCondition = FormatTok; 759 // If there's a #ifndef on the first line, and the only lines before it are 760 // comments, it could be an include guard. 761 bool MaybeIncludeGuard = IfNDef; 762 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 763 for (auto &Line : Lines) { 764 if (!Line.Tokens.front().Tok->is(tok::comment)) { 765 MaybeIncludeGuard = false; 766 IncludeGuard = IG_Rejected; 767 break; 768 } 769 } 770 --PPBranchLevel; 771 parsePPUnknown(); 772 ++PPBranchLevel; 773 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 774 IncludeGuard = IG_IfNdefed; 775 IncludeGuardToken = IfCondition; 776 } 777 } 778 779 void UnwrappedLineParser::parsePPElse() { 780 // If a potential include guard has an #else, it's not an include guard. 781 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 782 IncludeGuard = IG_Rejected; 783 conditionalCompilationAlternative(); 784 if (PPBranchLevel > -1) 785 --PPBranchLevel; 786 parsePPUnknown(); 787 ++PPBranchLevel; 788 } 789 790 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 791 792 void UnwrappedLineParser::parsePPEndIf() { 793 conditionalCompilationEnd(); 794 parsePPUnknown(); 795 // If the #endif of a potential include guard is the last thing in the file, 796 // then we found an include guard. 797 unsigned TokenPosition = Tokens->getPosition(); 798 FormatToken *PeekNext = AllTokens[TokenPosition]; 799 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && 800 PeekNext->is(tok::eof) && 801 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 802 IncludeGuard = IG_Found; 803 } 804 805 void UnwrappedLineParser::parsePPDefine() { 806 nextToken(); 807 808 if (!FormatTok->Tok.getIdentifierInfo()) { 809 IncludeGuard = IG_Rejected; 810 IncludeGuardToken = nullptr; 811 parsePPUnknown(); 812 return; 813 } 814 815 if (IncludeGuard == IG_IfNdefed && 816 IncludeGuardToken->TokenText == FormatTok->TokenText) { 817 IncludeGuard = IG_Defined; 818 IncludeGuardToken = nullptr; 819 for (auto &Line : Lines) { 820 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 821 IncludeGuard = IG_Rejected; 822 break; 823 } 824 } 825 } 826 827 nextToken(); 828 if (FormatTok->Tok.getKind() == tok::l_paren && 829 FormatTok->WhitespaceRange.getBegin() == 830 FormatTok->WhitespaceRange.getEnd()) { 831 parseParens(); 832 } 833 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 834 Line->Level += PPBranchLevel + 1; 835 addUnwrappedLine(); 836 ++Line->Level; 837 838 // Errors during a preprocessor directive can only affect the layout of the 839 // preprocessor directive, and thus we ignore them. An alternative approach 840 // would be to use the same approach we use on the file level (no 841 // re-indentation if there was a structural error) within the macro 842 // definition. 843 parseFile(); 844 } 845 846 void UnwrappedLineParser::parsePPUnknown() { 847 do { 848 nextToken(); 849 } while (!eof()); 850 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 851 Line->Level += PPBranchLevel + 1; 852 addUnwrappedLine(); 853 } 854 855 // Here we blacklist certain tokens that are not usually the first token in an 856 // unwrapped line. This is used in attempt to distinguish macro calls without 857 // trailing semicolons from other constructs split to several lines. 858 static bool tokenCanStartNewLine(const clang::Token &Tok) { 859 // Semicolon can be a null-statement, l_square can be a start of a macro or 860 // a C++11 attribute, but this doesn't seem to be common. 861 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 862 Tok.isNot(tok::l_square) && 863 // Tokens that can only be used as binary operators and a part of 864 // overloaded operator names. 865 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 866 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 867 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 868 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 869 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 870 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 871 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 872 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 873 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 874 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 875 Tok.isNot(tok::lesslessequal) && 876 // Colon is used in labels, base class lists, initializer lists, 877 // range-based for loops, ternary operator, but should never be the 878 // first token in an unwrapped line. 879 Tok.isNot(tok::colon) && 880 // 'noexcept' is a trailing annotation. 881 Tok.isNot(tok::kw_noexcept); 882 } 883 884 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 885 const FormatToken *FormatTok) { 886 // FIXME: This returns true for C/C++ keywords like 'struct'. 887 return FormatTok->is(tok::identifier) && 888 (FormatTok->Tok.getIdentifierInfo() == nullptr || 889 !FormatTok->isOneOf( 890 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 891 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 892 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 893 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 894 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 895 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, 896 Keywords.kw_from)); 897 } 898 899 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 900 const FormatToken *FormatTok) { 901 return FormatTok->Tok.isLiteral() || 902 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 903 mustBeJSIdent(Keywords, FormatTok); 904 } 905 906 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 907 // when encountered after a value (see mustBeJSIdentOrValue). 908 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 909 const FormatToken *FormatTok) { 910 return FormatTok->isOneOf( 911 tok::kw_return, Keywords.kw_yield, 912 // conditionals 913 tok::kw_if, tok::kw_else, 914 // loops 915 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 916 // switch/case 917 tok::kw_switch, tok::kw_case, 918 // exceptions 919 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 920 // declaration 921 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 922 Keywords.kw_async, Keywords.kw_function, 923 // import/export 924 Keywords.kw_import, tok::kw_export); 925 } 926 927 // readTokenWithJavaScriptASI reads the next token and terminates the current 928 // line if JavaScript Automatic Semicolon Insertion must 929 // happen between the current token and the next token. 930 // 931 // This method is conservative - it cannot cover all edge cases of JavaScript, 932 // but only aims to correctly handle certain well known cases. It *must not* 933 // return true in speculative cases. 934 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 935 FormatToken *Previous = FormatTok; 936 readToken(); 937 FormatToken *Next = FormatTok; 938 939 bool IsOnSameLine = 940 CommentsBeforeNextToken.empty() 941 ? Next->NewlinesBefore == 0 942 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 943 if (IsOnSameLine) 944 return; 945 946 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 947 bool PreviousStartsTemplateExpr = 948 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 949 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 950 // If the line contains an '@' sign, the previous token might be an 951 // annotation, which can precede another identifier/value. 952 bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(), 953 [](UnwrappedLineNode &LineNode) { 954 return LineNode.Tok->is(tok::at); 955 }) != Line->Tokens.end(); 956 if (HasAt) 957 return; 958 } 959 if (Next->is(tok::exclaim) && PreviousMustBeValue) 960 return addUnwrappedLine(); 961 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 962 bool NextEndsTemplateExpr = 963 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 964 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 965 (PreviousMustBeValue || 966 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 967 tok::minusminus))) 968 return addUnwrappedLine(); 969 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 970 isJSDeclOrStmt(Keywords, Next)) 971 return addUnwrappedLine(); 972 } 973 974 void UnwrappedLineParser::parseStructuralElement() { 975 assert(!FormatTok->is(tok::l_brace)); 976 if (Style.Language == FormatStyle::LK_TableGen && 977 FormatTok->is(tok::pp_include)) { 978 nextToken(); 979 if (FormatTok->is(tok::string_literal)) 980 nextToken(); 981 addUnwrappedLine(); 982 return; 983 } 984 switch (FormatTok->Tok.getKind()) { 985 case tok::kw_asm: 986 nextToken(); 987 if (FormatTok->is(tok::l_brace)) { 988 FormatTok->Type = TT_InlineASMBrace; 989 nextToken(); 990 while (FormatTok && FormatTok->isNot(tok::eof)) { 991 if (FormatTok->is(tok::r_brace)) { 992 FormatTok->Type = TT_InlineASMBrace; 993 nextToken(); 994 addUnwrappedLine(); 995 break; 996 } 997 FormatTok->Finalized = true; 998 nextToken(); 999 } 1000 } 1001 break; 1002 case tok::kw_namespace: 1003 parseNamespace(); 1004 return; 1005 case tok::kw_public: 1006 case tok::kw_protected: 1007 case tok::kw_private: 1008 if (Style.Language == FormatStyle::LK_Java || 1009 Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) 1010 nextToken(); 1011 else 1012 parseAccessSpecifier(); 1013 return; 1014 case tok::kw_if: 1015 parseIfThenElse(); 1016 return; 1017 case tok::kw_for: 1018 case tok::kw_while: 1019 parseForOrWhileLoop(); 1020 return; 1021 case tok::kw_do: 1022 parseDoWhile(); 1023 return; 1024 case tok::kw_switch: 1025 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1026 // 'switch: string' field declaration. 1027 break; 1028 parseSwitch(); 1029 return; 1030 case tok::kw_default: 1031 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1032 // 'default: string' field declaration. 1033 break; 1034 nextToken(); 1035 if (FormatTok->is(tok::colon)) { 1036 parseLabel(); 1037 return; 1038 } 1039 // e.g. "default void f() {}" in a Java interface. 1040 break; 1041 case tok::kw_case: 1042 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) 1043 // 'case: string' field declaration. 1044 break; 1045 parseCaseLabel(); 1046 return; 1047 case tok::kw_try: 1048 case tok::kw___try: 1049 parseTryCatch(); 1050 return; 1051 case tok::kw_extern: 1052 nextToken(); 1053 if (FormatTok->Tok.is(tok::string_literal)) { 1054 nextToken(); 1055 if (FormatTok->Tok.is(tok::l_brace)) { 1056 if (Style.BraceWrapping.AfterExternBlock) { 1057 addUnwrappedLine(); 1058 parseBlock(/*MustBeDeclaration=*/true); 1059 } else { 1060 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 1061 } 1062 addUnwrappedLine(); 1063 return; 1064 } 1065 } 1066 break; 1067 case tok::kw_export: 1068 if (Style.Language == FormatStyle::LK_JavaScript) { 1069 parseJavaScriptEs6ImportExport(); 1070 return; 1071 } 1072 if (!Style.isCpp()) 1073 break; 1074 // Handle C++ "(inline|export) namespace". 1075 LLVM_FALLTHROUGH; 1076 case tok::kw_inline: 1077 nextToken(); 1078 if (FormatTok->Tok.is(tok::kw_namespace)) { 1079 parseNamespace(); 1080 return; 1081 } 1082 break; 1083 case tok::identifier: 1084 if (FormatTok->is(TT_ForEachMacro)) { 1085 parseForOrWhileLoop(); 1086 return; 1087 } 1088 if (FormatTok->is(TT_MacroBlockBegin)) { 1089 parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, 1090 /*MunchSemi=*/false); 1091 return; 1092 } 1093 if (FormatTok->is(Keywords.kw_import)) { 1094 if (Style.Language == FormatStyle::LK_JavaScript) { 1095 parseJavaScriptEs6ImportExport(); 1096 return; 1097 } 1098 if (Style.Language == FormatStyle::LK_Proto) { 1099 nextToken(); 1100 if (FormatTok->is(tok::kw_public)) 1101 nextToken(); 1102 if (!FormatTok->is(tok::string_literal)) 1103 return; 1104 nextToken(); 1105 if (FormatTok->is(tok::semi)) 1106 nextToken(); 1107 addUnwrappedLine(); 1108 return; 1109 } 1110 } 1111 if (Style.isCpp() && 1112 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1113 Keywords.kw_slots, Keywords.kw_qslots)) { 1114 nextToken(); 1115 if (FormatTok->is(tok::colon)) { 1116 nextToken(); 1117 addUnwrappedLine(); 1118 return; 1119 } 1120 } 1121 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1122 parseStatementMacro(); 1123 return; 1124 } 1125 // In all other cases, parse the declaration. 1126 break; 1127 default: 1128 break; 1129 } 1130 do { 1131 const FormatToken *Previous = FormatTok->Previous; 1132 switch (FormatTok->Tok.getKind()) { 1133 case tok::at: 1134 nextToken(); 1135 if (FormatTok->Tok.is(tok::l_brace)) { 1136 nextToken(); 1137 parseBracedList(); 1138 break; 1139 } else if (Style.Language == FormatStyle::LK_Java && 1140 FormatTok->is(Keywords.kw_interface)) { 1141 nextToken(); 1142 break; 1143 } 1144 switch (FormatTok->Tok.getObjCKeywordID()) { 1145 case tok::objc_public: 1146 case tok::objc_protected: 1147 case tok::objc_package: 1148 case tok::objc_private: 1149 return parseAccessSpecifier(); 1150 case tok::objc_interface: 1151 case tok::objc_implementation: 1152 return parseObjCInterfaceOrImplementation(); 1153 case tok::objc_protocol: 1154 if (parseObjCProtocol()) 1155 return; 1156 break; 1157 case tok::objc_end: 1158 return; // Handled by the caller. 1159 case tok::objc_optional: 1160 case tok::objc_required: 1161 nextToken(); 1162 addUnwrappedLine(); 1163 return; 1164 case tok::objc_autoreleasepool: 1165 nextToken(); 1166 if (FormatTok->Tok.is(tok::l_brace)) { 1167 if (Style.BraceWrapping.AfterControlStatement) 1168 addUnwrappedLine(); 1169 parseBlock(/*MustBeDeclaration=*/false); 1170 } 1171 addUnwrappedLine(); 1172 return; 1173 case tok::objc_synchronized: 1174 nextToken(); 1175 if (FormatTok->Tok.is(tok::l_paren)) 1176 // Skip synchronization object 1177 parseParens(); 1178 if (FormatTok->Tok.is(tok::l_brace)) { 1179 if (Style.BraceWrapping.AfterControlStatement) 1180 addUnwrappedLine(); 1181 parseBlock(/*MustBeDeclaration=*/false); 1182 } 1183 addUnwrappedLine(); 1184 return; 1185 case tok::objc_try: 1186 // This branch isn't strictly necessary (the kw_try case below would 1187 // do this too after the tok::at is parsed above). But be explicit. 1188 parseTryCatch(); 1189 return; 1190 default: 1191 break; 1192 } 1193 break; 1194 case tok::kw_enum: 1195 // Ignore if this is part of "template <enum ...". 1196 if (Previous && Previous->is(tok::less)) { 1197 nextToken(); 1198 break; 1199 } 1200 1201 // parseEnum falls through and does not yet add an unwrapped line as an 1202 // enum definition can start a structural element. 1203 if (!parseEnum()) 1204 break; 1205 // This only applies for C++. 1206 if (!Style.isCpp()) { 1207 addUnwrappedLine(); 1208 return; 1209 } 1210 break; 1211 case tok::kw_typedef: 1212 nextToken(); 1213 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1214 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 1215 parseEnum(); 1216 break; 1217 case tok::kw_struct: 1218 case tok::kw_union: 1219 case tok::kw_class: 1220 // parseRecord falls through and does not yet add an unwrapped line as a 1221 // record declaration or definition can start a structural element. 1222 parseRecord(); 1223 // This does not apply for Java, JavaScript and C#. 1224 if (Style.Language == FormatStyle::LK_Java || 1225 Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) { 1226 if (FormatTok->is(tok::semi)) 1227 nextToken(); 1228 addUnwrappedLine(); 1229 return; 1230 } 1231 break; 1232 case tok::period: 1233 nextToken(); 1234 // In Java, classes have an implicit static member "class". 1235 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1236 FormatTok->is(tok::kw_class)) 1237 nextToken(); 1238 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 1239 FormatTok->Tok.getIdentifierInfo()) 1240 // JavaScript only has pseudo keywords, all keywords are allowed to 1241 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1242 nextToken(); 1243 break; 1244 case tok::semi: 1245 nextToken(); 1246 addUnwrappedLine(); 1247 return; 1248 case tok::r_brace: 1249 addUnwrappedLine(); 1250 return; 1251 case tok::l_paren: 1252 parseParens(); 1253 break; 1254 case tok::kw_operator: 1255 nextToken(); 1256 if (FormatTok->isBinaryOperator()) 1257 nextToken(); 1258 break; 1259 case tok::caret: 1260 nextToken(); 1261 if (FormatTok->Tok.isAnyIdentifier() || 1262 FormatTok->isSimpleTypeSpecifier()) 1263 nextToken(); 1264 if (FormatTok->is(tok::l_paren)) 1265 parseParens(); 1266 if (FormatTok->is(tok::l_brace)) 1267 parseChildBlock(); 1268 break; 1269 case tok::l_brace: 1270 if (!tryToParseBracedList()) { 1271 // A block outside of parentheses must be the last part of a 1272 // structural element. 1273 // FIXME: Figure out cases where this is not true, and add projections 1274 // for them (the one we know is missing are lambdas). 1275 if (Style.BraceWrapping.AfterFunction) 1276 addUnwrappedLine(); 1277 FormatTok->Type = TT_FunctionLBrace; 1278 parseBlock(/*MustBeDeclaration=*/false); 1279 addUnwrappedLine(); 1280 return; 1281 } 1282 // Otherwise this was a braced init list, and the structural 1283 // element continues. 1284 break; 1285 case tok::kw_try: 1286 // We arrive here when parsing function-try blocks. 1287 if (Style.BraceWrapping.AfterFunction) 1288 addUnwrappedLine(); 1289 parseTryCatch(); 1290 return; 1291 case tok::identifier: { 1292 if (FormatTok->is(TT_MacroBlockEnd)) { 1293 addUnwrappedLine(); 1294 return; 1295 } 1296 1297 // Function declarations (as opposed to function expressions) are parsed 1298 // on their own unwrapped line by continuing this loop. Function 1299 // expressions (functions that are not on their own line) must not create 1300 // a new unwrapped line, so they are special cased below. 1301 size_t TokenCount = Line->Tokens.size(); 1302 if (Style.Language == FormatStyle::LK_JavaScript && 1303 FormatTok->is(Keywords.kw_function) && 1304 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1305 Keywords.kw_async)))) { 1306 tryToParseJSFunction(); 1307 break; 1308 } 1309 if ((Style.Language == FormatStyle::LK_JavaScript || 1310 Style.Language == FormatStyle::LK_Java) && 1311 FormatTok->is(Keywords.kw_interface)) { 1312 if (Style.Language == FormatStyle::LK_JavaScript) { 1313 // In JavaScript/TypeScript, "interface" can be used as a standalone 1314 // identifier, e.g. in `var interface = 1;`. If "interface" is 1315 // followed by another identifier, it is very like to be an actual 1316 // interface declaration. 1317 unsigned StoredPosition = Tokens->getPosition(); 1318 FormatToken *Next = Tokens->getNextToken(); 1319 FormatTok = Tokens->setPosition(StoredPosition); 1320 if (Next && !mustBeJSIdent(Keywords, Next)) { 1321 nextToken(); 1322 break; 1323 } 1324 } 1325 parseRecord(); 1326 addUnwrappedLine(); 1327 return; 1328 } 1329 1330 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1331 parseStatementMacro(); 1332 return; 1333 } 1334 1335 // See if the following token should start a new unwrapped line. 1336 StringRef Text = FormatTok->TokenText; 1337 nextToken(); 1338 1339 // JS doesn't have macros, and within classes colons indicate fields, not 1340 // labels. 1341 if (Style.Language == FormatStyle::LK_JavaScript) 1342 break; 1343 1344 TokenCount = Line->Tokens.size(); 1345 if (TokenCount == 1 || 1346 (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) { 1347 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1348 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1349 parseLabel(); 1350 return; 1351 } 1352 // Recognize function-like macro usages without trailing semicolon as 1353 // well as free-standing macros like Q_OBJECT. 1354 bool FunctionLike = FormatTok->is(tok::l_paren); 1355 if (FunctionLike) 1356 parseParens(); 1357 1358 bool FollowedByNewline = 1359 CommentsBeforeNextToken.empty() 1360 ? FormatTok->NewlinesBefore > 0 1361 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1362 1363 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1364 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 1365 addUnwrappedLine(); 1366 return; 1367 } 1368 } 1369 break; 1370 } 1371 case tok::equal: 1372 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 1373 // TT_JsFatArrow. The always start an expression or a child block if 1374 // followed by a curly. 1375 if (FormatTok->is(TT_JsFatArrow)) { 1376 nextToken(); 1377 if (FormatTok->is(tok::l_brace)) 1378 parseChildBlock(); 1379 break; 1380 } 1381 1382 nextToken(); 1383 if (FormatTok->Tok.is(tok::l_brace)) { 1384 nextToken(); 1385 parseBracedList(); 1386 } else if (Style.Language == FormatStyle::LK_Proto && 1387 FormatTok->Tok.is(tok::less)) { 1388 nextToken(); 1389 parseBracedList(/*ContinueOnSemicolons=*/false, 1390 /*ClosingBraceKind=*/tok::greater); 1391 } 1392 break; 1393 case tok::l_square: 1394 parseSquare(); 1395 break; 1396 case tok::kw_new: 1397 parseNew(); 1398 break; 1399 default: 1400 nextToken(); 1401 break; 1402 } 1403 } while (!eof()); 1404 } 1405 1406 bool UnwrappedLineParser::tryToParseLambda() { 1407 if (!Style.isCpp()) { 1408 nextToken(); 1409 return false; 1410 } 1411 assert(FormatTok->is(tok::l_square)); 1412 FormatToken &LSquare = *FormatTok; 1413 if (!tryToParseLambdaIntroducer()) 1414 return false; 1415 1416 bool SeenArrow = false; 1417 1418 while (FormatTok->isNot(tok::l_brace)) { 1419 if (FormatTok->isSimpleTypeSpecifier()) { 1420 nextToken(); 1421 continue; 1422 } 1423 switch (FormatTok->Tok.getKind()) { 1424 case tok::l_brace: 1425 break; 1426 case tok::l_paren: 1427 parseParens(); 1428 break; 1429 case tok::amp: 1430 case tok::star: 1431 case tok::kw_const: 1432 case tok::comma: 1433 case tok::less: 1434 case tok::greater: 1435 case tok::identifier: 1436 case tok::numeric_constant: 1437 case tok::coloncolon: 1438 case tok::kw_mutable: 1439 case tok::kw_noexcept: 1440 nextToken(); 1441 break; 1442 // Specialization of a template with an integer parameter can contain 1443 // arithmetic, logical, comparison and ternary operators. 1444 // 1445 // FIXME: This also accepts sequences of operators that are not in the scope 1446 // of a template argument list. 1447 // 1448 // In a C++ lambda a template type can only occur after an arrow. We use 1449 // this as an heuristic to distinguish between Objective-C expressions 1450 // followed by an `a->b` expression, such as: 1451 // ([obj func:arg] + a->b) 1452 // Otherwise the code below would parse as a lambda. 1453 case tok::plus: 1454 case tok::minus: 1455 case tok::exclaim: 1456 case tok::tilde: 1457 case tok::slash: 1458 case tok::percent: 1459 case tok::lessless: 1460 case tok::pipe: 1461 case tok::pipepipe: 1462 case tok::ampamp: 1463 case tok::caret: 1464 case tok::equalequal: 1465 case tok::exclaimequal: 1466 case tok::greaterequal: 1467 case tok::lessequal: 1468 case tok::question: 1469 case tok::colon: 1470 case tok::kw_true: 1471 case tok::kw_false: 1472 if (SeenArrow) { 1473 nextToken(); 1474 break; 1475 } 1476 return true; 1477 case tok::arrow: 1478 // This might or might not actually be a lambda arrow (this could be an 1479 // ObjC method invocation followed by a dereferencing arrow). We might 1480 // reset this back to TT_Unknown in TokenAnnotator. 1481 FormatTok->Type = TT_LambdaArrow; 1482 SeenArrow = true; 1483 nextToken(); 1484 break; 1485 default: 1486 return true; 1487 } 1488 } 1489 FormatTok->Type = TT_LambdaLBrace; 1490 LSquare.Type = TT_LambdaLSquare; 1491 parseChildBlock(); 1492 return true; 1493 } 1494 1495 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1496 const FormatToken *Previous = FormatTok->Previous; 1497 if (Previous && 1498 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1499 tok::kw_delete, tok::l_square) || 1500 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1501 Previous->isSimpleTypeSpecifier())) { 1502 nextToken(); 1503 return false; 1504 } 1505 nextToken(); 1506 if (FormatTok->is(tok::l_square)) { 1507 return false; 1508 } 1509 parseSquare(/*LambdaIntroducer=*/true); 1510 return true; 1511 } 1512 1513 void UnwrappedLineParser::tryToParseJSFunction() { 1514 assert(FormatTok->is(Keywords.kw_function) || 1515 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1516 if (FormatTok->is(Keywords.kw_async)) 1517 nextToken(); 1518 // Consume "function". 1519 nextToken(); 1520 1521 // Consume * (generator function). Treat it like C++'s overloaded operators. 1522 if (FormatTok->is(tok::star)) { 1523 FormatTok->Type = TT_OverloadedOperator; 1524 nextToken(); 1525 } 1526 1527 // Consume function name. 1528 if (FormatTok->is(tok::identifier)) 1529 nextToken(); 1530 1531 if (FormatTok->isNot(tok::l_paren)) 1532 return; 1533 1534 // Parse formal parameter list. 1535 parseParens(); 1536 1537 if (FormatTok->is(tok::colon)) { 1538 // Parse a type definition. 1539 nextToken(); 1540 1541 // Eat the type declaration. For braced inline object types, balance braces, 1542 // otherwise just parse until finding an l_brace for the function body. 1543 if (FormatTok->is(tok::l_brace)) 1544 tryToParseBracedList(); 1545 else 1546 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1547 nextToken(); 1548 } 1549 1550 if (FormatTok->is(tok::semi)) 1551 return; 1552 1553 parseChildBlock(); 1554 } 1555 1556 bool UnwrappedLineParser::tryToParseBracedList() { 1557 if (FormatTok->BlockKind == BK_Unknown) 1558 calculateBraceTypes(); 1559 assert(FormatTok->BlockKind != BK_Unknown); 1560 if (FormatTok->BlockKind == BK_Block) 1561 return false; 1562 nextToken(); 1563 parseBracedList(); 1564 return true; 1565 } 1566 1567 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1568 tok::TokenKind ClosingBraceKind) { 1569 bool HasError = false; 1570 1571 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1572 // replace this by using parseAssigmentExpression() inside. 1573 do { 1574 if (Style.Language == FormatStyle::LK_JavaScript) { 1575 if (FormatTok->is(Keywords.kw_function) || 1576 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1577 tryToParseJSFunction(); 1578 continue; 1579 } 1580 if (FormatTok->is(TT_JsFatArrow)) { 1581 nextToken(); 1582 // Fat arrows can be followed by simple expressions or by child blocks 1583 // in curly braces. 1584 if (FormatTok->is(tok::l_brace)) { 1585 parseChildBlock(); 1586 continue; 1587 } 1588 } 1589 if (FormatTok->is(tok::l_brace)) { 1590 // Could be a method inside of a braced list `{a() { return 1; }}`. 1591 if (tryToParseBracedList()) 1592 continue; 1593 parseChildBlock(); 1594 } 1595 } 1596 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1597 nextToken(); 1598 return !HasError; 1599 } 1600 switch (FormatTok->Tok.getKind()) { 1601 case tok::caret: 1602 nextToken(); 1603 if (FormatTok->is(tok::l_brace)) { 1604 parseChildBlock(); 1605 } 1606 break; 1607 case tok::l_square: 1608 tryToParseLambda(); 1609 break; 1610 case tok::l_paren: 1611 parseParens(); 1612 // JavaScript can just have free standing methods and getters/setters in 1613 // object literals. Detect them by a "{" following ")". 1614 if (Style.Language == FormatStyle::LK_JavaScript) { 1615 if (FormatTok->is(tok::l_brace)) 1616 parseChildBlock(); 1617 break; 1618 } 1619 break; 1620 case tok::l_brace: 1621 // Assume there are no blocks inside a braced init list apart 1622 // from the ones we explicitly parse out (like lambdas). 1623 FormatTok->BlockKind = BK_BracedInit; 1624 nextToken(); 1625 parseBracedList(); 1626 break; 1627 case tok::less: 1628 if (Style.Language == FormatStyle::LK_Proto) { 1629 nextToken(); 1630 parseBracedList(/*ContinueOnSemicolons=*/false, 1631 /*ClosingBraceKind=*/tok::greater); 1632 } else { 1633 nextToken(); 1634 } 1635 break; 1636 case tok::semi: 1637 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1638 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1639 // used for error recovery if we have otherwise determined that this is 1640 // a braced list. 1641 if (Style.Language == FormatStyle::LK_JavaScript) { 1642 nextToken(); 1643 break; 1644 } 1645 HasError = true; 1646 if (!ContinueOnSemicolons) 1647 return !HasError; 1648 nextToken(); 1649 break; 1650 case tok::comma: 1651 nextToken(); 1652 break; 1653 default: 1654 nextToken(); 1655 break; 1656 } 1657 } while (!eof()); 1658 return false; 1659 } 1660 1661 void UnwrappedLineParser::parseParens() { 1662 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1663 nextToken(); 1664 do { 1665 switch (FormatTok->Tok.getKind()) { 1666 case tok::l_paren: 1667 parseParens(); 1668 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1669 parseChildBlock(); 1670 break; 1671 case tok::r_paren: 1672 nextToken(); 1673 return; 1674 case tok::r_brace: 1675 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1676 return; 1677 case tok::l_square: 1678 tryToParseLambda(); 1679 break; 1680 case tok::l_brace: 1681 if (!tryToParseBracedList()) 1682 parseChildBlock(); 1683 break; 1684 case tok::at: 1685 nextToken(); 1686 if (FormatTok->Tok.is(tok::l_brace)) { 1687 nextToken(); 1688 parseBracedList(); 1689 } 1690 break; 1691 case tok::kw_class: 1692 if (Style.Language == FormatStyle::LK_JavaScript) 1693 parseRecord(/*ParseAsExpr=*/true); 1694 else 1695 nextToken(); 1696 break; 1697 case tok::identifier: 1698 if (Style.Language == FormatStyle::LK_JavaScript && 1699 (FormatTok->is(Keywords.kw_function) || 1700 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1701 tryToParseJSFunction(); 1702 else 1703 nextToken(); 1704 break; 1705 default: 1706 nextToken(); 1707 break; 1708 } 1709 } while (!eof()); 1710 } 1711 1712 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 1713 if (!LambdaIntroducer) { 1714 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1715 if (tryToParseLambda()) 1716 return; 1717 } 1718 do { 1719 switch (FormatTok->Tok.getKind()) { 1720 case tok::l_paren: 1721 parseParens(); 1722 break; 1723 case tok::r_square: 1724 nextToken(); 1725 return; 1726 case tok::r_brace: 1727 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1728 return; 1729 case tok::l_square: 1730 parseSquare(); 1731 break; 1732 case tok::l_brace: { 1733 if (!tryToParseBracedList()) 1734 parseChildBlock(); 1735 break; 1736 } 1737 case tok::at: 1738 nextToken(); 1739 if (FormatTok->Tok.is(tok::l_brace)) { 1740 nextToken(); 1741 parseBracedList(); 1742 } 1743 break; 1744 default: 1745 nextToken(); 1746 break; 1747 } 1748 } while (!eof()); 1749 } 1750 1751 void UnwrappedLineParser::parseIfThenElse() { 1752 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1753 nextToken(); 1754 if (FormatTok->Tok.is(tok::kw_constexpr)) 1755 nextToken(); 1756 if (FormatTok->Tok.is(tok::l_paren)) 1757 parseParens(); 1758 bool NeedsUnwrappedLine = false; 1759 if (FormatTok->Tok.is(tok::l_brace)) { 1760 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1761 parseBlock(/*MustBeDeclaration=*/false); 1762 if (Style.BraceWrapping.BeforeElse) 1763 addUnwrappedLine(); 1764 else 1765 NeedsUnwrappedLine = true; 1766 } else { 1767 addUnwrappedLine(); 1768 ++Line->Level; 1769 parseStructuralElement(); 1770 --Line->Level; 1771 } 1772 if (FormatTok->Tok.is(tok::kw_else)) { 1773 nextToken(); 1774 if (FormatTok->Tok.is(tok::l_brace)) { 1775 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1776 parseBlock(/*MustBeDeclaration=*/false); 1777 addUnwrappedLine(); 1778 } else if (FormatTok->Tok.is(tok::kw_if)) { 1779 parseIfThenElse(); 1780 } else { 1781 addUnwrappedLine(); 1782 ++Line->Level; 1783 parseStructuralElement(); 1784 if (FormatTok->is(tok::eof)) 1785 addUnwrappedLine(); 1786 --Line->Level; 1787 } 1788 } else if (NeedsUnwrappedLine) { 1789 addUnwrappedLine(); 1790 } 1791 } 1792 1793 void UnwrappedLineParser::parseTryCatch() { 1794 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1795 nextToken(); 1796 bool NeedsUnwrappedLine = false; 1797 if (FormatTok->is(tok::colon)) { 1798 // We are in a function try block, what comes is an initializer list. 1799 nextToken(); 1800 while (FormatTok->is(tok::identifier)) { 1801 nextToken(); 1802 if (FormatTok->is(tok::l_paren)) 1803 parseParens(); 1804 if (FormatTok->is(tok::comma)) 1805 nextToken(); 1806 } 1807 } 1808 // Parse try with resource. 1809 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1810 parseParens(); 1811 } 1812 if (FormatTok->is(tok::l_brace)) { 1813 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1814 parseBlock(/*MustBeDeclaration=*/false); 1815 if (Style.BraceWrapping.BeforeCatch) { 1816 addUnwrappedLine(); 1817 } else { 1818 NeedsUnwrappedLine = true; 1819 } 1820 } else if (!FormatTok->is(tok::kw_catch)) { 1821 // The C++ standard requires a compound-statement after a try. 1822 // If there's none, we try to assume there's a structuralElement 1823 // and try to continue. 1824 addUnwrappedLine(); 1825 ++Line->Level; 1826 parseStructuralElement(); 1827 --Line->Level; 1828 } 1829 while (1) { 1830 if (FormatTok->is(tok::at)) 1831 nextToken(); 1832 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1833 tok::kw___finally) || 1834 ((Style.Language == FormatStyle::LK_Java || 1835 Style.Language == FormatStyle::LK_JavaScript) && 1836 FormatTok->is(Keywords.kw_finally)) || 1837 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1838 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1839 break; 1840 nextToken(); 1841 while (FormatTok->isNot(tok::l_brace)) { 1842 if (FormatTok->is(tok::l_paren)) { 1843 parseParens(); 1844 continue; 1845 } 1846 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1847 return; 1848 nextToken(); 1849 } 1850 NeedsUnwrappedLine = false; 1851 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1852 parseBlock(/*MustBeDeclaration=*/false); 1853 if (Style.BraceWrapping.BeforeCatch) 1854 addUnwrappedLine(); 1855 else 1856 NeedsUnwrappedLine = true; 1857 } 1858 if (NeedsUnwrappedLine) 1859 addUnwrappedLine(); 1860 } 1861 1862 void UnwrappedLineParser::parseNamespace() { 1863 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1864 1865 const FormatToken &InitialToken = *FormatTok; 1866 nextToken(); 1867 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1868 nextToken(); 1869 if (FormatTok->Tok.is(tok::l_brace)) { 1870 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1871 addUnwrappedLine(); 1872 1873 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1874 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1875 DeclarationScopeStack.size() > 1); 1876 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1877 // Munch the semicolon after a namespace. This is more common than one would 1878 // think. Puttin the semicolon into its own line is very ugly. 1879 if (FormatTok->Tok.is(tok::semi)) 1880 nextToken(); 1881 addUnwrappedLine(); 1882 } 1883 // FIXME: Add error handling. 1884 } 1885 1886 void UnwrappedLineParser::parseNew() { 1887 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1888 nextToken(); 1889 if (Style.Language != FormatStyle::LK_Java) 1890 return; 1891 1892 // In Java, we can parse everything up to the parens, which aren't optional. 1893 do { 1894 // There should not be a ;, { or } before the new's open paren. 1895 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1896 return; 1897 1898 // Consume the parens. 1899 if (FormatTok->is(tok::l_paren)) { 1900 parseParens(); 1901 1902 // If there is a class body of an anonymous class, consume that as child. 1903 if (FormatTok->is(tok::l_brace)) 1904 parseChildBlock(); 1905 return; 1906 } 1907 nextToken(); 1908 } while (!eof()); 1909 } 1910 1911 void UnwrappedLineParser::parseForOrWhileLoop() { 1912 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1913 "'for', 'while' or foreach macro expected"); 1914 nextToken(); 1915 // JS' for await ( ... 1916 if (Style.Language == FormatStyle::LK_JavaScript && 1917 FormatTok->is(Keywords.kw_await)) 1918 nextToken(); 1919 if (FormatTok->Tok.is(tok::l_paren)) 1920 parseParens(); 1921 if (FormatTok->Tok.is(tok::l_brace)) { 1922 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1923 parseBlock(/*MustBeDeclaration=*/false); 1924 addUnwrappedLine(); 1925 } else { 1926 addUnwrappedLine(); 1927 ++Line->Level; 1928 parseStructuralElement(); 1929 --Line->Level; 1930 } 1931 } 1932 1933 void UnwrappedLineParser::parseDoWhile() { 1934 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1935 nextToken(); 1936 if (FormatTok->Tok.is(tok::l_brace)) { 1937 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1938 parseBlock(/*MustBeDeclaration=*/false); 1939 if (Style.BraceWrapping.IndentBraces) 1940 addUnwrappedLine(); 1941 } else { 1942 addUnwrappedLine(); 1943 ++Line->Level; 1944 parseStructuralElement(); 1945 --Line->Level; 1946 } 1947 1948 // FIXME: Add error handling. 1949 if (!FormatTok->Tok.is(tok::kw_while)) { 1950 addUnwrappedLine(); 1951 return; 1952 } 1953 1954 nextToken(); 1955 parseStructuralElement(); 1956 } 1957 1958 void UnwrappedLineParser::parseLabel() { 1959 nextToken(); 1960 unsigned OldLineLevel = Line->Level; 1961 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1962 --Line->Level; 1963 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1964 CompoundStatementIndenter Indenter(this, Line->Level, 1965 Style.BraceWrapping.AfterCaseLabel, 1966 Style.BraceWrapping.IndentBraces); 1967 parseBlock(/*MustBeDeclaration=*/false); 1968 if (FormatTok->Tok.is(tok::kw_break)) { 1969 if (Style.BraceWrapping.AfterControlStatement) 1970 addUnwrappedLine(); 1971 parseStructuralElement(); 1972 } 1973 addUnwrappedLine(); 1974 } else { 1975 if (FormatTok->is(tok::semi)) 1976 nextToken(); 1977 addUnwrappedLine(); 1978 } 1979 Line->Level = OldLineLevel; 1980 if (FormatTok->isNot(tok::l_brace)) { 1981 parseStructuralElement(); 1982 addUnwrappedLine(); 1983 } 1984 } 1985 1986 void UnwrappedLineParser::parseCaseLabel() { 1987 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1988 // FIXME: fix handling of complex expressions here. 1989 do { 1990 nextToken(); 1991 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1992 parseLabel(); 1993 } 1994 1995 void UnwrappedLineParser::parseSwitch() { 1996 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1997 nextToken(); 1998 if (FormatTok->Tok.is(tok::l_paren)) 1999 parseParens(); 2000 if (FormatTok->Tok.is(tok::l_brace)) { 2001 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2002 parseBlock(/*MustBeDeclaration=*/false); 2003 addUnwrappedLine(); 2004 } else { 2005 addUnwrappedLine(); 2006 ++Line->Level; 2007 parseStructuralElement(); 2008 --Line->Level; 2009 } 2010 } 2011 2012 void UnwrappedLineParser::parseAccessSpecifier() { 2013 nextToken(); 2014 // Understand Qt's slots. 2015 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 2016 nextToken(); 2017 // Otherwise, we don't know what it is, and we'd better keep the next token. 2018 if (FormatTok->Tok.is(tok::colon)) 2019 nextToken(); 2020 addUnwrappedLine(); 2021 } 2022 2023 bool UnwrappedLineParser::parseEnum() { 2024 // Won't be 'enum' for NS_ENUMs. 2025 if (FormatTok->Tok.is(tok::kw_enum)) 2026 nextToken(); 2027 2028 // In TypeScript, "enum" can also be used as property name, e.g. in interface 2029 // declarations. An "enum" keyword followed by a colon would be a syntax 2030 // error and thus assume it is just an identifier. 2031 if (Style.Language == FormatStyle::LK_JavaScript && 2032 FormatTok->isOneOf(tok::colon, tok::question)) 2033 return false; 2034 2035 // In protobuf, "enum" can be used as a field name. 2036 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 2037 return false; 2038 2039 // Eat up enum class ... 2040 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 2041 nextToken(); 2042 2043 while (FormatTok->Tok.getIdentifierInfo() || 2044 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 2045 tok::greater, tok::comma, tok::question)) { 2046 nextToken(); 2047 // We can have macros or attributes in between 'enum' and the enum name. 2048 if (FormatTok->is(tok::l_paren)) 2049 parseParens(); 2050 if (FormatTok->is(tok::identifier)) { 2051 nextToken(); 2052 // If there are two identifiers in a row, this is likely an elaborate 2053 // return type. In Java, this can be "implements", etc. 2054 if (Style.isCpp() && FormatTok->is(tok::identifier)) 2055 return false; 2056 } 2057 } 2058 2059 // Just a declaration or something is wrong. 2060 if (FormatTok->isNot(tok::l_brace)) 2061 return true; 2062 FormatTok->BlockKind = BK_Block; 2063 2064 if (Style.Language == FormatStyle::LK_Java) { 2065 // Java enums are different. 2066 parseJavaEnumBody(); 2067 return true; 2068 } 2069 if (Style.Language == FormatStyle::LK_Proto) { 2070 parseBlock(/*MustBeDeclaration=*/true); 2071 return true; 2072 } 2073 2074 // Parse enum body. 2075 nextToken(); 2076 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 2077 if (HasError) { 2078 if (FormatTok->is(tok::semi)) 2079 nextToken(); 2080 addUnwrappedLine(); 2081 } 2082 return true; 2083 2084 // There is no addUnwrappedLine() here so that we fall through to parsing a 2085 // structural element afterwards. Thus, in "enum A {} n, m;", 2086 // "} n, m;" will end up in one unwrapped line. 2087 } 2088 2089 void UnwrappedLineParser::parseJavaEnumBody() { 2090 // Determine whether the enum is simple, i.e. does not have a semicolon or 2091 // constants with class bodies. Simple enums can be formatted like braced 2092 // lists, contracted to a single line, etc. 2093 unsigned StoredPosition = Tokens->getPosition(); 2094 bool IsSimple = true; 2095 FormatToken *Tok = Tokens->getNextToken(); 2096 while (Tok) { 2097 if (Tok->is(tok::r_brace)) 2098 break; 2099 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2100 IsSimple = false; 2101 break; 2102 } 2103 // FIXME: This will also mark enums with braces in the arguments to enum 2104 // constants as "not simple". This is probably fine in practice, though. 2105 Tok = Tokens->getNextToken(); 2106 } 2107 FormatTok = Tokens->setPosition(StoredPosition); 2108 2109 if (IsSimple) { 2110 nextToken(); 2111 parseBracedList(); 2112 addUnwrappedLine(); 2113 return; 2114 } 2115 2116 // Parse the body of a more complex enum. 2117 // First add a line for everything up to the "{". 2118 nextToken(); 2119 addUnwrappedLine(); 2120 ++Line->Level; 2121 2122 // Parse the enum constants. 2123 while (FormatTok) { 2124 if (FormatTok->is(tok::l_brace)) { 2125 // Parse the constant's class body. 2126 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2127 /*MunchSemi=*/false); 2128 } else if (FormatTok->is(tok::l_paren)) { 2129 parseParens(); 2130 } else if (FormatTok->is(tok::comma)) { 2131 nextToken(); 2132 addUnwrappedLine(); 2133 } else if (FormatTok->is(tok::semi)) { 2134 nextToken(); 2135 addUnwrappedLine(); 2136 break; 2137 } else if (FormatTok->is(tok::r_brace)) { 2138 addUnwrappedLine(); 2139 break; 2140 } else { 2141 nextToken(); 2142 } 2143 } 2144 2145 // Parse the class body after the enum's ";" if any. 2146 parseLevel(/*HasOpeningBrace=*/true); 2147 nextToken(); 2148 --Line->Level; 2149 addUnwrappedLine(); 2150 } 2151 2152 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 2153 const FormatToken &InitialToken = *FormatTok; 2154 nextToken(); 2155 2156 // The actual identifier can be a nested name specifier, and in macros 2157 // it is often token-pasted. 2158 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 2159 tok::kw___attribute, tok::kw___declspec, 2160 tok::kw_alignas) || 2161 ((Style.Language == FormatStyle::LK_Java || 2162 Style.Language == FormatStyle::LK_JavaScript) && 2163 FormatTok->isOneOf(tok::period, tok::comma))) { 2164 if (Style.Language == FormatStyle::LK_JavaScript && 2165 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 2166 // JavaScript/TypeScript supports inline object types in 2167 // extends/implements positions: 2168 // class Foo implements {bar: number} { } 2169 nextToken(); 2170 if (FormatTok->is(tok::l_brace)) { 2171 tryToParseBracedList(); 2172 continue; 2173 } 2174 } 2175 bool IsNonMacroIdentifier = 2176 FormatTok->is(tok::identifier) && 2177 FormatTok->TokenText != FormatTok->TokenText.upper(); 2178 nextToken(); 2179 // We can have macros or attributes in between 'class' and the class name. 2180 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 2181 parseParens(); 2182 } 2183 2184 // Note that parsing away template declarations here leads to incorrectly 2185 // accepting function declarations as record declarations. 2186 // In general, we cannot solve this problem. Consider: 2187 // class A<int> B() {} 2188 // which can be a function definition or a class definition when B() is a 2189 // macro. If we find enough real-world cases where this is a problem, we 2190 // can parse for the 'template' keyword in the beginning of the statement, 2191 // and thus rule out the record production in case there is no template 2192 // (this would still leave us with an ambiguity between template function 2193 // and class declarations). 2194 if (FormatTok->isOneOf(tok::colon, tok::less)) { 2195 while (!eof()) { 2196 if (FormatTok->is(tok::l_brace)) { 2197 calculateBraceTypes(/*ExpectClassBody=*/true); 2198 if (!tryToParseBracedList()) 2199 break; 2200 } 2201 if (FormatTok->Tok.is(tok::semi)) 2202 return; 2203 nextToken(); 2204 } 2205 } 2206 if (FormatTok->Tok.is(tok::l_brace)) { 2207 if (ParseAsExpr) { 2208 parseChildBlock(); 2209 } else { 2210 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2211 addUnwrappedLine(); 2212 2213 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2214 /*MunchSemi=*/false); 2215 } 2216 } 2217 // There is no addUnwrappedLine() here so that we fall through to parsing a 2218 // structural element afterwards. Thus, in "class A {} n, m;", 2219 // "} n, m;" will end up in one unwrapped line. 2220 } 2221 2222 void UnwrappedLineParser::parseObjCMethod() { 2223 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && 2224 "'(' or identifier expected."); 2225 do { 2226 if (FormatTok->Tok.is(tok::semi)) { 2227 nextToken(); 2228 addUnwrappedLine(); 2229 return; 2230 } else if (FormatTok->Tok.is(tok::l_brace)) { 2231 if (Style.BraceWrapping.AfterFunction) 2232 addUnwrappedLine(); 2233 parseBlock(/*MustBeDeclaration=*/false); 2234 addUnwrappedLine(); 2235 return; 2236 } else { 2237 nextToken(); 2238 } 2239 } while (!eof()); 2240 } 2241 2242 void UnwrappedLineParser::parseObjCProtocolList() { 2243 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 2244 do { 2245 nextToken(); 2246 // Early exit in case someone forgot a close angle. 2247 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2248 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2249 return; 2250 } while (!eof() && FormatTok->Tok.isNot(tok::greater)); 2251 nextToken(); // Skip '>'. 2252 } 2253 2254 void UnwrappedLineParser::parseObjCUntilAtEnd() { 2255 do { 2256 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2257 nextToken(); 2258 addUnwrappedLine(); 2259 break; 2260 } 2261 if (FormatTok->is(tok::l_brace)) { 2262 parseBlock(/*MustBeDeclaration=*/false); 2263 // In ObjC interfaces, nothing should be following the "}". 2264 addUnwrappedLine(); 2265 } else if (FormatTok->is(tok::r_brace)) { 2266 // Ignore stray "}". parseStructuralElement doesn't consume them. 2267 nextToken(); 2268 addUnwrappedLine(); 2269 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 2270 nextToken(); 2271 parseObjCMethod(); 2272 } else { 2273 parseStructuralElement(); 2274 } 2275 } while (!eof()); 2276 } 2277 2278 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2279 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 2280 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 2281 nextToken(); 2282 nextToken(); // interface name 2283 2284 // @interface can be followed by a lightweight generic 2285 // specialization list, then either a base class or a category. 2286 if (FormatTok->Tok.is(tok::less)) { 2287 // Unlike protocol lists, generic parameterizations support 2288 // nested angles: 2289 // 2290 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 2291 // NSObject <NSCopying, NSSecureCoding> 2292 // 2293 // so we need to count how many open angles we have left. 2294 unsigned NumOpenAngles = 1; 2295 do { 2296 nextToken(); 2297 // Early exit in case someone forgot a close angle. 2298 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2299 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2300 break; 2301 if (FormatTok->Tok.is(tok::less)) 2302 ++NumOpenAngles; 2303 else if (FormatTok->Tok.is(tok::greater)) { 2304 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 2305 --NumOpenAngles; 2306 } 2307 } while (!eof() && NumOpenAngles != 0); 2308 nextToken(); // Skip '>'. 2309 } 2310 if (FormatTok->Tok.is(tok::colon)) { 2311 nextToken(); 2312 nextToken(); // base class name 2313 } else if (FormatTok->Tok.is(tok::l_paren)) 2314 // Skip category, if present. 2315 parseParens(); 2316 2317 if (FormatTok->Tok.is(tok::less)) 2318 parseObjCProtocolList(); 2319 2320 if (FormatTok->Tok.is(tok::l_brace)) { 2321 if (Style.BraceWrapping.AfterObjCDeclaration) 2322 addUnwrappedLine(); 2323 parseBlock(/*MustBeDeclaration=*/true); 2324 } 2325 2326 // With instance variables, this puts '}' on its own line. Without instance 2327 // variables, this ends the @interface line. 2328 addUnwrappedLine(); 2329 2330 parseObjCUntilAtEnd(); 2331 } 2332 2333 // Returns true for the declaration/definition form of @protocol, 2334 // false for the expression form. 2335 bool UnwrappedLineParser::parseObjCProtocol() { 2336 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 2337 nextToken(); 2338 2339 if (FormatTok->is(tok::l_paren)) 2340 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 2341 return false; 2342 2343 // The definition/declaration form, 2344 // @protocol Foo 2345 // - (int)someMethod; 2346 // @end 2347 2348 nextToken(); // protocol name 2349 2350 if (FormatTok->Tok.is(tok::less)) 2351 parseObjCProtocolList(); 2352 2353 // Check for protocol declaration. 2354 if (FormatTok->Tok.is(tok::semi)) { 2355 nextToken(); 2356 addUnwrappedLine(); 2357 return true; 2358 } 2359 2360 addUnwrappedLine(); 2361 parseObjCUntilAtEnd(); 2362 return true; 2363 } 2364 2365 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2366 bool IsImport = FormatTok->is(Keywords.kw_import); 2367 assert(IsImport || FormatTok->is(tok::kw_export)); 2368 nextToken(); 2369 2370 // Consume the "default" in "export default class/function". 2371 if (FormatTok->is(tok::kw_default)) 2372 nextToken(); 2373 2374 // Consume "async function", "function" and "default function", so that these 2375 // get parsed as free-standing JS functions, i.e. do not require a trailing 2376 // semicolon. 2377 if (FormatTok->is(Keywords.kw_async)) 2378 nextToken(); 2379 if (FormatTok->is(Keywords.kw_function)) { 2380 nextToken(); 2381 return; 2382 } 2383 2384 // For imports, `export *`, `export {...}`, consume the rest of the line up 2385 // to the terminating `;`. For everything else, just return and continue 2386 // parsing the structural element, i.e. the declaration or expression for 2387 // `export default`. 2388 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2389 !FormatTok->isStringLiteral()) 2390 return; 2391 2392 while (!eof()) { 2393 if (FormatTok->is(tok::semi)) 2394 return; 2395 if (Line->Tokens.empty()) { 2396 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2397 // import statement should terminate. 2398 return; 2399 } 2400 if (FormatTok->is(tok::l_brace)) { 2401 FormatTok->BlockKind = BK_Block; 2402 nextToken(); 2403 parseBracedList(); 2404 } else { 2405 nextToken(); 2406 } 2407 } 2408 } 2409 2410 void UnwrappedLineParser::parseStatementMacro() { 2411 nextToken(); 2412 if (FormatTok->is(tok::l_paren)) 2413 parseParens(); 2414 if (FormatTok->is(tok::semi)) 2415 nextToken(); 2416 addUnwrappedLine(); 2417 } 2418 2419 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2420 StringRef Prefix = "") { 2421 llvm::dbgs() << Prefix << "Line(" << Line.Level 2422 << ", FSC=" << Line.FirstStartColumn << ")" 2423 << (Line.InPPDirective ? " MACRO" : "") << ": "; 2424 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2425 E = Line.Tokens.end(); 2426 I != E; ++I) { 2427 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2428 << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn 2429 << "] "; 2430 } 2431 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2432 E = Line.Tokens.end(); 2433 I != E; ++I) { 2434 const UnwrappedLineNode &Node = *I; 2435 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2436 I = Node.Children.begin(), 2437 E = Node.Children.end(); 2438 I != E; ++I) { 2439 printDebugInfo(*I, "\nChild: "); 2440 } 2441 } 2442 llvm::dbgs() << "\n"; 2443 } 2444 2445 void UnwrappedLineParser::addUnwrappedLine() { 2446 if (Line->Tokens.empty()) 2447 return; 2448 LLVM_DEBUG({ 2449 if (CurrentLines == &Lines) 2450 printDebugInfo(*Line); 2451 }); 2452 CurrentLines->push_back(std::move(*Line)); 2453 Line->Tokens.clear(); 2454 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2455 Line->FirstStartColumn = 0; 2456 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2457 CurrentLines->append( 2458 std::make_move_iterator(PreprocessorDirectives.begin()), 2459 std::make_move_iterator(PreprocessorDirectives.end())); 2460 PreprocessorDirectives.clear(); 2461 } 2462 // Disconnect the current token from the last token on the previous line. 2463 FormatTok->Previous = nullptr; 2464 } 2465 2466 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2467 2468 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2469 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2470 FormatTok.NewlinesBefore > 0; 2471 } 2472 2473 // Checks if \p FormatTok is a line comment that continues the line comment 2474 // section on \p Line. 2475 static bool continuesLineCommentSection(const FormatToken &FormatTok, 2476 const UnwrappedLine &Line, 2477 llvm::Regex &CommentPragmasRegex) { 2478 if (Line.Tokens.empty()) 2479 return false; 2480 2481 StringRef IndentContent = FormatTok.TokenText; 2482 if (FormatTok.TokenText.startswith("//") || 2483 FormatTok.TokenText.startswith("/*")) 2484 IndentContent = FormatTok.TokenText.substr(2); 2485 if (CommentPragmasRegex.match(IndentContent)) 2486 return false; 2487 2488 // If Line starts with a line comment, then FormatTok continues the comment 2489 // section if its original column is greater or equal to the original start 2490 // column of the line. 2491 // 2492 // Define the min column token of a line as follows: if a line ends in '{' or 2493 // contains a '{' followed by a line comment, then the min column token is 2494 // that '{'. Otherwise, the min column token of the line is the first token of 2495 // the line. 2496 // 2497 // If Line starts with a token other than a line comment, then FormatTok 2498 // continues the comment section if its original column is greater than the 2499 // original start column of the min column token of the line. 2500 // 2501 // For example, the second line comment continues the first in these cases: 2502 // 2503 // // first line 2504 // // second line 2505 // 2506 // and: 2507 // 2508 // // first line 2509 // // second line 2510 // 2511 // and: 2512 // 2513 // int i; // first line 2514 // // second line 2515 // 2516 // and: 2517 // 2518 // do { // first line 2519 // // second line 2520 // int i; 2521 // } while (true); 2522 // 2523 // and: 2524 // 2525 // enum { 2526 // a, // first line 2527 // // second line 2528 // b 2529 // }; 2530 // 2531 // The second line comment doesn't continue the first in these cases: 2532 // 2533 // // first line 2534 // // second line 2535 // 2536 // and: 2537 // 2538 // int i; // first line 2539 // // second line 2540 // 2541 // and: 2542 // 2543 // do { // first line 2544 // // second line 2545 // int i; 2546 // } while (true); 2547 // 2548 // and: 2549 // 2550 // enum { 2551 // a, // first line 2552 // // second line 2553 // }; 2554 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2555 2556 // Scan for '{//'. If found, use the column of '{' as a min column for line 2557 // comment section continuation. 2558 const FormatToken *PreviousToken = nullptr; 2559 for (const UnwrappedLineNode &Node : Line.Tokens) { 2560 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2561 isLineComment(*Node.Tok)) { 2562 MinColumnToken = PreviousToken; 2563 break; 2564 } 2565 PreviousToken = Node.Tok; 2566 2567 // Grab the last newline preceding a token in this unwrapped line. 2568 if (Node.Tok->NewlinesBefore > 0) { 2569 MinColumnToken = Node.Tok; 2570 } 2571 } 2572 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2573 MinColumnToken = PreviousToken; 2574 } 2575 2576 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2577 MinColumnToken); 2578 } 2579 2580 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2581 bool JustComments = Line->Tokens.empty(); 2582 for (SmallVectorImpl<FormatToken *>::const_iterator 2583 I = CommentsBeforeNextToken.begin(), 2584 E = CommentsBeforeNextToken.end(); 2585 I != E; ++I) { 2586 // Line comments that belong to the same line comment section are put on the 2587 // same line since later we might want to reflow content between them. 2588 // Additional fine-grained breaking of line comment sections is controlled 2589 // by the class BreakableLineCommentSection in case it is desirable to keep 2590 // several line comment sections in the same unwrapped line. 2591 // 2592 // FIXME: Consider putting separate line comment sections as children to the 2593 // unwrapped line instead. 2594 (*I)->ContinuesLineCommentSection = 2595 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2596 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2597 addUnwrappedLine(); 2598 pushToken(*I); 2599 } 2600 if (NewlineBeforeNext && JustComments) 2601 addUnwrappedLine(); 2602 CommentsBeforeNextToken.clear(); 2603 } 2604 2605 void UnwrappedLineParser::nextToken(int LevelDifference) { 2606 if (eof()) 2607 return; 2608 flushComments(isOnNewLine(*FormatTok)); 2609 pushToken(FormatTok); 2610 FormatToken *Previous = FormatTok; 2611 if (Style.Language != FormatStyle::LK_JavaScript) 2612 readToken(LevelDifference); 2613 else 2614 readTokenWithJavaScriptASI(); 2615 FormatTok->Previous = Previous; 2616 } 2617 2618 void UnwrappedLineParser::distributeComments( 2619 const SmallVectorImpl<FormatToken *> &Comments, 2620 const FormatToken *NextTok) { 2621 // Whether or not a line comment token continues a line is controlled by 2622 // the method continuesLineCommentSection, with the following caveat: 2623 // 2624 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2625 // that each comment line from the trail is aligned with the next token, if 2626 // the next token exists. If a trail exists, the beginning of the maximal 2627 // trail is marked as a start of a new comment section. 2628 // 2629 // For example in this code: 2630 // 2631 // int a; // line about a 2632 // // line 1 about b 2633 // // line 2 about b 2634 // int b; 2635 // 2636 // the two lines about b form a maximal trail, so there are two sections, the 2637 // first one consisting of the single comment "// line about a" and the 2638 // second one consisting of the next two comments. 2639 if (Comments.empty()) 2640 return; 2641 bool ShouldPushCommentsInCurrentLine = true; 2642 bool HasTrailAlignedWithNextToken = false; 2643 unsigned StartOfTrailAlignedWithNextToken = 0; 2644 if (NextTok) { 2645 // We are skipping the first element intentionally. 2646 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2647 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2648 HasTrailAlignedWithNextToken = true; 2649 StartOfTrailAlignedWithNextToken = i; 2650 } 2651 } 2652 } 2653 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2654 FormatToken *FormatTok = Comments[i]; 2655 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 2656 FormatTok->ContinuesLineCommentSection = false; 2657 } else { 2658 FormatTok->ContinuesLineCommentSection = 2659 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2660 } 2661 if (!FormatTok->ContinuesLineCommentSection && 2662 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2663 ShouldPushCommentsInCurrentLine = false; 2664 } 2665 if (ShouldPushCommentsInCurrentLine) { 2666 pushToken(FormatTok); 2667 } else { 2668 CommentsBeforeNextToken.push_back(FormatTok); 2669 } 2670 } 2671 } 2672 2673 void UnwrappedLineParser::readToken(int LevelDifference) { 2674 SmallVector<FormatToken *, 1> Comments; 2675 do { 2676 FormatTok = Tokens->getNextToken(); 2677 assert(FormatTok); 2678 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2679 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2680 distributeComments(Comments, FormatTok); 2681 Comments.clear(); 2682 // If there is an unfinished unwrapped line, we flush the preprocessor 2683 // directives only after that unwrapped line was finished later. 2684 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2685 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2686 assert((LevelDifference >= 0 || 2687 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 2688 "LevelDifference makes Line->Level negative"); 2689 Line->Level += LevelDifference; 2690 // Comments stored before the preprocessor directive need to be output 2691 // before the preprocessor directive, at the same level as the 2692 // preprocessor directive, as we consider them to apply to the directive. 2693 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 2694 PPBranchLevel > 0) 2695 Line->Level += PPBranchLevel; 2696 flushComments(isOnNewLine(*FormatTok)); 2697 parsePPDirective(); 2698 } 2699 while (FormatTok->Type == TT_ConflictStart || 2700 FormatTok->Type == TT_ConflictEnd || 2701 FormatTok->Type == TT_ConflictAlternative) { 2702 if (FormatTok->Type == TT_ConflictStart) { 2703 conditionalCompilationStart(/*Unreachable=*/false); 2704 } else if (FormatTok->Type == TT_ConflictAlternative) { 2705 conditionalCompilationAlternative(); 2706 } else if (FormatTok->Type == TT_ConflictEnd) { 2707 conditionalCompilationEnd(); 2708 } 2709 FormatTok = Tokens->getNextToken(); 2710 FormatTok->MustBreakBefore = true; 2711 } 2712 2713 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 2714 !Line->InPPDirective) { 2715 continue; 2716 } 2717 2718 if (!FormatTok->Tok.is(tok::comment)) { 2719 distributeComments(Comments, FormatTok); 2720 Comments.clear(); 2721 return; 2722 } 2723 2724 Comments.push_back(FormatTok); 2725 } while (!eof()); 2726 2727 distributeComments(Comments, nullptr); 2728 Comments.clear(); 2729 } 2730 2731 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2732 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2733 if (MustBreakBeforeNextToken) { 2734 Line->Tokens.back().Tok->MustBreakBefore = true; 2735 MustBreakBeforeNextToken = false; 2736 } 2737 } 2738 2739 } // end namespace format 2740 } // end namespace clang 2741