1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/Support/Debug.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 #include <algorithm> 21 22 #define DEBUG_TYPE "format-parser" 23 24 namespace clang { 25 namespace format { 26 27 class FormatTokenSource { 28 public: 29 virtual ~FormatTokenSource() {} 30 virtual FormatToken *getNextToken() = 0; 31 32 virtual unsigned getPosition() = 0; 33 virtual FormatToken *setPosition(unsigned Position) = 0; 34 }; 35 36 namespace { 37 38 class ScopedDeclarationState { 39 public: 40 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 41 bool MustBeDeclaration) 42 : Line(Line), Stack(Stack) { 43 Line.MustBeDeclaration = MustBeDeclaration; 44 Stack.push_back(MustBeDeclaration); 45 } 46 ~ScopedDeclarationState() { 47 Stack.pop_back(); 48 if (!Stack.empty()) 49 Line.MustBeDeclaration = Stack.back(); 50 else 51 Line.MustBeDeclaration = true; 52 } 53 54 private: 55 UnwrappedLine &Line; 56 std::vector<bool> &Stack; 57 }; 58 59 static bool isLineComment(const FormatToken &FormatTok) { 60 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 61 } 62 63 // Checks if \p FormatTok is a line comment that continues the line comment 64 // \p Previous. 
The original column of \p MinColumnToken is used to determine 65 // whether \p FormatTok is indented enough to the right to continue \p Previous. 66 static bool continuesLineComment(const FormatToken &FormatTok, 67 const FormatToken *Previous, 68 const FormatToken *MinColumnToken) { 69 if (!Previous || !MinColumnToken) 70 return false; 71 unsigned MinContinueColumn = 72 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 73 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 74 isLineComment(*Previous) && 75 FormatTok.OriginalColumn >= MinContinueColumn; 76 } 77 78 class ScopedMacroState : public FormatTokenSource { 79 public: 80 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 81 FormatToken *&ResetToken) 82 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 83 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 84 Token(nullptr), PreviousToken(nullptr) { 85 FakeEOF.Tok.startToken(); 86 FakeEOF.Tok.setKind(tok::eof); 87 TokenSource = this; 88 Line.Level = 0; 89 Line.InPPDirective = true; 90 } 91 92 ~ScopedMacroState() override { 93 TokenSource = PreviousTokenSource; 94 ResetToken = Token; 95 Line.InPPDirective = false; 96 Line.Level = PreviousLineLevel; 97 } 98 99 FormatToken *getNextToken() override { 100 // The \c UnwrappedLineParser guards against this by never calling 101 // \c getNextToken() after it has encountered the first eof token. 
102 assert(!eof()); 103 PreviousToken = Token; 104 Token = PreviousTokenSource->getNextToken(); 105 if (eof()) 106 return &FakeEOF; 107 return Token; 108 } 109 110 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 111 112 FormatToken *setPosition(unsigned Position) override { 113 PreviousToken = nullptr; 114 Token = PreviousTokenSource->setPosition(Position); 115 return Token; 116 } 117 118 private: 119 bool eof() { 120 return Token && Token->HasUnescapedNewline && 121 !continuesLineComment(*Token, PreviousToken, 122 /*MinColumnToken=*/PreviousToken); 123 } 124 125 FormatToken FakeEOF; 126 UnwrappedLine &Line; 127 FormatTokenSource *&TokenSource; 128 FormatToken *&ResetToken; 129 unsigned PreviousLineLevel; 130 FormatTokenSource *PreviousTokenSource; 131 132 FormatToken *Token; 133 FormatToken *PreviousToken; 134 }; 135 136 } // end anonymous namespace 137 138 class ScopedLineState { 139 public: 140 ScopedLineState(UnwrappedLineParser &Parser, 141 bool SwitchToPreprocessorLines = false) 142 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 143 if (SwitchToPreprocessorLines) 144 Parser.CurrentLines = &Parser.PreprocessorDirectives; 145 else if (!Parser.Line->Tokens.empty()) 146 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 147 PreBlockLine = std::move(Parser.Line); 148 Parser.Line = llvm::make_unique<UnwrappedLine>(); 149 Parser.Line->Level = PreBlockLine->Level; 150 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 151 } 152 153 ~ScopedLineState() { 154 if (!Parser.Line->Tokens.empty()) { 155 Parser.addUnwrappedLine(); 156 } 157 assert(Parser.Line->Tokens.empty()); 158 Parser.Line = std::move(PreBlockLine); 159 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 160 Parser.MustBreakBeforeNextToken = true; 161 Parser.CurrentLines = OriginalLines; 162 } 163 164 private: 165 UnwrappedLineParser &Parser; 166 167 std::unique_ptr<UnwrappedLine> PreBlockLine; 168 SmallVectorImpl<UnwrappedLine> *OriginalLines; 
169 }; 170 171 class CompoundStatementIndenter { 172 public: 173 CompoundStatementIndenter(UnwrappedLineParser *Parser, 174 const FormatStyle &Style, unsigned &LineLevel) 175 : CompoundStatementIndenter(Parser, LineLevel, 176 Style.BraceWrapping.AfterControlStatement, 177 Style.BraceWrapping.IndentBraces) { 178 } 179 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 180 bool WrapBrace, bool IndentBrace) 181 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 182 if (WrapBrace) 183 Parser->addUnwrappedLine(); 184 if (IndentBrace) 185 ++LineLevel; 186 } 187 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 188 189 private: 190 unsigned &LineLevel; 191 unsigned OldLineLevel; 192 }; 193 194 namespace { 195 196 class IndexedTokenSource : public FormatTokenSource { 197 public: 198 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 199 : Tokens(Tokens), Position(-1) {} 200 201 FormatToken *getNextToken() override { 202 ++Position; 203 return Tokens[Position]; 204 } 205 206 unsigned getPosition() override { 207 assert(Position >= 0); 208 return Position; 209 } 210 211 FormatToken *setPosition(unsigned P) override { 212 Position = P; 213 return Tokens[Position]; 214 } 215 216 void reset() { Position = -1; } 217 218 private: 219 ArrayRef<FormatToken *> Tokens; 220 int Position; 221 }; 222 223 } // end anonymous namespace 224 225 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 226 const AdditionalKeywords &Keywords, 227 unsigned FirstStartColumn, 228 ArrayRef<FormatToken *> Tokens, 229 UnwrappedLineConsumer &Callback) 230 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 231 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 232 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 233 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 234 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 235 ? 
IG_Rejected 236 : IG_Inited), 237 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 238 239 void UnwrappedLineParser::reset() { 240 PPBranchLevel = -1; 241 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 242 ? IG_Rejected 243 : IG_Inited; 244 IncludeGuardToken = nullptr; 245 Line.reset(new UnwrappedLine); 246 CommentsBeforeNextToken.clear(); 247 FormatTok = nullptr; 248 MustBreakBeforeNextToken = false; 249 PreprocessorDirectives.clear(); 250 CurrentLines = &Lines; 251 DeclarationScopeStack.clear(); 252 PPStack.clear(); 253 Line->FirstStartColumn = FirstStartColumn; 254 } 255 256 void UnwrappedLineParser::parse() { 257 IndexedTokenSource TokenSource(AllTokens); 258 Line->FirstStartColumn = FirstStartColumn; 259 do { 260 LLVM_DEBUG(llvm::dbgs() << "----\n"); 261 reset(); 262 Tokens = &TokenSource; 263 TokenSource.reset(); 264 265 readToken(); 266 parseFile(); 267 268 // If we found an include guard then all preprocessor directives (other than 269 // the guard) are over-indented by one. 270 if (IncludeGuard == IG_Found) 271 for (auto &Line : Lines) 272 if (Line.InPPDirective && Line.Level > 0) 273 --Line.Level; 274 275 // Create line with eof token. 
276 pushToken(FormatTok); 277 addUnwrappedLine(); 278 279 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 280 E = Lines.end(); 281 I != E; ++I) { 282 Callback.consumeUnwrappedLine(*I); 283 } 284 Callback.finishRun(); 285 Lines.clear(); 286 while (!PPLevelBranchIndex.empty() && 287 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 288 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 289 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 290 } 291 if (!PPLevelBranchIndex.empty()) { 292 ++PPLevelBranchIndex.back(); 293 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 294 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 295 } 296 } while (!PPLevelBranchIndex.empty()); 297 } 298 299 void UnwrappedLineParser::parseFile() { 300 // The top-level context in a file always has declarations, except for pre- 301 // processor directives and JavaScript files. 302 bool MustBeDeclaration = 303 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 304 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 305 MustBeDeclaration); 306 if (Style.Language == FormatStyle::LK_TextProto) 307 parseBracedList(); 308 else 309 parseLevel(/*HasOpeningBrace=*/false); 310 // Make sure to format the remaining tokens. 311 // 312 // LK_TextProto is special since its top-level is parsed as the body of a 313 // braced list, which does not necessarily have natural line separators such 314 // as a semicolon. Comments after the last entry that have been determined to 315 // not belong to that line, as in: 316 // key: value 317 // // endfile comment 318 // do not have a chance to be put on a line of their own until this point. 319 // Here we add this newline before end-of-file comments. 
320 if (Style.Language == FormatStyle::LK_TextProto && 321 !CommentsBeforeNextToken.empty()) 322 addUnwrappedLine(); 323 flushComments(true); 324 addUnwrappedLine(); 325 } 326 327 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 328 bool SwitchLabelEncountered = false; 329 do { 330 tok::TokenKind kind = FormatTok->Tok.getKind(); 331 if (FormatTok->Type == TT_MacroBlockBegin) { 332 kind = tok::l_brace; 333 } else if (FormatTok->Type == TT_MacroBlockEnd) { 334 kind = tok::r_brace; 335 } 336 337 switch (kind) { 338 case tok::comment: 339 nextToken(); 340 addUnwrappedLine(); 341 break; 342 case tok::l_brace: 343 // FIXME: Add parameter whether this can happen - if this happens, we must 344 // be in a non-declaration context. 345 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 346 continue; 347 parseBlock(/*MustBeDeclaration=*/false); 348 addUnwrappedLine(); 349 break; 350 case tok::r_brace: 351 if (HasOpeningBrace) 352 return; 353 nextToken(); 354 addUnwrappedLine(); 355 break; 356 case tok::kw_default: { 357 unsigned StoredPosition = Tokens->getPosition(); 358 FormatToken *Next; 359 do { 360 Next = Tokens->getNextToken(); 361 } while (Next && Next->is(tok::comment)); 362 FormatTok = Tokens->setPosition(StoredPosition); 363 if (Next && Next->isNot(tok::colon)) { 364 // default not followed by ':' is not a case label; treat it like 365 // an identifier. 366 parseStructuralElement(); 367 break; 368 } 369 // Else, if it is 'default:', fall through to the case handling. 370 LLVM_FALLTHROUGH; 371 } 372 case tok::kw_case: 373 if (Style.Language == FormatStyle::LK_JavaScript && 374 Line->MustBeDeclaration) { 375 // A 'case: string' style field declaration. 
376 parseStructuralElement(); 377 break; 378 } 379 if (!SwitchLabelEncountered && 380 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 381 ++Line->Level; 382 SwitchLabelEncountered = true; 383 parseStructuralElement(); 384 break; 385 default: 386 parseStructuralElement(); 387 break; 388 } 389 } while (!eof()); 390 } 391 392 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 393 // We'll parse forward through the tokens until we hit 394 // a closing brace or eof - note that getNextToken() will 395 // parse macros, so this will magically work inside macro 396 // definitions, too. 397 unsigned StoredPosition = Tokens->getPosition(); 398 FormatToken *Tok = FormatTok; 399 const FormatToken *PrevTok = Tok->Previous; 400 // Keep a stack of positions of lbrace tokens. We will 401 // update information about whether an lbrace starts a 402 // braced init list or a different block during the loop. 403 SmallVector<FormatToken *, 8> LBraceStack; 404 assert(Tok->Tok.is(tok::l_brace)); 405 do { 406 // Get next non-comment token. 407 FormatToken *NextTok; 408 unsigned ReadTokens = 0; 409 do { 410 NextTok = Tokens->getNextToken(); 411 ++ReadTokens; 412 } while (NextTok->is(tok::comment)); 413 414 switch (Tok->Tok.getKind()) { 415 case tok::l_brace: 416 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 417 if (PrevTok->isOneOf(tok::colon, tok::less)) 418 // A ':' indicates this code is in a type, or a braced list 419 // following a label in an object literal ({a: {b: 1}}). 420 // A '<' could be an object used in a comparison, but that is nonsense 421 // code (can never return true), so more likely it is a generic type 422 // argument (`X<{a: string; b: number}>`). 423 // The code below could be confused by semicolons between the 424 // individual members in a type member list, which would normally 425 // trigger BK_Block. In both cases, this must be parsed as an inline 426 // braced init. 
427 Tok->BlockKind = BK_BracedInit; 428 else if (PrevTok->is(tok::r_paren)) 429 // `) { }` can only occur in function or method declarations in JS. 430 Tok->BlockKind = BK_Block; 431 } else { 432 Tok->BlockKind = BK_Unknown; 433 } 434 LBraceStack.push_back(Tok); 435 break; 436 case tok::r_brace: 437 if (LBraceStack.empty()) 438 break; 439 if (LBraceStack.back()->BlockKind == BK_Unknown) { 440 bool ProbablyBracedList = false; 441 if (Style.Language == FormatStyle::LK_Proto) { 442 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 443 } else { 444 // Using OriginalColumn to distinguish between ObjC methods and 445 // binary operators is a bit hacky. 446 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 447 NextTok->OriginalColumn == 0; 448 449 // If there is a comma, semicolon or right paren after the closing 450 // brace, we assume this is a braced initializer list. Note that 451 // regardless how we mark inner braces here, we will overwrite the 452 // BlockKind later if we parse a braced list (where all blocks 453 // inside are by default braced lists), or when we explicitly detect 454 // blocks (for example while parsing lambdas). 455 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 456 // braced list in JS. 
457 ProbablyBracedList = 458 (Style.Language == FormatStyle::LK_JavaScript && 459 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 460 Keywords.kw_as)) || 461 (Style.isCpp() && NextTok->is(tok::l_paren)) || 462 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 463 tok::r_paren, tok::r_square, tok::l_brace, 464 tok::ellipsis) || 465 (NextTok->is(tok::identifier) && 466 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 467 (NextTok->is(tok::semi) && 468 (!ExpectClassBody || LBraceStack.size() != 1)) || 469 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 470 if (NextTok->is(tok::l_square)) { 471 // We can have an array subscript after a braced init 472 // list, but C++11 attributes are expected after blocks. 473 NextTok = Tokens->getNextToken(); 474 ++ReadTokens; 475 ProbablyBracedList = NextTok->isNot(tok::l_square); 476 } 477 } 478 if (ProbablyBracedList) { 479 Tok->BlockKind = BK_BracedInit; 480 LBraceStack.back()->BlockKind = BK_BracedInit; 481 } else { 482 Tok->BlockKind = BK_Block; 483 LBraceStack.back()->BlockKind = BK_Block; 484 } 485 } 486 LBraceStack.pop_back(); 487 break; 488 case tok::identifier: 489 if (!Tok->is(TT_StatementMacro)) 490 break; 491 LLVM_FALLTHROUGH; 492 case tok::at: 493 case tok::semi: 494 case tok::kw_if: 495 case tok::kw_while: 496 case tok::kw_for: 497 case tok::kw_switch: 498 case tok::kw_try: 499 case tok::kw___try: 500 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 501 LBraceStack.back()->BlockKind = BK_Block; 502 break; 503 default: 504 break; 505 } 506 PrevTok = Tok; 507 Tok = NextTok; 508 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 509 510 // Assume other blocks for all unclosed opening braces. 
511 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 512 if (LBraceStack[i]->BlockKind == BK_Unknown) 513 LBraceStack[i]->BlockKind = BK_Block; 514 } 515 516 FormatTok = Tokens->setPosition(StoredPosition); 517 } 518 519 template <class T> 520 static inline void hash_combine(std::size_t &seed, const T &v) { 521 std::hash<T> hasher; 522 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 523 } 524 525 size_t UnwrappedLineParser::computePPHash() const { 526 size_t h = 0; 527 for (const auto &i : PPStack) { 528 hash_combine(h, size_t(i.Kind)); 529 hash_combine(h, i.Line); 530 } 531 return h; 532 } 533 534 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 535 bool MunchSemi) { 536 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 537 "'{' or macro block token expected"); 538 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 539 FormatTok->BlockKind = BK_Block; 540 541 size_t PPStartHash = computePPHash(); 542 543 unsigned InitialLevel = Line->Level; 544 nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); 545 546 if (MacroBlock && FormatTok->is(tok::l_paren)) 547 parseParens(); 548 549 size_t NbPreprocessorDirectives = 550 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 551 addUnwrappedLine(); 552 size_t OpeningLineIndex = 553 CurrentLines->empty() 554 ? (UnwrappedLine::kInvalidIndex) 555 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 556 557 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 558 MustBeDeclaration); 559 if (AddLevel) 560 ++Line->Level; 561 parseLevel(/*HasOpeningBrace=*/true); 562 563 if (eof()) 564 return; 565 566 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 567 : !FormatTok->is(tok::r_brace)) { 568 Line->Level = InitialLevel; 569 FormatTok->BlockKind = BK_Block; 570 return; 571 } 572 573 size_t PPEndHash = computePPHash(); 574 575 // Munch the closing brace. 576 nextToken(/*LevelDifference=*/AddLevel ? 
-1 : 0); 577 578 if (MacroBlock && FormatTok->is(tok::l_paren)) 579 parseParens(); 580 581 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 582 nextToken(); 583 Line->Level = InitialLevel; 584 585 if (PPStartHash == PPEndHash) { 586 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 587 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 588 // Update the opening line to add the forward reference as well 589 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 590 CurrentLines->size() - 1; 591 } 592 } 593 } 594 595 static bool isGoogScope(const UnwrappedLine &Line) { 596 // FIXME: Closure-library specific stuff should not be hard-coded but be 597 // configurable. 598 if (Line.Tokens.size() < 4) 599 return false; 600 auto I = Line.Tokens.begin(); 601 if (I->Tok->TokenText != "goog") 602 return false; 603 ++I; 604 if (I->Tok->isNot(tok::period)) 605 return false; 606 ++I; 607 if (I->Tok->TokenText != "scope") 608 return false; 609 ++I; 610 return I->Tok->is(tok::l_paren); 611 } 612 613 static bool isIIFE(const UnwrappedLine &Line, 614 const AdditionalKeywords &Keywords) { 615 // Look for the start of an immediately invoked anonymous function. 616 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 617 // This is commonly done in JavaScript to create a new, anonymous scope. 618 // Example: (function() { ... 
})() 619 if (Line.Tokens.size() < 3) 620 return false; 621 auto I = Line.Tokens.begin(); 622 if (I->Tok->isNot(tok::l_paren)) 623 return false; 624 ++I; 625 if (I->Tok->isNot(Keywords.kw_function)) 626 return false; 627 ++I; 628 return I->Tok->is(tok::l_paren); 629 } 630 631 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 632 const FormatToken &InitialToken) { 633 if (InitialToken.is(tok::kw_namespace)) 634 return Style.BraceWrapping.AfterNamespace; 635 if (InitialToken.is(tok::kw_class)) 636 return Style.BraceWrapping.AfterClass; 637 if (InitialToken.is(tok::kw_union)) 638 return Style.BraceWrapping.AfterUnion; 639 if (InitialToken.is(tok::kw_struct)) 640 return Style.BraceWrapping.AfterStruct; 641 return false; 642 } 643 644 void UnwrappedLineParser::parseChildBlock() { 645 FormatTok->BlockKind = BK_Block; 646 nextToken(); 647 { 648 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && 649 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 650 ScopedLineState LineState(*this); 651 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 652 /*MustBeDeclaration=*/false); 653 Line->Level += SkipIndent ? 0 : 1; 654 parseLevel(/*HasOpeningBrace=*/true); 655 flushComments(isOnNewLine(*FormatTok)); 656 Line->Level -= SkipIndent ? 
0 : 1; 657 } 658 nextToken(); 659 } 660 661 void UnwrappedLineParser::parsePPDirective() { 662 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 663 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 664 665 nextToken(); 666 667 if (!FormatTok->Tok.getIdentifierInfo()) { 668 parsePPUnknown(); 669 return; 670 } 671 672 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 673 case tok::pp_define: 674 parsePPDefine(); 675 return; 676 case tok::pp_if: 677 parsePPIf(/*IfDef=*/false); 678 break; 679 case tok::pp_ifdef: 680 case tok::pp_ifndef: 681 parsePPIf(/*IfDef=*/true); 682 break; 683 case tok::pp_else: 684 parsePPElse(); 685 break; 686 case tok::pp_elif: 687 parsePPElIf(); 688 break; 689 case tok::pp_endif: 690 parsePPEndIf(); 691 break; 692 default: 693 parsePPUnknown(); 694 break; 695 } 696 } 697 698 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 699 size_t Line = CurrentLines->size(); 700 if (CurrentLines == &PreprocessorDirectives) 701 Line += Lines.size(); 702 703 if (Unreachable || 704 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 705 PPStack.push_back({PP_Unreachable, Line}); 706 else 707 PPStack.push_back({PP_Conditional, Line}); 708 } 709 710 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 711 ++PPBranchLevel; 712 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 713 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 714 PPLevelBranchIndex.push_back(0); 715 PPLevelBranchCount.push_back(0); 716 } 717 PPChainBranchIndex.push(0); 718 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 719 conditionalCompilationCondition(Unreachable || Skip); 720 } 721 722 void UnwrappedLineParser::conditionalCompilationAlternative() { 723 if (!PPStack.empty()) 724 PPStack.pop_back(); 725 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 726 if (!PPChainBranchIndex.empty()) 727 ++PPChainBranchIndex.top(); 728 conditionalCompilationCondition( 729 
PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 730 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 731 } 732 733 void UnwrappedLineParser::conditionalCompilationEnd() { 734 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 735 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 736 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 737 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 738 } 739 } 740 // Guard against #endif's without #if. 741 if (PPBranchLevel > -1) 742 --PPBranchLevel; 743 if (!PPChainBranchIndex.empty()) 744 PPChainBranchIndex.pop(); 745 if (!PPStack.empty()) 746 PPStack.pop_back(); 747 } 748 749 void UnwrappedLineParser::parsePPIf(bool IfDef) { 750 bool IfNDef = FormatTok->is(tok::pp_ifndef); 751 nextToken(); 752 bool Unreachable = false; 753 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 754 Unreachable = true; 755 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 756 Unreachable = true; 757 conditionalCompilationStart(Unreachable); 758 FormatToken *IfCondition = FormatTok; 759 // If there's a #ifndef on the first line, and the only lines before it are 760 // comments, it could be an include guard. 761 bool MaybeIncludeGuard = IfNDef; 762 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 763 for (auto &Line : Lines) { 764 if (!Line.Tokens.front().Tok->is(tok::comment)) { 765 MaybeIncludeGuard = false; 766 IncludeGuard = IG_Rejected; 767 break; 768 } 769 } 770 --PPBranchLevel; 771 parsePPUnknown(); 772 ++PPBranchLevel; 773 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 774 IncludeGuard = IG_IfNdefed; 775 IncludeGuardToken = IfCondition; 776 } 777 } 778 779 void UnwrappedLineParser::parsePPElse() { 780 // If a potential include guard has an #else, it's not an include guard. 
781 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 782 IncludeGuard = IG_Rejected; 783 conditionalCompilationAlternative(); 784 if (PPBranchLevel > -1) 785 --PPBranchLevel; 786 parsePPUnknown(); 787 ++PPBranchLevel; 788 } 789 790 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 791 792 void UnwrappedLineParser::parsePPEndIf() { 793 conditionalCompilationEnd(); 794 parsePPUnknown(); 795 // If the #endif of a potential include guard is the last thing in the file, 796 // then we found an include guard. 797 unsigned TokenPosition = Tokens->getPosition(); 798 FormatToken *PeekNext = AllTokens[TokenPosition]; 799 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && 800 PeekNext->is(tok::eof) && 801 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 802 IncludeGuard = IG_Found; 803 } 804 805 void UnwrappedLineParser::parsePPDefine() { 806 nextToken(); 807 808 if (FormatTok->Tok.getKind() != tok::identifier) { 809 IncludeGuard = IG_Rejected; 810 IncludeGuardToken = nullptr; 811 parsePPUnknown(); 812 return; 813 } 814 815 if (IncludeGuard == IG_IfNdefed && 816 IncludeGuardToken->TokenText == FormatTok->TokenText) { 817 IncludeGuard = IG_Defined; 818 IncludeGuardToken = nullptr; 819 for (auto &Line : Lines) { 820 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 821 IncludeGuard = IG_Rejected; 822 break; 823 } 824 } 825 } 826 827 nextToken(); 828 if (FormatTok->Tok.getKind() == tok::l_paren && 829 FormatTok->WhitespaceRange.getBegin() == 830 FormatTok->WhitespaceRange.getEnd()) { 831 parseParens(); 832 } 833 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 834 Line->Level += PPBranchLevel + 1; 835 addUnwrappedLine(); 836 ++Line->Level; 837 838 // Errors during a preprocessor directive can only affect the layout of the 839 // preprocessor directive, and thus we ignore them. 
An alternative approach 840 // would be to use the same approach we use on the file level (no 841 // re-indentation if there was a structural error) within the macro 842 // definition. 843 parseFile(); 844 } 845 846 void UnwrappedLineParser::parsePPUnknown() { 847 do { 848 nextToken(); 849 } while (!eof()); 850 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 851 Line->Level += PPBranchLevel + 1; 852 addUnwrappedLine(); 853 } 854 855 // Here we blacklist certain tokens that are not usually the first token in an 856 // unwrapped line. This is used in attempt to distinguish macro calls without 857 // trailing semicolons from other constructs split to several lines. 858 static bool tokenCanStartNewLine(const clang::Token &Tok) { 859 // Semicolon can be a null-statement, l_square can be a start of a macro or 860 // a C++11 attribute, but this doesn't seem to be common. 861 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 862 Tok.isNot(tok::l_square) && 863 // Tokens that can only be used as binary operators and a part of 864 // overloaded operator names. 865 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 866 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 867 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 868 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 869 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 870 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 871 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 872 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 873 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 874 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 875 Tok.isNot(tok::lesslessequal) && 876 // Colon is used in labels, base class lists, initializer lists, 877 // range-based for loops, ternary operator, but should never be the 878 // first token in an unwrapped line. 879 Tok.isNot(tok::colon) && 880 // 'noexcept' is a trailing annotation. 
881 Tok.isNot(tok::kw_noexcept); 882 } 883 884 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 885 const FormatToken *FormatTok) { 886 // FIXME: This returns true for C/C++ keywords like 'struct'. 887 return FormatTok->is(tok::identifier) && 888 (FormatTok->Tok.getIdentifierInfo() == nullptr || 889 !FormatTok->isOneOf( 890 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 891 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 892 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 893 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 894 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 895 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, 896 Keywords.kw_from)); 897 } 898 899 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 900 const FormatToken *FormatTok) { 901 return FormatTok->Tok.isLiteral() || 902 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 903 mustBeJSIdent(Keywords, FormatTok); 904 } 905 906 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 907 // when encountered after a value (see mustBeJSIdentOrValue). 
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
                           const FormatToken *FormatTok) {
  return FormatTok->isOneOf(
      tok::kw_return, Keywords.kw_yield,
      // conditionals
      tok::kw_if, tok::kw_else,
      // loops
      tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
      // switch/case
      tok::kw_switch, tok::kw_case,
      // exceptions
      tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
      // declaration
      tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
      Keywords.kw_async, Keywords.kw_function,
      // import/export
      Keywords.kw_import, tok::kw_export);
}

// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// terminate the line in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // Nothing to do unless the next token (or the first comment preceding it)
  // starts on a new line.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
    // If the line contains an '@' sign, the previous token might be an
    // annotation, which can precede another identifier/value.
    bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
                              [](UnwrappedLineNode &LineNode) {
                                return LineNode.Tok->is(tok::at);
                              }) != Line->Tokens.end();
    if (HasAt)
      return;
  }
  // A value followed by '!' on the next line: end the line before the '!'.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // A value on the next line directly after a value, ']', ')', '++' or '--',
  // unless either side belongs to a template string expression.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus)))
    return addUnwrappedLine();
  // A declaration or statement keyword after a value or ')'.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next))
    return addUnwrappedLine();
}

void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen include directive: `include "file"` forms a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Mark every token up to the closing brace as finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
1075 LLVM_FALLTHROUGH; 1076 case tok::kw_inline: 1077 nextToken(); 1078 if (FormatTok->Tok.is(tok::kw_namespace)) { 1079 parseNamespace(); 1080 return; 1081 } 1082 break; 1083 case tok::identifier: 1084 if (FormatTok->is(TT_ForEachMacro)) { 1085 parseForOrWhileLoop(); 1086 return; 1087 } 1088 if (FormatTok->is(TT_MacroBlockBegin)) { 1089 parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, 1090 /*MunchSemi=*/false); 1091 return; 1092 } 1093 if (FormatTok->is(Keywords.kw_import)) { 1094 if (Style.Language == FormatStyle::LK_JavaScript) { 1095 parseJavaScriptEs6ImportExport(); 1096 return; 1097 } 1098 if (Style.Language == FormatStyle::LK_Proto) { 1099 nextToken(); 1100 if (FormatTok->is(tok::kw_public)) 1101 nextToken(); 1102 if (!FormatTok->is(tok::string_literal)) 1103 return; 1104 nextToken(); 1105 if (FormatTok->is(tok::semi)) 1106 nextToken(); 1107 addUnwrappedLine(); 1108 return; 1109 } 1110 } 1111 if (Style.isCpp() && 1112 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1113 Keywords.kw_slots, Keywords.kw_qslots)) { 1114 nextToken(); 1115 if (FormatTok->is(tok::colon)) { 1116 nextToken(); 1117 addUnwrappedLine(); 1118 return; 1119 } 1120 } 1121 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1122 parseStatementMacro(); 1123 return; 1124 } 1125 // In all other cases, parse the declaration. 
1126 break; 1127 default: 1128 break; 1129 } 1130 do { 1131 const FormatToken *Previous = FormatTok->Previous; 1132 switch (FormatTok->Tok.getKind()) { 1133 case tok::at: 1134 nextToken(); 1135 if (FormatTok->Tok.is(tok::l_brace)) { 1136 nextToken(); 1137 parseBracedList(); 1138 break; 1139 } else if (Style.Language == FormatStyle::LK_Java && 1140 FormatTok->is(Keywords.kw_interface)) { 1141 nextToken(); 1142 break; 1143 } 1144 switch (FormatTok->Tok.getObjCKeywordID()) { 1145 case tok::objc_public: 1146 case tok::objc_protected: 1147 case tok::objc_package: 1148 case tok::objc_private: 1149 return parseAccessSpecifier(); 1150 case tok::objc_interface: 1151 case tok::objc_implementation: 1152 return parseObjCInterfaceOrImplementation(); 1153 case tok::objc_protocol: 1154 if (parseObjCProtocol()) 1155 return; 1156 break; 1157 case tok::objc_end: 1158 return; // Handled by the caller. 1159 case tok::objc_optional: 1160 case tok::objc_required: 1161 nextToken(); 1162 addUnwrappedLine(); 1163 return; 1164 case tok::objc_autoreleasepool: 1165 nextToken(); 1166 if (FormatTok->Tok.is(tok::l_brace)) { 1167 if (Style.BraceWrapping.AfterControlStatement) 1168 addUnwrappedLine(); 1169 parseBlock(/*MustBeDeclaration=*/false); 1170 } 1171 addUnwrappedLine(); 1172 return; 1173 case tok::objc_synchronized: 1174 nextToken(); 1175 if (FormatTok->Tok.is(tok::l_paren)) 1176 // Skip synchronization object 1177 parseParens(); 1178 if (FormatTok->Tok.is(tok::l_brace)) { 1179 if (Style.BraceWrapping.AfterControlStatement) 1180 addUnwrappedLine(); 1181 parseBlock(/*MustBeDeclaration=*/false); 1182 } 1183 addUnwrappedLine(); 1184 return; 1185 case tok::objc_try: 1186 // This branch isn't strictly necessary (the kw_try case below would 1187 // do this too after the tok::at is parsed above). But be explicit. 1188 parseTryCatch(); 1189 return; 1190 default: 1191 break; 1192 } 1193 break; 1194 case tok::kw_enum: 1195 // Ignore if this is part of "template <enum ...". 
1196 if (Previous && Previous->is(tok::less)) { 1197 nextToken(); 1198 break; 1199 } 1200 1201 // parseEnum falls through and does not yet add an unwrapped line as an 1202 // enum definition can start a structural element. 1203 if (!parseEnum()) 1204 break; 1205 // This only applies for C++. 1206 if (!Style.isCpp()) { 1207 addUnwrappedLine(); 1208 return; 1209 } 1210 break; 1211 case tok::kw_typedef: 1212 nextToken(); 1213 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1214 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 1215 parseEnum(); 1216 break; 1217 case tok::kw_struct: 1218 case tok::kw_union: 1219 case tok::kw_class: 1220 // parseRecord falls through and does not yet add an unwrapped line as a 1221 // record declaration or definition can start a structural element. 1222 parseRecord(); 1223 // This does not apply for Java, JavaScript and C#. 1224 if (Style.Language == FormatStyle::LK_Java || 1225 Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) { 1226 if (FormatTok->is(tok::semi)) 1227 nextToken(); 1228 addUnwrappedLine(); 1229 return; 1230 } 1231 break; 1232 case tok::period: 1233 nextToken(); 1234 // In Java, classes have an implicit static member "class". 1235 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1236 FormatTok->is(tok::kw_class)) 1237 nextToken(); 1238 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && 1239 FormatTok->Tok.getIdentifierInfo()) 1240 // JavaScript only has pseudo keywords, all keywords are allowed to 1241 // appear in "IdentifierName" positions. 
See http://es5.github.io/#x7.6 1242 nextToken(); 1243 break; 1244 case tok::semi: 1245 nextToken(); 1246 addUnwrappedLine(); 1247 return; 1248 case tok::r_brace: 1249 addUnwrappedLine(); 1250 return; 1251 case tok::l_paren: 1252 parseParens(); 1253 break; 1254 case tok::kw_operator: 1255 nextToken(); 1256 if (FormatTok->isBinaryOperator()) 1257 nextToken(); 1258 break; 1259 case tok::caret: 1260 nextToken(); 1261 if (FormatTok->Tok.isAnyIdentifier() || 1262 FormatTok->isSimpleTypeSpecifier()) 1263 nextToken(); 1264 if (FormatTok->is(tok::l_paren)) 1265 parseParens(); 1266 if (FormatTok->is(tok::l_brace)) 1267 parseChildBlock(); 1268 break; 1269 case tok::l_brace: 1270 if (!tryToParseBracedList()) { 1271 // A block outside of parentheses must be the last part of a 1272 // structural element. 1273 // FIXME: Figure out cases where this is not true, and add projections 1274 // for them (the one we know is missing are lambdas). 1275 if (Style.BraceWrapping.AfterFunction) 1276 addUnwrappedLine(); 1277 FormatTok->Type = TT_FunctionLBrace; 1278 parseBlock(/*MustBeDeclaration=*/false); 1279 addUnwrappedLine(); 1280 return; 1281 } 1282 // Otherwise this was a braced init list, and the structural 1283 // element continues. 1284 break; 1285 case tok::kw_try: 1286 // We arrive here when parsing function-try blocks. 1287 if (Style.BraceWrapping.AfterFunction) 1288 addUnwrappedLine(); 1289 parseTryCatch(); 1290 return; 1291 case tok::identifier: { 1292 if (FormatTok->is(TT_MacroBlockEnd)) { 1293 addUnwrappedLine(); 1294 return; 1295 } 1296 1297 // Function declarations (as opposed to function expressions) are parsed 1298 // on their own unwrapped line by continuing this loop. Function 1299 // expressions (functions that are not on their own line) must not create 1300 // a new unwrapped line, so they are special cased below. 
1301 size_t TokenCount = Line->Tokens.size(); 1302 if (Style.Language == FormatStyle::LK_JavaScript && 1303 FormatTok->is(Keywords.kw_function) && 1304 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1305 Keywords.kw_async)))) { 1306 tryToParseJSFunction(); 1307 break; 1308 } 1309 if ((Style.Language == FormatStyle::LK_JavaScript || 1310 Style.Language == FormatStyle::LK_Java) && 1311 FormatTok->is(Keywords.kw_interface)) { 1312 if (Style.Language == FormatStyle::LK_JavaScript) { 1313 // In JavaScript/TypeScript, "interface" can be used as a standalone 1314 // identifier, e.g. in `var interface = 1;`. If "interface" is 1315 // followed by another identifier, it is very like to be an actual 1316 // interface declaration. 1317 unsigned StoredPosition = Tokens->getPosition(); 1318 FormatToken *Next = Tokens->getNextToken(); 1319 FormatTok = Tokens->setPosition(StoredPosition); 1320 if (Next && !mustBeJSIdent(Keywords, Next)) { 1321 nextToken(); 1322 break; 1323 } 1324 } 1325 parseRecord(); 1326 addUnwrappedLine(); 1327 return; 1328 } 1329 1330 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1331 parseStatementMacro(); 1332 return; 1333 } 1334 1335 // See if the following token should start a new unwrapped line. 1336 StringRef Text = FormatTok->TokenText; 1337 nextToken(); 1338 if (Line->Tokens.size() == 1 && 1339 // JS doesn't have macros, and within classes colons indicate fields, 1340 // not labels. 1341 Style.Language != FormatStyle::LK_JavaScript) { 1342 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1343 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1344 parseLabel(); 1345 return; 1346 } 1347 // Recognize function-like macro usages without trailing semicolon as 1348 // well as free-standing macros like Q_OBJECT. 1349 bool FunctionLike = FormatTok->is(tok::l_paren); 1350 if (FunctionLike) 1351 parseParens(); 1352 1353 bool FollowedByNewline = 1354 CommentsBeforeNextToken.empty() 1355 ? 
FormatTok->NewlinesBefore > 0 1356 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1357 1358 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1359 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 1360 addUnwrappedLine(); 1361 return; 1362 } 1363 } 1364 break; 1365 } 1366 case tok::equal: 1367 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType 1368 // TT_JsFatArrow. The always start an expression or a child block if 1369 // followed by a curly. 1370 if (FormatTok->is(TT_JsFatArrow)) { 1371 nextToken(); 1372 if (FormatTok->is(tok::l_brace)) 1373 parseChildBlock(); 1374 break; 1375 } 1376 1377 nextToken(); 1378 if (FormatTok->Tok.is(tok::l_brace)) { 1379 nextToken(); 1380 parseBracedList(); 1381 } else if (Style.Language == FormatStyle::LK_Proto && 1382 FormatTok->Tok.is(tok::less)) { 1383 nextToken(); 1384 parseBracedList(/*ContinueOnSemicolons=*/false, 1385 /*ClosingBraceKind=*/tok::greater); 1386 } 1387 break; 1388 case tok::l_square: 1389 parseSquare(); 1390 break; 1391 case tok::kw_new: 1392 parseNew(); 1393 break; 1394 default: 1395 nextToken(); 1396 break; 1397 } 1398 } while (!eof()); 1399 } 1400 1401 bool UnwrappedLineParser::tryToParseLambda() { 1402 if (!Style.isCpp()) { 1403 nextToken(); 1404 return false; 1405 } 1406 assert(FormatTok->is(tok::l_square)); 1407 FormatToken &LSquare = *FormatTok; 1408 if (!tryToParseLambdaIntroducer()) 1409 return false; 1410 1411 bool SeenArrow = false; 1412 1413 while (FormatTok->isNot(tok::l_brace)) { 1414 if (FormatTok->isSimpleTypeSpecifier()) { 1415 nextToken(); 1416 continue; 1417 } 1418 switch (FormatTok->Tok.getKind()) { 1419 case tok::l_brace: 1420 break; 1421 case tok::l_paren: 1422 parseParens(); 1423 break; 1424 case tok::amp: 1425 case tok::star: 1426 case tok::kw_const: 1427 case tok::comma: 1428 case tok::less: 1429 case tok::greater: 1430 case tok::identifier: 1431 case tok::numeric_constant: 1432 case tok::coloncolon: 1433 case tok::kw_mutable: 1434 case 
tok::kw_noexcept: 1435 nextToken(); 1436 break; 1437 // Specialization of a template with an integer parameter can contain 1438 // arithmetic, logical, comparison and ternary operators. 1439 // 1440 // FIXME: This also accepts sequences of operators that are not in the scope 1441 // of a template argument list. 1442 // 1443 // In a C++ lambda a template type can only occur after an arrow. We use 1444 // this as an heuristic to distinguish between Objective-C expressions 1445 // followed by an `a->b` expression, such as: 1446 // ([obj func:arg] + a->b) 1447 // Otherwise the code below would parse as a lambda. 1448 case tok::plus: 1449 case tok::minus: 1450 case tok::exclaim: 1451 case tok::tilde: 1452 case tok::slash: 1453 case tok::percent: 1454 case tok::lessless: 1455 case tok::pipe: 1456 case tok::pipepipe: 1457 case tok::ampamp: 1458 case tok::caret: 1459 case tok::equalequal: 1460 case tok::exclaimequal: 1461 case tok::greaterequal: 1462 case tok::lessequal: 1463 case tok::question: 1464 case tok::colon: 1465 case tok::kw_true: 1466 case tok::kw_false: 1467 if (SeenArrow) { 1468 nextToken(); 1469 break; 1470 } 1471 return true; 1472 case tok::arrow: 1473 // This might or might not actually be a lambda arrow (this could be an 1474 // ObjC method invocation followed by a dereferencing arrow). We might 1475 // reset this back to TT_Unknown in TokenAnnotator. 
1476 FormatTok->Type = TT_LambdaArrow; 1477 SeenArrow = true; 1478 nextToken(); 1479 break; 1480 default: 1481 return true; 1482 } 1483 } 1484 FormatTok->Type = TT_LambdaLBrace; 1485 LSquare.Type = TT_LambdaLSquare; 1486 parseChildBlock(); 1487 return true; 1488 } 1489 1490 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1491 const FormatToken *Previous = FormatTok->Previous; 1492 if (Previous && 1493 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1494 tok::kw_delete, tok::l_square) || 1495 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1496 Previous->isSimpleTypeSpecifier())) { 1497 nextToken(); 1498 return false; 1499 } 1500 nextToken(); 1501 if (FormatTok->is(tok::l_square)) { 1502 return false; 1503 } 1504 parseSquare(/*LambdaIntroducer=*/true); 1505 return true; 1506 } 1507 1508 void UnwrappedLineParser::tryToParseJSFunction() { 1509 assert(FormatTok->is(Keywords.kw_function) || 1510 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1511 if (FormatTok->is(Keywords.kw_async)) 1512 nextToken(); 1513 // Consume "function". 1514 nextToken(); 1515 1516 // Consume * (generator function). Treat it like C++'s overloaded operators. 1517 if (FormatTok->is(tok::star)) { 1518 FormatTok->Type = TT_OverloadedOperator; 1519 nextToken(); 1520 } 1521 1522 // Consume function name. 1523 if (FormatTok->is(tok::identifier)) 1524 nextToken(); 1525 1526 if (FormatTok->isNot(tok::l_paren)) 1527 return; 1528 1529 // Parse formal parameter list. 1530 parseParens(); 1531 1532 if (FormatTok->is(tok::colon)) { 1533 // Parse a type definition. 1534 nextToken(); 1535 1536 // Eat the type declaration. For braced inline object types, balance braces, 1537 // otherwise just parse until finding an l_brace for the function body. 
1538 if (FormatTok->is(tok::l_brace)) 1539 tryToParseBracedList(); 1540 else 1541 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1542 nextToken(); 1543 } 1544 1545 if (FormatTok->is(tok::semi)) 1546 return; 1547 1548 parseChildBlock(); 1549 } 1550 1551 bool UnwrappedLineParser::tryToParseBracedList() { 1552 if (FormatTok->BlockKind == BK_Unknown) 1553 calculateBraceTypes(); 1554 assert(FormatTok->BlockKind != BK_Unknown); 1555 if (FormatTok->BlockKind == BK_Block) 1556 return false; 1557 nextToken(); 1558 parseBracedList(); 1559 return true; 1560 } 1561 1562 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1563 tok::TokenKind ClosingBraceKind) { 1564 bool HasError = false; 1565 1566 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1567 // replace this by using parseAssigmentExpression() inside. 1568 do { 1569 if (Style.Language == FormatStyle::LK_JavaScript) { 1570 if (FormatTok->is(Keywords.kw_function) || 1571 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1572 tryToParseJSFunction(); 1573 continue; 1574 } 1575 if (FormatTok->is(TT_JsFatArrow)) { 1576 nextToken(); 1577 // Fat arrows can be followed by simple expressions or by child blocks 1578 // in curly braces. 1579 if (FormatTok->is(tok::l_brace)) { 1580 parseChildBlock(); 1581 continue; 1582 } 1583 } 1584 if (FormatTok->is(tok::l_brace)) { 1585 // Could be a method inside of a braced list `{a() { return 1; }}`. 
1586 if (tryToParseBracedList()) 1587 continue; 1588 parseChildBlock(); 1589 } 1590 } 1591 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1592 nextToken(); 1593 return !HasError; 1594 } 1595 switch (FormatTok->Tok.getKind()) { 1596 case tok::caret: 1597 nextToken(); 1598 if (FormatTok->is(tok::l_brace)) { 1599 parseChildBlock(); 1600 } 1601 break; 1602 case tok::l_square: 1603 tryToParseLambda(); 1604 break; 1605 case tok::l_paren: 1606 parseParens(); 1607 // JavaScript can just have free standing methods and getters/setters in 1608 // object literals. Detect them by a "{" following ")". 1609 if (Style.Language == FormatStyle::LK_JavaScript) { 1610 if (FormatTok->is(tok::l_brace)) 1611 parseChildBlock(); 1612 break; 1613 } 1614 break; 1615 case tok::l_brace: 1616 // Assume there are no blocks inside a braced init list apart 1617 // from the ones we explicitly parse out (like lambdas). 1618 FormatTok->BlockKind = BK_BracedInit; 1619 nextToken(); 1620 parseBracedList(); 1621 break; 1622 case tok::less: 1623 if (Style.Language == FormatStyle::LK_Proto) { 1624 nextToken(); 1625 parseBracedList(/*ContinueOnSemicolons=*/false, 1626 /*ClosingBraceKind=*/tok::greater); 1627 } else { 1628 nextToken(); 1629 } 1630 break; 1631 case tok::semi: 1632 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1633 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1634 // used for error recovery if we have otherwise determined that this is 1635 // a braced list. 
1636 if (Style.Language == FormatStyle::LK_JavaScript) { 1637 nextToken(); 1638 break; 1639 } 1640 HasError = true; 1641 if (!ContinueOnSemicolons) 1642 return !HasError; 1643 nextToken(); 1644 break; 1645 case tok::comma: 1646 nextToken(); 1647 break; 1648 default: 1649 nextToken(); 1650 break; 1651 } 1652 } while (!eof()); 1653 return false; 1654 } 1655 1656 void UnwrappedLineParser::parseParens() { 1657 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1658 nextToken(); 1659 do { 1660 switch (FormatTok->Tok.getKind()) { 1661 case tok::l_paren: 1662 parseParens(); 1663 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1664 parseChildBlock(); 1665 break; 1666 case tok::r_paren: 1667 nextToken(); 1668 return; 1669 case tok::r_brace: 1670 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1671 return; 1672 case tok::l_square: 1673 tryToParseLambda(); 1674 break; 1675 case tok::l_brace: 1676 if (!tryToParseBracedList()) 1677 parseChildBlock(); 1678 break; 1679 case tok::at: 1680 nextToken(); 1681 if (FormatTok->Tok.is(tok::l_brace)) { 1682 nextToken(); 1683 parseBracedList(); 1684 } 1685 break; 1686 case tok::kw_class: 1687 if (Style.Language == FormatStyle::LK_JavaScript) 1688 parseRecord(/*ParseAsExpr=*/true); 1689 else 1690 nextToken(); 1691 break; 1692 case tok::identifier: 1693 if (Style.Language == FormatStyle::LK_JavaScript && 1694 (FormatTok->is(Keywords.kw_function) || 1695 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1696 tryToParseJSFunction(); 1697 else 1698 nextToken(); 1699 break; 1700 default: 1701 nextToken(); 1702 break; 1703 } 1704 } while (!eof()); 1705 } 1706 1707 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 1708 if (!LambdaIntroducer) { 1709 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1710 if (tryToParseLambda()) 1711 return; 1712 } 1713 do { 1714 switch (FormatTok->Tok.getKind()) { 1715 case tok::l_paren: 1716 parseParens(); 1717 
break; 1718 case tok::r_square: 1719 nextToken(); 1720 return; 1721 case tok::r_brace: 1722 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1723 return; 1724 case tok::l_square: 1725 parseSquare(); 1726 break; 1727 case tok::l_brace: { 1728 if (!tryToParseBracedList()) 1729 parseChildBlock(); 1730 break; 1731 } 1732 case tok::at: 1733 nextToken(); 1734 if (FormatTok->Tok.is(tok::l_brace)) { 1735 nextToken(); 1736 parseBracedList(); 1737 } 1738 break; 1739 default: 1740 nextToken(); 1741 break; 1742 } 1743 } while (!eof()); 1744 } 1745 1746 void UnwrappedLineParser::parseIfThenElse() { 1747 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1748 nextToken(); 1749 if (FormatTok->Tok.is(tok::kw_constexpr)) 1750 nextToken(); 1751 if (FormatTok->Tok.is(tok::l_paren)) 1752 parseParens(); 1753 bool NeedsUnwrappedLine = false; 1754 if (FormatTok->Tok.is(tok::l_brace)) { 1755 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1756 parseBlock(/*MustBeDeclaration=*/false); 1757 if (Style.BraceWrapping.BeforeElse) 1758 addUnwrappedLine(); 1759 else 1760 NeedsUnwrappedLine = true; 1761 } else { 1762 addUnwrappedLine(); 1763 ++Line->Level; 1764 parseStructuralElement(); 1765 --Line->Level; 1766 } 1767 if (FormatTok->Tok.is(tok::kw_else)) { 1768 nextToken(); 1769 if (FormatTok->Tok.is(tok::l_brace)) { 1770 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1771 parseBlock(/*MustBeDeclaration=*/false); 1772 addUnwrappedLine(); 1773 } else if (FormatTok->Tok.is(tok::kw_if)) { 1774 parseIfThenElse(); 1775 } else { 1776 addUnwrappedLine(); 1777 ++Line->Level; 1778 parseStructuralElement(); 1779 if (FormatTok->is(tok::eof)) 1780 addUnwrappedLine(); 1781 --Line->Level; 1782 } 1783 } else if (NeedsUnwrappedLine) { 1784 addUnwrappedLine(); 1785 } 1786 } 1787 1788 void UnwrappedLineParser::parseTryCatch() { 1789 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1790 nextToken(); 1791 bool NeedsUnwrappedLine = 
false; 1792 if (FormatTok->is(tok::colon)) { 1793 // We are in a function try block, what comes is an initializer list. 1794 nextToken(); 1795 while (FormatTok->is(tok::identifier)) { 1796 nextToken(); 1797 if (FormatTok->is(tok::l_paren)) 1798 parseParens(); 1799 if (FormatTok->is(tok::comma)) 1800 nextToken(); 1801 } 1802 } 1803 // Parse try with resource. 1804 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1805 parseParens(); 1806 } 1807 if (FormatTok->is(tok::l_brace)) { 1808 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1809 parseBlock(/*MustBeDeclaration=*/false); 1810 if (Style.BraceWrapping.BeforeCatch) { 1811 addUnwrappedLine(); 1812 } else { 1813 NeedsUnwrappedLine = true; 1814 } 1815 } else if (!FormatTok->is(tok::kw_catch)) { 1816 // The C++ standard requires a compound-statement after a try. 1817 // If there's none, we try to assume there's a structuralElement 1818 // and try to continue. 1819 addUnwrappedLine(); 1820 ++Line->Level; 1821 parseStructuralElement(); 1822 --Line->Level; 1823 } 1824 while (1) { 1825 if (FormatTok->is(tok::at)) 1826 nextToken(); 1827 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1828 tok::kw___finally) || 1829 ((Style.Language == FormatStyle::LK_Java || 1830 Style.Language == FormatStyle::LK_JavaScript) && 1831 FormatTok->is(Keywords.kw_finally)) || 1832 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1833 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1834 break; 1835 nextToken(); 1836 while (FormatTok->isNot(tok::l_brace)) { 1837 if (FormatTok->is(tok::l_paren)) { 1838 parseParens(); 1839 continue; 1840 } 1841 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1842 return; 1843 nextToken(); 1844 } 1845 NeedsUnwrappedLine = false; 1846 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1847 parseBlock(/*MustBeDeclaration=*/false); 1848 if (Style.BraceWrapping.BeforeCatch) 1849 addUnwrappedLine(); 1850 else 1851 NeedsUnwrappedLine = 
true; 1852 } 1853 if (NeedsUnwrappedLine) 1854 addUnwrappedLine(); 1855 } 1856 1857 void UnwrappedLineParser::parseNamespace() { 1858 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1859 1860 const FormatToken &InitialToken = *FormatTok; 1861 nextToken(); 1862 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) 1863 nextToken(); 1864 if (FormatTok->Tok.is(tok::l_brace)) { 1865 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1866 addUnwrappedLine(); 1867 1868 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1869 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1870 DeclarationScopeStack.size() > 1); 1871 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1872 // Munch the semicolon after a namespace. This is more common than one would 1873 // think. Puttin the semicolon into its own line is very ugly. 1874 if (FormatTok->Tok.is(tok::semi)) 1875 nextToken(); 1876 addUnwrappedLine(); 1877 } 1878 // FIXME: Add error handling. 1879 } 1880 1881 void UnwrappedLineParser::parseNew() { 1882 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1883 nextToken(); 1884 if (Style.Language != FormatStyle::LK_Java) 1885 return; 1886 1887 // In Java, we can parse everything up to the parens, which aren't optional. 1888 do { 1889 // There should not be a ;, { or } before the new's open paren. 1890 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1891 return; 1892 1893 // Consume the parens. 1894 if (FormatTok->is(tok::l_paren)) { 1895 parseParens(); 1896 1897 // If there is a class body of an anonymous class, consume that as child. 1898 if (FormatTok->is(tok::l_brace)) 1899 parseChildBlock(); 1900 return; 1901 } 1902 nextToken(); 1903 } while (!eof()); 1904 } 1905 1906 void UnwrappedLineParser::parseForOrWhileLoop() { 1907 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1908 "'for', 'while' or foreach macro expected"); 1909 nextToken(); 1910 // JS' for await ( ... 
1911 if (Style.Language == FormatStyle::LK_JavaScript && 1912 FormatTok->is(Keywords.kw_await)) 1913 nextToken(); 1914 if (FormatTok->Tok.is(tok::l_paren)) 1915 parseParens(); 1916 if (FormatTok->Tok.is(tok::l_brace)) { 1917 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1918 parseBlock(/*MustBeDeclaration=*/false); 1919 addUnwrappedLine(); 1920 } else { 1921 addUnwrappedLine(); 1922 ++Line->Level; 1923 parseStructuralElement(); 1924 --Line->Level; 1925 } 1926 } 1927 1928 void UnwrappedLineParser::parseDoWhile() { 1929 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1930 nextToken(); 1931 if (FormatTok->Tok.is(tok::l_brace)) { 1932 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1933 parseBlock(/*MustBeDeclaration=*/false); 1934 if (Style.BraceWrapping.IndentBraces) 1935 addUnwrappedLine(); 1936 } else { 1937 addUnwrappedLine(); 1938 ++Line->Level; 1939 parseStructuralElement(); 1940 --Line->Level; 1941 } 1942 1943 // FIXME: Add error handling. 1944 if (!FormatTok->Tok.is(tok::kw_while)) { 1945 addUnwrappedLine(); 1946 return; 1947 } 1948 1949 nextToken(); 1950 parseStructuralElement(); 1951 } 1952 1953 void UnwrappedLineParser::parseLabel() { 1954 nextToken(); 1955 unsigned OldLineLevel = Line->Level; 1956 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1957 --Line->Level; 1958 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1959 CompoundStatementIndenter Indenter(this, Line->Level, 1960 Style.BraceWrapping.AfterCaseLabel, 1961 Style.BraceWrapping.IndentBraces); 1962 parseBlock(/*MustBeDeclaration=*/false); 1963 if (FormatTok->Tok.is(tok::kw_break)) { 1964 if (Style.BraceWrapping.AfterControlStatement) 1965 addUnwrappedLine(); 1966 parseStructuralElement(); 1967 } 1968 addUnwrappedLine(); 1969 } else { 1970 if (FormatTok->is(tok::semi)) 1971 nextToken(); 1972 addUnwrappedLine(); 1973 } 1974 Line->Level = OldLineLevel; 1975 if (FormatTok->isNot(tok::l_brace)) { 1976 
parseStructuralElement(); 1977 addUnwrappedLine(); 1978 } 1979 } 1980 1981 void UnwrappedLineParser::parseCaseLabel() { 1982 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1983 // FIXME: fix handling of complex expressions here. 1984 do { 1985 nextToken(); 1986 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1987 parseLabel(); 1988 } 1989 1990 void UnwrappedLineParser::parseSwitch() { 1991 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1992 nextToken(); 1993 if (FormatTok->Tok.is(tok::l_paren)) 1994 parseParens(); 1995 if (FormatTok->Tok.is(tok::l_brace)) { 1996 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1997 parseBlock(/*MustBeDeclaration=*/false); 1998 addUnwrappedLine(); 1999 } else { 2000 addUnwrappedLine(); 2001 ++Line->Level; 2002 parseStructuralElement(); 2003 --Line->Level; 2004 } 2005 } 2006 2007 void UnwrappedLineParser::parseAccessSpecifier() { 2008 nextToken(); 2009 // Understand Qt's slots. 2010 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 2011 nextToken(); 2012 // Otherwise, we don't know what it is, and we'd better keep the next token. 2013 if (FormatTok->Tok.is(tok::colon)) 2014 nextToken(); 2015 addUnwrappedLine(); 2016 } 2017 2018 bool UnwrappedLineParser::parseEnum() { 2019 // Won't be 'enum' for NS_ENUMs. 2020 if (FormatTok->Tok.is(tok::kw_enum)) 2021 nextToken(); 2022 2023 // In TypeScript, "enum" can also be used as property name, e.g. in interface 2024 // declarations. An "enum" keyword followed by a colon would be a syntax 2025 // error and thus assume it is just an identifier. 2026 if (Style.Language == FormatStyle::LK_JavaScript && 2027 FormatTok->isOneOf(tok::colon, tok::question)) 2028 return false; 2029 2030 // In protobuf, "enum" can be used as a field name. 2031 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 2032 return false; 2033 2034 // Eat up enum class ... 
2035 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 2036 nextToken(); 2037 2038 while (FormatTok->Tok.getIdentifierInfo() || 2039 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 2040 tok::greater, tok::comma, tok::question)) { 2041 nextToken(); 2042 // We can have macros or attributes in between 'enum' and the enum name. 2043 if (FormatTok->is(tok::l_paren)) 2044 parseParens(); 2045 if (FormatTok->is(tok::identifier)) { 2046 nextToken(); 2047 // If there are two identifiers in a row, this is likely an elaborate 2048 // return type. In Java, this can be "implements", etc. 2049 if (Style.isCpp() && FormatTok->is(tok::identifier)) 2050 return false; 2051 } 2052 } 2053 2054 // Just a declaration or something is wrong. 2055 if (FormatTok->isNot(tok::l_brace)) 2056 return true; 2057 FormatTok->BlockKind = BK_Block; 2058 2059 if (Style.Language == FormatStyle::LK_Java) { 2060 // Java enums are different. 2061 parseJavaEnumBody(); 2062 return true; 2063 } 2064 if (Style.Language == FormatStyle::LK_Proto) { 2065 parseBlock(/*MustBeDeclaration=*/true); 2066 return true; 2067 } 2068 2069 // Parse enum body. 2070 nextToken(); 2071 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 2072 if (HasError) { 2073 if (FormatTok->is(tok::semi)) 2074 nextToken(); 2075 addUnwrappedLine(); 2076 } 2077 return true; 2078 2079 // There is no addUnwrappedLine() here so that we fall through to parsing a 2080 // structural element afterwards. Thus, in "enum A {} n, m;", 2081 // "} n, m;" will end up in one unwrapped line. 2082 } 2083 2084 void UnwrappedLineParser::parseJavaEnumBody() { 2085 // Determine whether the enum is simple, i.e. does not have a semicolon or 2086 // constants with class bodies. Simple enums can be formatted like braced 2087 // lists, contracted to a single line, etc. 
2088 unsigned StoredPosition = Tokens->getPosition(); 2089 bool IsSimple = true; 2090 FormatToken *Tok = Tokens->getNextToken(); 2091 while (Tok) { 2092 if (Tok->is(tok::r_brace)) 2093 break; 2094 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2095 IsSimple = false; 2096 break; 2097 } 2098 // FIXME: This will also mark enums with braces in the arguments to enum 2099 // constants as "not simple". This is probably fine in practice, though. 2100 Tok = Tokens->getNextToken(); 2101 } 2102 FormatTok = Tokens->setPosition(StoredPosition); 2103 2104 if (IsSimple) { 2105 nextToken(); 2106 parseBracedList(); 2107 addUnwrappedLine(); 2108 return; 2109 } 2110 2111 // Parse the body of a more complex enum. 2112 // First add a line for everything up to the "{". 2113 nextToken(); 2114 addUnwrappedLine(); 2115 ++Line->Level; 2116 2117 // Parse the enum constants. 2118 while (FormatTok) { 2119 if (FormatTok->is(tok::l_brace)) { 2120 // Parse the constant's class body. 2121 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2122 /*MunchSemi=*/false); 2123 } else if (FormatTok->is(tok::l_paren)) { 2124 parseParens(); 2125 } else if (FormatTok->is(tok::comma)) { 2126 nextToken(); 2127 addUnwrappedLine(); 2128 } else if (FormatTok->is(tok::semi)) { 2129 nextToken(); 2130 addUnwrappedLine(); 2131 break; 2132 } else if (FormatTok->is(tok::r_brace)) { 2133 addUnwrappedLine(); 2134 break; 2135 } else { 2136 nextToken(); 2137 } 2138 } 2139 2140 // Parse the class body after the enum's ";" if any. 2141 parseLevel(/*HasOpeningBrace=*/true); 2142 nextToken(); 2143 --Line->Level; 2144 addUnwrappedLine(); 2145 } 2146 2147 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 2148 const FormatToken &InitialToken = *FormatTok; 2149 nextToken(); 2150 2151 // The actual identifier can be a nested name specifier, and in macros 2152 // it is often token-pasted. 
2153 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 2154 tok::kw___attribute, tok::kw___declspec, 2155 tok::kw_alignas) || 2156 ((Style.Language == FormatStyle::LK_Java || 2157 Style.Language == FormatStyle::LK_JavaScript) && 2158 FormatTok->isOneOf(tok::period, tok::comma))) { 2159 if (Style.Language == FormatStyle::LK_JavaScript && 2160 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 2161 // JavaScript/TypeScript supports inline object types in 2162 // extends/implements positions: 2163 // class Foo implements {bar: number} { } 2164 nextToken(); 2165 if (FormatTok->is(tok::l_brace)) { 2166 tryToParseBracedList(); 2167 continue; 2168 } 2169 } 2170 bool IsNonMacroIdentifier = 2171 FormatTok->is(tok::identifier) && 2172 FormatTok->TokenText != FormatTok->TokenText.upper(); 2173 nextToken(); 2174 // We can have macros or attributes in between 'class' and the class name. 2175 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 2176 parseParens(); 2177 } 2178 2179 // Note that parsing away template declarations here leads to incorrectly 2180 // accepting function declarations as record declarations. 2181 // In general, we cannot solve this problem. Consider: 2182 // class A<int> B() {} 2183 // which can be a function definition or a class definition when B() is a 2184 // macro. If we find enough real-world cases where this is a problem, we 2185 // can parse for the 'template' keyword in the beginning of the statement, 2186 // and thus rule out the record production in case there is no template 2187 // (this would still leave us with an ambiguity between template function 2188 // and class declarations). 
2189 if (FormatTok->isOneOf(tok::colon, tok::less)) { 2190 while (!eof()) { 2191 if (FormatTok->is(tok::l_brace)) { 2192 calculateBraceTypes(/*ExpectClassBody=*/true); 2193 if (!tryToParseBracedList()) 2194 break; 2195 } 2196 if (FormatTok->Tok.is(tok::semi)) 2197 return; 2198 nextToken(); 2199 } 2200 } 2201 if (FormatTok->Tok.is(tok::l_brace)) { 2202 if (ParseAsExpr) { 2203 parseChildBlock(); 2204 } else { 2205 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2206 addUnwrappedLine(); 2207 2208 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2209 /*MunchSemi=*/false); 2210 } 2211 } 2212 // There is no addUnwrappedLine() here so that we fall through to parsing a 2213 // structural element afterwards. Thus, in "class A {} n, m;", 2214 // "} n, m;" will end up in one unwrapped line. 2215 } 2216 2217 void UnwrappedLineParser::parseObjCMethod() { 2218 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && 2219 "'(' or identifier expected."); 2220 do { 2221 if (FormatTok->Tok.is(tok::semi)) { 2222 nextToken(); 2223 addUnwrappedLine(); 2224 return; 2225 } else if (FormatTok->Tok.is(tok::l_brace)) { 2226 if (Style.BraceWrapping.AfterFunction) 2227 addUnwrappedLine(); 2228 parseBlock(/*MustBeDeclaration=*/false); 2229 addUnwrappedLine(); 2230 return; 2231 } else { 2232 nextToken(); 2233 } 2234 } while (!eof()); 2235 } 2236 2237 void UnwrappedLineParser::parseObjCProtocolList() { 2238 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 2239 do { 2240 nextToken(); 2241 // Early exit in case someone forgot a close angle. 2242 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2243 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2244 return; 2245 } while (!eof() && FormatTok->Tok.isNot(tok::greater)); 2246 nextToken(); // Skip '>'. 
2247 } 2248 2249 void UnwrappedLineParser::parseObjCUntilAtEnd() { 2250 do { 2251 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2252 nextToken(); 2253 addUnwrappedLine(); 2254 break; 2255 } 2256 if (FormatTok->is(tok::l_brace)) { 2257 parseBlock(/*MustBeDeclaration=*/false); 2258 // In ObjC interfaces, nothing should be following the "}". 2259 addUnwrappedLine(); 2260 } else if (FormatTok->is(tok::r_brace)) { 2261 // Ignore stray "}". parseStructuralElement doesn't consume them. 2262 nextToken(); 2263 addUnwrappedLine(); 2264 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 2265 nextToken(); 2266 parseObjCMethod(); 2267 } else { 2268 parseStructuralElement(); 2269 } 2270 } while (!eof()); 2271 } 2272 2273 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2274 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 2275 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 2276 nextToken(); 2277 nextToken(); // interface name 2278 2279 // @interface can be followed by a lightweight generic 2280 // specialization list, then either a base class or a category. 2281 if (FormatTok->Tok.is(tok::less)) { 2282 // Unlike protocol lists, generic parameterizations support 2283 // nested angles: 2284 // 2285 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 2286 // NSObject <NSCopying, NSSecureCoding> 2287 // 2288 // so we need to count how many open angles we have left. 2289 unsigned NumOpenAngles = 1; 2290 do { 2291 nextToken(); 2292 // Early exit in case someone forgot a close angle. 2293 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2294 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2295 break; 2296 if (FormatTok->Tok.is(tok::less)) 2297 ++NumOpenAngles; 2298 else if (FormatTok->Tok.is(tok::greater)) { 2299 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 2300 --NumOpenAngles; 2301 } 2302 } while (!eof() && NumOpenAngles != 0); 2303 nextToken(); // Skip '>'. 
2304 } 2305 if (FormatTok->Tok.is(tok::colon)) { 2306 nextToken(); 2307 nextToken(); // base class name 2308 } else if (FormatTok->Tok.is(tok::l_paren)) 2309 // Skip category, if present. 2310 parseParens(); 2311 2312 if (FormatTok->Tok.is(tok::less)) 2313 parseObjCProtocolList(); 2314 2315 if (FormatTok->Tok.is(tok::l_brace)) { 2316 if (Style.BraceWrapping.AfterObjCDeclaration) 2317 addUnwrappedLine(); 2318 parseBlock(/*MustBeDeclaration=*/true); 2319 } 2320 2321 // With instance variables, this puts '}' on its own line. Without instance 2322 // variables, this ends the @interface line. 2323 addUnwrappedLine(); 2324 2325 parseObjCUntilAtEnd(); 2326 } 2327 2328 // Returns true for the declaration/definition form of @protocol, 2329 // false for the expression form. 2330 bool UnwrappedLineParser::parseObjCProtocol() { 2331 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 2332 nextToken(); 2333 2334 if (FormatTok->is(tok::l_paren)) 2335 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 2336 return false; 2337 2338 // The definition/declaration form, 2339 // @protocol Foo 2340 // - (int)someMethod; 2341 // @end 2342 2343 nextToken(); // protocol name 2344 2345 if (FormatTok->Tok.is(tok::less)) 2346 parseObjCProtocolList(); 2347 2348 // Check for protocol declaration. 2349 if (FormatTok->Tok.is(tok::semi)) { 2350 nextToken(); 2351 addUnwrappedLine(); 2352 return true; 2353 } 2354 2355 addUnwrappedLine(); 2356 parseObjCUntilAtEnd(); 2357 return true; 2358 } 2359 2360 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2361 bool IsImport = FormatTok->is(Keywords.kw_import); 2362 assert(IsImport || FormatTok->is(tok::kw_export)); 2363 nextToken(); 2364 2365 // Consume the "default" in "export default class/function". 2366 if (FormatTok->is(tok::kw_default)) 2367 nextToken(); 2368 2369 // Consume "async function", "function" and "default function", so that these 2370 // get parsed as free-standing JS functions, i.e. 
do not require a trailing 2371 // semicolon. 2372 if (FormatTok->is(Keywords.kw_async)) 2373 nextToken(); 2374 if (FormatTok->is(Keywords.kw_function)) { 2375 nextToken(); 2376 return; 2377 } 2378 2379 // For imports, `export *`, `export {...}`, consume the rest of the line up 2380 // to the terminating `;`. For everything else, just return and continue 2381 // parsing the structural element, i.e. the declaration or expression for 2382 // `export default`. 2383 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2384 !FormatTok->isStringLiteral()) 2385 return; 2386 2387 while (!eof()) { 2388 if (FormatTok->is(tok::semi)) 2389 return; 2390 if (Line->Tokens.empty()) { 2391 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2392 // import statement should terminate. 2393 return; 2394 } 2395 if (FormatTok->is(tok::l_brace)) { 2396 FormatTok->BlockKind = BK_Block; 2397 nextToken(); 2398 parseBracedList(); 2399 } else { 2400 nextToken(); 2401 } 2402 } 2403 } 2404 2405 void UnwrappedLineParser::parseStatementMacro() { 2406 nextToken(); 2407 if (FormatTok->is(tok::l_paren)) 2408 parseParens(); 2409 if (FormatTok->is(tok::semi)) 2410 nextToken(); 2411 addUnwrappedLine(); 2412 } 2413 2414 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2415 StringRef Prefix = "") { 2416 llvm::dbgs() << Prefix << "Line(" << Line.Level 2417 << ", FSC=" << Line.FirstStartColumn << ")" 2418 << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 2419 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2420 E = Line.Tokens.end(); 2421 I != E; ++I) { 2422 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2423 << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn 2424 << "] "; 2425 } 2426 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2427 E = Line.Tokens.end(); 2428 I != E; ++I) { 2429 const UnwrappedLineNode &Node = *I; 2430 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2431 I = Node.Children.begin(), 2432 E = Node.Children.end(); 2433 I != E; ++I) { 2434 printDebugInfo(*I, "\nChild: "); 2435 } 2436 } 2437 llvm::dbgs() << "\n"; 2438 } 2439 2440 void UnwrappedLineParser::addUnwrappedLine() { 2441 if (Line->Tokens.empty()) 2442 return; 2443 LLVM_DEBUG({ 2444 if (CurrentLines == &Lines) 2445 printDebugInfo(*Line); 2446 }); 2447 CurrentLines->push_back(std::move(*Line)); 2448 Line->Tokens.clear(); 2449 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2450 Line->FirstStartColumn = 0; 2451 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2452 CurrentLines->append( 2453 std::make_move_iterator(PreprocessorDirectives.begin()), 2454 std::make_move_iterator(PreprocessorDirectives.end())); 2455 PreprocessorDirectives.clear(); 2456 } 2457 // Disconnect the current token from the last token on the previous line. 2458 FormatTok->Previous = nullptr; 2459 } 2460 2461 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2462 2463 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2464 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2465 FormatTok.NewlinesBefore > 0; 2466 } 2467 2468 // Checks if \p FormatTok is a line comment that continues the line comment 2469 // section on \p Line. 
2470 static bool continuesLineCommentSection(const FormatToken &FormatTok, 2471 const UnwrappedLine &Line, 2472 llvm::Regex &CommentPragmasRegex) { 2473 if (Line.Tokens.empty()) 2474 return false; 2475 2476 StringRef IndentContent = FormatTok.TokenText; 2477 if (FormatTok.TokenText.startswith("//") || 2478 FormatTok.TokenText.startswith("/*")) 2479 IndentContent = FormatTok.TokenText.substr(2); 2480 if (CommentPragmasRegex.match(IndentContent)) 2481 return false; 2482 2483 // If Line starts with a line comment, then FormatTok continues the comment 2484 // section if its original column is greater or equal to the original start 2485 // column of the line. 2486 // 2487 // Define the min column token of a line as follows: if a line ends in '{' or 2488 // contains a '{' followed by a line comment, then the min column token is 2489 // that '{'. Otherwise, the min column token of the line is the first token of 2490 // the line. 2491 // 2492 // If Line starts with a token other than a line comment, then FormatTok 2493 // continues the comment section if its original column is greater than the 2494 // original start column of the min column token of the line. 
2495 // 2496 // For example, the second line comment continues the first in these cases: 2497 // 2498 // // first line 2499 // // second line 2500 // 2501 // and: 2502 // 2503 // // first line 2504 // // second line 2505 // 2506 // and: 2507 // 2508 // int i; // first line 2509 // // second line 2510 // 2511 // and: 2512 // 2513 // do { // first line 2514 // // second line 2515 // int i; 2516 // } while (true); 2517 // 2518 // and: 2519 // 2520 // enum { 2521 // a, // first line 2522 // // second line 2523 // b 2524 // }; 2525 // 2526 // The second line comment doesn't continue the first in these cases: 2527 // 2528 // // first line 2529 // // second line 2530 // 2531 // and: 2532 // 2533 // int i; // first line 2534 // // second line 2535 // 2536 // and: 2537 // 2538 // do { // first line 2539 // // second line 2540 // int i; 2541 // } while (true); 2542 // 2543 // and: 2544 // 2545 // enum { 2546 // a, // first line 2547 // // second line 2548 // }; 2549 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2550 2551 // Scan for '{//'. If found, use the column of '{' as a min column for line 2552 // comment section continuation. 2553 const FormatToken *PreviousToken = nullptr; 2554 for (const UnwrappedLineNode &Node : Line.Tokens) { 2555 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2556 isLineComment(*Node.Tok)) { 2557 MinColumnToken = PreviousToken; 2558 break; 2559 } 2560 PreviousToken = Node.Tok; 2561 2562 // Grab the last newline preceding a token in this unwrapped line. 
2563 if (Node.Tok->NewlinesBefore > 0) { 2564 MinColumnToken = Node.Tok; 2565 } 2566 } 2567 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2568 MinColumnToken = PreviousToken; 2569 } 2570 2571 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2572 MinColumnToken); 2573 } 2574 2575 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2576 bool JustComments = Line->Tokens.empty(); 2577 for (SmallVectorImpl<FormatToken *>::const_iterator 2578 I = CommentsBeforeNextToken.begin(), 2579 E = CommentsBeforeNextToken.end(); 2580 I != E; ++I) { 2581 // Line comments that belong to the same line comment section are put on the 2582 // same line since later we might want to reflow content between them. 2583 // Additional fine-grained breaking of line comment sections is controlled 2584 // by the class BreakableLineCommentSection in case it is desirable to keep 2585 // several line comment sections in the same unwrapped line. 2586 // 2587 // FIXME: Consider putting separate line comment sections as children to the 2588 // unwrapped line instead. 
2589 (*I)->ContinuesLineCommentSection = 2590 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2591 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2592 addUnwrappedLine(); 2593 pushToken(*I); 2594 } 2595 if (NewlineBeforeNext && JustComments) 2596 addUnwrappedLine(); 2597 CommentsBeforeNextToken.clear(); 2598 } 2599 2600 void UnwrappedLineParser::nextToken(int LevelDifference) { 2601 if (eof()) 2602 return; 2603 flushComments(isOnNewLine(*FormatTok)); 2604 pushToken(FormatTok); 2605 FormatToken *Previous = FormatTok; 2606 if (Style.Language != FormatStyle::LK_JavaScript) 2607 readToken(LevelDifference); 2608 else 2609 readTokenWithJavaScriptASI(); 2610 FormatTok->Previous = Previous; 2611 } 2612 2613 void UnwrappedLineParser::distributeComments( 2614 const SmallVectorImpl<FormatToken *> &Comments, 2615 const FormatToken *NextTok) { 2616 // Whether or not a line comment token continues a line is controlled by 2617 // the method continuesLineCommentSection, with the following caveat: 2618 // 2619 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2620 // that each comment line from the trail is aligned with the next token, if 2621 // the next token exists. If a trail exists, the beginning of the maximal 2622 // trail is marked as a start of a new comment section. 2623 // 2624 // For example in this code: 2625 // 2626 // int a; // line about a 2627 // // line 1 about b 2628 // // line 2 about b 2629 // int b; 2630 // 2631 // the two lines about b form a maximal trail, so there are two sections, the 2632 // first one consisting of the single comment "// line about a" and the 2633 // second one consisting of the next two comments. 2634 if (Comments.empty()) 2635 return; 2636 bool ShouldPushCommentsInCurrentLine = true; 2637 bool HasTrailAlignedWithNextToken = false; 2638 unsigned StartOfTrailAlignedWithNextToken = 0; 2639 if (NextTok) { 2640 // We are skipping the first element intentionally. 
2641 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2642 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2643 HasTrailAlignedWithNextToken = true; 2644 StartOfTrailAlignedWithNextToken = i; 2645 } 2646 } 2647 } 2648 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2649 FormatToken *FormatTok = Comments[i]; 2650 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 2651 FormatTok->ContinuesLineCommentSection = false; 2652 } else { 2653 FormatTok->ContinuesLineCommentSection = 2654 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2655 } 2656 if (!FormatTok->ContinuesLineCommentSection && 2657 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2658 ShouldPushCommentsInCurrentLine = false; 2659 } 2660 if (ShouldPushCommentsInCurrentLine) { 2661 pushToken(FormatTok); 2662 } else { 2663 CommentsBeforeNextToken.push_back(FormatTok); 2664 } 2665 } 2666 } 2667 2668 void UnwrappedLineParser::readToken(int LevelDifference) { 2669 SmallVector<FormatToken *, 1> Comments; 2670 do { 2671 FormatTok = Tokens->getNextToken(); 2672 assert(FormatTok); 2673 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2674 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2675 distributeComments(Comments, FormatTok); 2676 Comments.clear(); 2677 // If there is an unfinished unwrapped line, we flush the preprocessor 2678 // directives only after that unwrapped line was finished later. 
2679 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2680 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2681 assert((LevelDifference >= 0 || 2682 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 2683 "LevelDifference makes Line->Level negative"); 2684 Line->Level += LevelDifference; 2685 // Comments stored before the preprocessor directive need to be output 2686 // before the preprocessor directive, at the same level as the 2687 // preprocessor directive, as we consider them to apply to the directive. 2688 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 2689 PPBranchLevel > 0) 2690 Line->Level += PPBranchLevel; 2691 flushComments(isOnNewLine(*FormatTok)); 2692 parsePPDirective(); 2693 } 2694 while (FormatTok->Type == TT_ConflictStart || 2695 FormatTok->Type == TT_ConflictEnd || 2696 FormatTok->Type == TT_ConflictAlternative) { 2697 if (FormatTok->Type == TT_ConflictStart) { 2698 conditionalCompilationStart(/*Unreachable=*/false); 2699 } else if (FormatTok->Type == TT_ConflictAlternative) { 2700 conditionalCompilationAlternative(); 2701 } else if (FormatTok->Type == TT_ConflictEnd) { 2702 conditionalCompilationEnd(); 2703 } 2704 FormatTok = Tokens->getNextToken(); 2705 FormatTok->MustBreakBefore = true; 2706 } 2707 2708 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 2709 !Line->InPPDirective) { 2710 continue; 2711 } 2712 2713 if (!FormatTok->Tok.is(tok::comment)) { 2714 distributeComments(Comments, FormatTok); 2715 Comments.clear(); 2716 return; 2717 } 2718 2719 Comments.push_back(FormatTok); 2720 } while (!eof()); 2721 2722 distributeComments(Comments, nullptr); 2723 Comments.clear(); 2724 } 2725 2726 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2727 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2728 if (MustBreakBeforeNextToken) { 2729 Line->Tokens.back().Tok->MustBreakBefore = true; 2730 MustBreakBeforeNextToken = false; 2731 } 2732 } 2733 2734 } // end namespace format 2735 } // 
end namespace clang 2736