1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/Support/Debug.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 #include <algorithm> 21 22 #define DEBUG_TYPE "format-parser" 23 24 namespace clang { 25 namespace format { 26 27 class FormatTokenSource { 28 public: 29 virtual ~FormatTokenSource() {} 30 virtual FormatToken *getNextToken() = 0; 31 32 virtual unsigned getPosition() = 0; 33 virtual FormatToken *setPosition(unsigned Position) = 0; 34 }; 35 36 namespace { 37 38 class ScopedDeclarationState { 39 public: 40 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 41 bool MustBeDeclaration) 42 : Line(Line), Stack(Stack) { 43 Line.MustBeDeclaration = MustBeDeclaration; 44 Stack.push_back(MustBeDeclaration); 45 } 46 ~ScopedDeclarationState() { 47 Stack.pop_back(); 48 if (!Stack.empty()) 49 Line.MustBeDeclaration = Stack.back(); 50 else 51 Line.MustBeDeclaration = true; 52 } 53 54 private: 55 UnwrappedLine &Line; 56 std::vector<bool> &Stack; 57 }; 58 59 static bool isLineComment(const FormatToken &FormatTok) { 60 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 61 } 62 63 // Checks if \p FormatTok is a line comment that continues the line comment 64 // \p Previous. 
The original column of \p MinColumnToken is used to determine 65 // whether \p FormatTok is indented enough to the right to continue \p Previous. 66 static bool continuesLineComment(const FormatToken &FormatTok, 67 const FormatToken *Previous, 68 const FormatToken *MinColumnToken) { 69 if (!Previous || !MinColumnToken) 70 return false; 71 unsigned MinContinueColumn = 72 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 73 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 74 isLineComment(*Previous) && 75 FormatTok.OriginalColumn >= MinContinueColumn; 76 } 77 78 class ScopedMacroState : public FormatTokenSource { 79 public: 80 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 81 FormatToken *&ResetToken) 82 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 83 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 84 Token(nullptr), PreviousToken(nullptr) { 85 FakeEOF.Tok.startToken(); 86 FakeEOF.Tok.setKind(tok::eof); 87 TokenSource = this; 88 Line.Level = 0; 89 Line.InPPDirective = true; 90 } 91 92 ~ScopedMacroState() override { 93 TokenSource = PreviousTokenSource; 94 ResetToken = Token; 95 Line.InPPDirective = false; 96 Line.Level = PreviousLineLevel; 97 } 98 99 FormatToken *getNextToken() override { 100 // The \c UnwrappedLineParser guards against this by never calling 101 // \c getNextToken() after it has encountered the first eof token. 
102 assert(!eof()); 103 PreviousToken = Token; 104 Token = PreviousTokenSource->getNextToken(); 105 if (eof()) 106 return &FakeEOF; 107 return Token; 108 } 109 110 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 111 112 FormatToken *setPosition(unsigned Position) override { 113 PreviousToken = nullptr; 114 Token = PreviousTokenSource->setPosition(Position); 115 return Token; 116 } 117 118 private: 119 bool eof() { 120 return Token && Token->HasUnescapedNewline && 121 !continuesLineComment(*Token, PreviousToken, 122 /*MinColumnToken=*/PreviousToken); 123 } 124 125 FormatToken FakeEOF; 126 UnwrappedLine &Line; 127 FormatTokenSource *&TokenSource; 128 FormatToken *&ResetToken; 129 unsigned PreviousLineLevel; 130 FormatTokenSource *PreviousTokenSource; 131 132 FormatToken *Token; 133 FormatToken *PreviousToken; 134 }; 135 136 } // end anonymous namespace 137 138 class ScopedLineState { 139 public: 140 ScopedLineState(UnwrappedLineParser &Parser, 141 bool SwitchToPreprocessorLines = false) 142 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 143 if (SwitchToPreprocessorLines) 144 Parser.CurrentLines = &Parser.PreprocessorDirectives; 145 else if (!Parser.Line->Tokens.empty()) 146 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 147 PreBlockLine = std::move(Parser.Line); 148 Parser.Line = llvm::make_unique<UnwrappedLine>(); 149 Parser.Line->Level = PreBlockLine->Level; 150 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 151 } 152 153 ~ScopedLineState() { 154 if (!Parser.Line->Tokens.empty()) { 155 Parser.addUnwrappedLine(); 156 } 157 assert(Parser.Line->Tokens.empty()); 158 Parser.Line = std::move(PreBlockLine); 159 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 160 Parser.MustBreakBeforeNextToken = true; 161 Parser.CurrentLines = OriginalLines; 162 } 163 164 private: 165 UnwrappedLineParser &Parser; 166 167 std::unique_ptr<UnwrappedLine> PreBlockLine; 168 SmallVectorImpl<UnwrappedLine> *OriginalLines; 
169 }; 170 171 class CompoundStatementIndenter { 172 public: 173 CompoundStatementIndenter(UnwrappedLineParser *Parser, 174 const FormatStyle &Style, unsigned &LineLevel) 175 : CompoundStatementIndenter(Parser, LineLevel, 176 Style.BraceWrapping.AfterControlStatement, 177 Style.BraceWrapping.IndentBraces) { 178 } 179 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 180 bool WrapBrace, bool IndentBrace) 181 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 182 if (WrapBrace) 183 Parser->addUnwrappedLine(); 184 if (IndentBrace) 185 ++LineLevel; 186 } 187 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 188 189 private: 190 unsigned &LineLevel; 191 unsigned OldLineLevel; 192 }; 193 194 namespace { 195 196 class IndexedTokenSource : public FormatTokenSource { 197 public: 198 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 199 : Tokens(Tokens), Position(-1) {} 200 201 FormatToken *getNextToken() override { 202 ++Position; 203 return Tokens[Position]; 204 } 205 206 unsigned getPosition() override { 207 assert(Position >= 0); 208 return Position; 209 } 210 211 FormatToken *setPosition(unsigned P) override { 212 Position = P; 213 return Tokens[Position]; 214 } 215 216 void reset() { Position = -1; } 217 218 private: 219 ArrayRef<FormatToken *> Tokens; 220 int Position; 221 }; 222 223 } // end anonymous namespace 224 225 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 226 const AdditionalKeywords &Keywords, 227 unsigned FirstStartColumn, 228 ArrayRef<FormatToken *> Tokens, 229 UnwrappedLineConsumer &Callback) 230 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 231 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 232 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 233 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 234 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 235 ? 
IG_Rejected 236 : IG_Inited), 237 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 238 239 void UnwrappedLineParser::reset() { 240 PPBranchLevel = -1; 241 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 242 ? IG_Rejected 243 : IG_Inited; 244 IncludeGuardToken = nullptr; 245 Line.reset(new UnwrappedLine); 246 CommentsBeforeNextToken.clear(); 247 FormatTok = nullptr; 248 MustBreakBeforeNextToken = false; 249 PreprocessorDirectives.clear(); 250 CurrentLines = &Lines; 251 DeclarationScopeStack.clear(); 252 PPStack.clear(); 253 Line->FirstStartColumn = FirstStartColumn; 254 } 255 256 void UnwrappedLineParser::parse() { 257 IndexedTokenSource TokenSource(AllTokens); 258 Line->FirstStartColumn = FirstStartColumn; 259 do { 260 LLVM_DEBUG(llvm::dbgs() << "----\n"); 261 reset(); 262 Tokens = &TokenSource; 263 TokenSource.reset(); 264 265 readToken(); 266 parseFile(); 267 268 // If we found an include guard then all preprocessor directives (other than 269 // the guard) are over-indented by one. 270 if (IncludeGuard == IG_Found) 271 for (auto &Line : Lines) 272 if (Line.InPPDirective && Line.Level > 0) 273 --Line.Level; 274 275 // Create line with eof token. 
276 pushToken(FormatTok); 277 addUnwrappedLine(); 278 279 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 280 E = Lines.end(); 281 I != E; ++I) { 282 Callback.consumeUnwrappedLine(*I); 283 } 284 Callback.finishRun(); 285 Lines.clear(); 286 while (!PPLevelBranchIndex.empty() && 287 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 288 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 289 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 290 } 291 if (!PPLevelBranchIndex.empty()) { 292 ++PPLevelBranchIndex.back(); 293 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 294 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 295 } 296 } while (!PPLevelBranchIndex.empty()); 297 } 298 299 void UnwrappedLineParser::parseFile() { 300 // The top-level context in a file always has declarations, except for pre- 301 // processor directives and JavaScript files. 302 bool MustBeDeclaration = 303 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 304 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 305 MustBeDeclaration); 306 if (Style.Language == FormatStyle::LK_TextProto) 307 parseBracedList(); 308 else 309 parseLevel(/*HasOpeningBrace=*/false); 310 // Make sure to format the remaining tokens. 311 // 312 // LK_TextProto is special since its top-level is parsed as the body of a 313 // braced list, which does not necessarily have natural line separators such 314 // as a semicolon. Comments after the last entry that have been determined to 315 // not belong to that line, as in: 316 // key: value 317 // // endfile comment 318 // do not have a chance to be put on a line of their own until this point. 319 // Here we add this newline before end-of-file comments. 
320 if (Style.Language == FormatStyle::LK_TextProto && 321 !CommentsBeforeNextToken.empty()) 322 addUnwrappedLine(); 323 flushComments(true); 324 addUnwrappedLine(); 325 } 326 327 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 328 bool SwitchLabelEncountered = false; 329 do { 330 tok::TokenKind kind = FormatTok->Tok.getKind(); 331 if (FormatTok->Type == TT_MacroBlockBegin) { 332 kind = tok::l_brace; 333 } else if (FormatTok->Type == TT_MacroBlockEnd) { 334 kind = tok::r_brace; 335 } 336 337 switch (kind) { 338 case tok::comment: 339 nextToken(); 340 addUnwrappedLine(); 341 break; 342 case tok::l_brace: 343 // FIXME: Add parameter whether this can happen - if this happens, we must 344 // be in a non-declaration context. 345 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 346 continue; 347 parseBlock(/*MustBeDeclaration=*/false); 348 addUnwrappedLine(); 349 break; 350 case tok::r_brace: 351 if (HasOpeningBrace) 352 return; 353 nextToken(); 354 addUnwrappedLine(); 355 break; 356 case tok::kw_default: { 357 unsigned StoredPosition = Tokens->getPosition(); 358 FormatToken *Next; 359 do { 360 Next = Tokens->getNextToken(); 361 } while (Next && Next->is(tok::comment)); 362 FormatTok = Tokens->setPosition(StoredPosition); 363 if (Next && Next->isNot(tok::colon)) { 364 // default not followed by ':' is not a case label; treat it like 365 // an identifier. 366 parseStructuralElement(); 367 break; 368 } 369 // Else, if it is 'default:', fall through to the case handling. 370 LLVM_FALLTHROUGH; 371 } 372 case tok::kw_case: 373 if (Style.Language == FormatStyle::LK_JavaScript && 374 Line->MustBeDeclaration) { 375 // A 'case: string' style field declaration. 
376 parseStructuralElement(); 377 break; 378 } 379 if (!SwitchLabelEncountered && 380 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 381 ++Line->Level; 382 SwitchLabelEncountered = true; 383 parseStructuralElement(); 384 break; 385 default: 386 parseStructuralElement(); 387 break; 388 } 389 } while (!eof()); 390 } 391 392 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 393 // We'll parse forward through the tokens until we hit 394 // a closing brace or eof - note that getNextToken() will 395 // parse macros, so this will magically work inside macro 396 // definitions, too. 397 unsigned StoredPosition = Tokens->getPosition(); 398 FormatToken *Tok = FormatTok; 399 const FormatToken *PrevTok = Tok->Previous; 400 // Keep a stack of positions of lbrace tokens. We will 401 // update information about whether an lbrace starts a 402 // braced init list or a different block during the loop. 403 SmallVector<FormatToken *, 8> LBraceStack; 404 assert(Tok->Tok.is(tok::l_brace)); 405 do { 406 // Get next non-comment token. 407 FormatToken *NextTok; 408 unsigned ReadTokens = 0; 409 do { 410 NextTok = Tokens->getNextToken(); 411 ++ReadTokens; 412 } while (NextTok->is(tok::comment)); 413 414 switch (Tok->Tok.getKind()) { 415 case tok::l_brace: 416 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { 417 if (PrevTok->isOneOf(tok::colon, tok::less)) 418 // A ':' indicates this code is in a type, or a braced list 419 // following a label in an object literal ({a: {b: 1}}). 420 // A '<' could be an object used in a comparison, but that is nonsense 421 // code (can never return true), so more likely it is a generic type 422 // argument (`X<{a: string; b: number}>`). 423 // The code below could be confused by semicolons between the 424 // individual members in a type member list, which would normally 425 // trigger BK_Block. In both cases, this must be parsed as an inline 426 // braced init. 
427 Tok->BlockKind = BK_BracedInit; 428 else if (PrevTok->is(tok::r_paren)) 429 // `) { }` can only occur in function or method declarations in JS. 430 Tok->BlockKind = BK_Block; 431 } else { 432 Tok->BlockKind = BK_Unknown; 433 } 434 LBraceStack.push_back(Tok); 435 break; 436 case tok::r_brace: 437 if (LBraceStack.empty()) 438 break; 439 if (LBraceStack.back()->BlockKind == BK_Unknown) { 440 bool ProbablyBracedList = false; 441 if (Style.Language == FormatStyle::LK_Proto) { 442 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 443 } else { 444 // Using OriginalColumn to distinguish between ObjC methods and 445 // binary operators is a bit hacky. 446 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 447 NextTok->OriginalColumn == 0; 448 449 // If there is a comma, semicolon or right paren after the closing 450 // brace, we assume this is a braced initializer list. Note that 451 // regardless how we mark inner braces here, we will overwrite the 452 // BlockKind later if we parse a braced list (where all blocks 453 // inside are by default braced lists), or when we explicitly detect 454 // blocks (for example while parsing lambdas). 455 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 456 // braced list in JS. 
457 ProbablyBracedList = 458 (Style.Language == FormatStyle::LK_JavaScript && 459 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 460 Keywords.kw_as)) || 461 (Style.isCpp() && NextTok->is(tok::l_paren)) || 462 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 463 tok::r_paren, tok::r_square, tok::l_brace, 464 tok::ellipsis) || 465 (NextTok->is(tok::identifier) && 466 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 467 (NextTok->is(tok::semi) && 468 (!ExpectClassBody || LBraceStack.size() != 1)) || 469 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 470 if (NextTok->is(tok::l_square)) { 471 // We can have an array subscript after a braced init 472 // list, but C++11 attributes are expected after blocks. 473 NextTok = Tokens->getNextToken(); 474 ++ReadTokens; 475 ProbablyBracedList = NextTok->isNot(tok::l_square); 476 } 477 } 478 if (ProbablyBracedList) { 479 Tok->BlockKind = BK_BracedInit; 480 LBraceStack.back()->BlockKind = BK_BracedInit; 481 } else { 482 Tok->BlockKind = BK_Block; 483 LBraceStack.back()->BlockKind = BK_Block; 484 } 485 } 486 LBraceStack.pop_back(); 487 break; 488 case tok::identifier: 489 if (!Tok->is(TT_StatementMacro)) 490 break; 491 LLVM_FALLTHROUGH; 492 case tok::at: 493 case tok::semi: 494 case tok::kw_if: 495 case tok::kw_while: 496 case tok::kw_for: 497 case tok::kw_switch: 498 case tok::kw_try: 499 case tok::kw___try: 500 if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) 501 LBraceStack.back()->BlockKind = BK_Block; 502 break; 503 default: 504 break; 505 } 506 PrevTok = Tok; 507 Tok = NextTok; 508 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 509 510 // Assume other blocks for all unclosed opening braces. 
511 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 512 if (LBraceStack[i]->BlockKind == BK_Unknown) 513 LBraceStack[i]->BlockKind = BK_Block; 514 } 515 516 FormatTok = Tokens->setPosition(StoredPosition); 517 } 518 519 template <class T> 520 static inline void hash_combine(std::size_t &seed, const T &v) { 521 std::hash<T> hasher; 522 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 523 } 524 525 size_t UnwrappedLineParser::computePPHash() const { 526 size_t h = 0; 527 for (const auto &i : PPStack) { 528 hash_combine(h, size_t(i.Kind)); 529 hash_combine(h, i.Line); 530 } 531 return h; 532 } 533 534 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 535 bool MunchSemi) { 536 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 537 "'{' or macro block token expected"); 538 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 539 FormatTok->BlockKind = BK_Block; 540 541 size_t PPStartHash = computePPHash(); 542 543 unsigned InitialLevel = Line->Level; 544 nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); 545 546 if (MacroBlock && FormatTok->is(tok::l_paren)) 547 parseParens(); 548 549 size_t NbPreprocessorDirectives = 550 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 551 addUnwrappedLine(); 552 size_t OpeningLineIndex = 553 CurrentLines->empty() 554 ? (UnwrappedLine::kInvalidIndex) 555 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 556 557 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 558 MustBeDeclaration); 559 if (AddLevel) 560 ++Line->Level; 561 parseLevel(/*HasOpeningBrace=*/true); 562 563 if (eof()) 564 return; 565 566 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 567 : !FormatTok->is(tok::r_brace)) { 568 Line->Level = InitialLevel; 569 FormatTok->BlockKind = BK_Block; 570 return; 571 } 572 573 size_t PPEndHash = computePPHash(); 574 575 // Munch the closing brace. 576 nextToken(/*LevelDifference=*/AddLevel ? 
-1 : 0); 577 578 if (MacroBlock && FormatTok->is(tok::l_paren)) 579 parseParens(); 580 581 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 582 nextToken(); 583 Line->Level = InitialLevel; 584 585 if (PPStartHash == PPEndHash) { 586 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 587 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 588 // Update the opening line to add the forward reference as well 589 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 590 CurrentLines->size() - 1; 591 } 592 } 593 } 594 595 static bool isGoogScope(const UnwrappedLine &Line) { 596 // FIXME: Closure-library specific stuff should not be hard-coded but be 597 // configurable. 598 if (Line.Tokens.size() < 4) 599 return false; 600 auto I = Line.Tokens.begin(); 601 if (I->Tok->TokenText != "goog") 602 return false; 603 ++I; 604 if (I->Tok->isNot(tok::period)) 605 return false; 606 ++I; 607 if (I->Tok->TokenText != "scope") 608 return false; 609 ++I; 610 return I->Tok->is(tok::l_paren); 611 } 612 613 static bool isIIFE(const UnwrappedLine &Line, 614 const AdditionalKeywords &Keywords) { 615 // Look for the start of an immediately invoked anonymous function. 616 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 617 // This is commonly done in JavaScript to create a new, anonymous scope. 618 // Example: (function() { ... 
})() 619 if (Line.Tokens.size() < 3) 620 return false; 621 auto I = Line.Tokens.begin(); 622 if (I->Tok->isNot(tok::l_paren)) 623 return false; 624 ++I; 625 if (I->Tok->isNot(Keywords.kw_function)) 626 return false; 627 ++I; 628 return I->Tok->is(tok::l_paren); 629 } 630 631 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 632 const FormatToken &InitialToken) { 633 if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro)) 634 return Style.BraceWrapping.AfterNamespace; 635 if (InitialToken.is(tok::kw_class)) 636 return Style.BraceWrapping.AfterClass; 637 if (InitialToken.is(tok::kw_union)) 638 return Style.BraceWrapping.AfterUnion; 639 if (InitialToken.is(tok::kw_struct)) 640 return Style.BraceWrapping.AfterStruct; 641 return false; 642 } 643 644 void UnwrappedLineParser::parseChildBlock() { 645 FormatTok->BlockKind = BK_Block; 646 nextToken(); 647 { 648 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && 649 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 650 ScopedLineState LineState(*this); 651 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 652 /*MustBeDeclaration=*/false); 653 Line->Level += SkipIndent ? 0 : 1; 654 parseLevel(/*HasOpeningBrace=*/true); 655 flushComments(isOnNewLine(*FormatTok)); 656 Line->Level -= SkipIndent ? 
0 : 1; 657 } 658 nextToken(); 659 } 660 661 void UnwrappedLineParser::parsePPDirective() { 662 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 663 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 664 665 nextToken(); 666 667 if (!FormatTok->Tok.getIdentifierInfo()) { 668 parsePPUnknown(); 669 return; 670 } 671 672 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 673 case tok::pp_define: 674 parsePPDefine(); 675 return; 676 case tok::pp_if: 677 parsePPIf(/*IfDef=*/false); 678 break; 679 case tok::pp_ifdef: 680 case tok::pp_ifndef: 681 parsePPIf(/*IfDef=*/true); 682 break; 683 case tok::pp_else: 684 parsePPElse(); 685 break; 686 case tok::pp_elif: 687 parsePPElIf(); 688 break; 689 case tok::pp_endif: 690 parsePPEndIf(); 691 break; 692 default: 693 parsePPUnknown(); 694 break; 695 } 696 } 697 698 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 699 size_t Line = CurrentLines->size(); 700 if (CurrentLines == &PreprocessorDirectives) 701 Line += Lines.size(); 702 703 if (Unreachable || 704 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 705 PPStack.push_back({PP_Unreachable, Line}); 706 else 707 PPStack.push_back({PP_Conditional, Line}); 708 } 709 710 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 711 ++PPBranchLevel; 712 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 713 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 714 PPLevelBranchIndex.push_back(0); 715 PPLevelBranchCount.push_back(0); 716 } 717 PPChainBranchIndex.push(0); 718 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 719 conditionalCompilationCondition(Unreachable || Skip); 720 } 721 722 void UnwrappedLineParser::conditionalCompilationAlternative() { 723 if (!PPStack.empty()) 724 PPStack.pop_back(); 725 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 726 if (!PPChainBranchIndex.empty()) 727 ++PPChainBranchIndex.top(); 728 conditionalCompilationCondition( 729 
PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 730 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 731 } 732 733 void UnwrappedLineParser::conditionalCompilationEnd() { 734 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 735 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 736 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 737 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 738 } 739 } 740 // Guard against #endif's without #if. 741 if (PPBranchLevel > -1) 742 --PPBranchLevel; 743 if (!PPChainBranchIndex.empty()) 744 PPChainBranchIndex.pop(); 745 if (!PPStack.empty()) 746 PPStack.pop_back(); 747 } 748 749 void UnwrappedLineParser::parsePPIf(bool IfDef) { 750 bool IfNDef = FormatTok->is(tok::pp_ifndef); 751 nextToken(); 752 bool Unreachable = false; 753 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 754 Unreachable = true; 755 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 756 Unreachable = true; 757 conditionalCompilationStart(Unreachable); 758 FormatToken *IfCondition = FormatTok; 759 // If there's a #ifndef on the first line, and the only lines before it are 760 // comments, it could be an include guard. 761 bool MaybeIncludeGuard = IfNDef; 762 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 763 for (auto &Line : Lines) { 764 if (!Line.Tokens.front().Tok->is(tok::comment)) { 765 MaybeIncludeGuard = false; 766 IncludeGuard = IG_Rejected; 767 break; 768 } 769 } 770 --PPBranchLevel; 771 parsePPUnknown(); 772 ++PPBranchLevel; 773 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 774 IncludeGuard = IG_IfNdefed; 775 IncludeGuardToken = IfCondition; 776 } 777 } 778 779 void UnwrappedLineParser::parsePPElse() { 780 // If a potential include guard has an #else, it's not an include guard. 
781 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 782 IncludeGuard = IG_Rejected; 783 conditionalCompilationAlternative(); 784 if (PPBranchLevel > -1) 785 --PPBranchLevel; 786 parsePPUnknown(); 787 ++PPBranchLevel; 788 } 789 790 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 791 792 void UnwrappedLineParser::parsePPEndIf() { 793 conditionalCompilationEnd(); 794 parsePPUnknown(); 795 // If the #endif of a potential include guard is the last thing in the file, 796 // then we found an include guard. 797 unsigned TokenPosition = Tokens->getPosition(); 798 FormatToken *PeekNext = AllTokens[TokenPosition]; 799 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && 800 PeekNext->is(tok::eof) && 801 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 802 IncludeGuard = IG_Found; 803 } 804 805 void UnwrappedLineParser::parsePPDefine() { 806 nextToken(); 807 808 if (!FormatTok->Tok.getIdentifierInfo()) { 809 IncludeGuard = IG_Rejected; 810 IncludeGuardToken = nullptr; 811 parsePPUnknown(); 812 return; 813 } 814 815 if (IncludeGuard == IG_IfNdefed && 816 IncludeGuardToken->TokenText == FormatTok->TokenText) { 817 IncludeGuard = IG_Defined; 818 IncludeGuardToken = nullptr; 819 for (auto &Line : Lines) { 820 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 821 IncludeGuard = IG_Rejected; 822 break; 823 } 824 } 825 } 826 827 nextToken(); 828 if (FormatTok->Tok.getKind() == tok::l_paren && 829 FormatTok->WhitespaceRange.getBegin() == 830 FormatTok->WhitespaceRange.getEnd()) { 831 parseParens(); 832 } 833 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 834 Line->Level += PPBranchLevel + 1; 835 addUnwrappedLine(); 836 ++Line->Level; 837 838 // Errors during a preprocessor directive can only affect the layout of the 839 // preprocessor directive, and thus we ignore them. 
An alternative approach 840 // would be to use the same approach we use on the file level (no 841 // re-indentation if there was a structural error) within the macro 842 // definition. 843 parseFile(); 844 } 845 846 void UnwrappedLineParser::parsePPUnknown() { 847 do { 848 nextToken(); 849 } while (!eof()); 850 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 851 Line->Level += PPBranchLevel + 1; 852 addUnwrappedLine(); 853 } 854 855 // Here we blacklist certain tokens that are not usually the first token in an 856 // unwrapped line. This is used in attempt to distinguish macro calls without 857 // trailing semicolons from other constructs split to several lines. 858 static bool tokenCanStartNewLine(const clang::Token &Tok) { 859 // Semicolon can be a null-statement, l_square can be a start of a macro or 860 // a C++11 attribute, but this doesn't seem to be common. 861 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 862 Tok.isNot(tok::l_square) && 863 // Tokens that can only be used as binary operators and a part of 864 // overloaded operator names. 865 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 866 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 867 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 868 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 869 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 870 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 871 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 872 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 873 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 874 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 875 Tok.isNot(tok::lesslessequal) && 876 // Colon is used in labels, base class lists, initializer lists, 877 // range-based for loops, ternary operator, but should never be the 878 // first token in an unwrapped line. 879 Tok.isNot(tok::colon) && 880 // 'noexcept' is a trailing annotation. 
881 Tok.isNot(tok::kw_noexcept); 882 } 883 884 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 885 const FormatToken *FormatTok) { 886 // FIXME: This returns true for C/C++ keywords like 'struct'. 887 return FormatTok->is(tok::identifier) && 888 (FormatTok->Tok.getIdentifierInfo() == nullptr || 889 !FormatTok->isOneOf( 890 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 891 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 892 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 893 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 894 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 895 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, 896 Keywords.kw_from)); 897 } 898 899 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 900 const FormatToken *FormatTok) { 901 return FormatTok->Tok.isLiteral() || 902 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 903 mustBeJSIdent(Keywords, FormatTok); 904 } 905 906 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 907 // when encountered after a value (see mustBeJSIdentOrValue). 
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
                           const FormatToken *FormatTok) {
  return FormatTok->isOneOf(
      tok::kw_return, Keywords.kw_yield,
      // conditionals
      tok::kw_if, tok::kw_else,
      // loops
      tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
      // switch/case
      tok::kw_switch, tok::kw_case,
      // exceptions
      tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
      // declaration
      tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
      Keywords.kw_async, Keywords.kw_function,
      // import/export
      Keywords.kw_import, tok::kw_export);
}

// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// terminate the line in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  // Previous is the token before the read, Next the freshly read one.
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // If comments are buffered before Next, the first of them decides whether a
  // line break follows Previous.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  // A line is only ever terminated here at a line break.
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
    // If the line contains an '@' sign, the previous token might be an
    // annotation, which can precede another identifier/value.
    bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
                              [](UnwrappedLineNode &LineNode) {
                                return LineNode.Tok->is(tok::at);
                              }) != Line->Tokens.end();
    if (HasAt)
      return;
  }
  // A value followed by '!' on the next line: end the line before the '!'.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // Two values (or a closing bracket / postfix operator and a value) separated
  // by a line break, unless either side belongs to a template string
  // expression.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus)))
    return addUnwrappedLine();
  // A value or ')' followed by a token that starts a declaration or statement.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next))
    return addUnwrappedLine();
}

// Parses one structural element starting at the current token, which must not
// be a '{'. Constructs that are recognized by their first token are dispatched
// in the switch below; cases that 'break' out of the switch continue with the
// handling that follows it.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include directive, optionally followed by a string literal,
  // forms a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      // Braced inline assembly: both braces are typed TT_InlineASMBrace and
      // every token in between is marked as Finalized.
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // Java, JavaScript and C#: only consume the keyword. Otherwise parse an
    // access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern "<string>" {': parse the braced block as declarations. Without
    // BraceWrapping.AfterExternBlock the block is parsed with AddLevel=false
    // and no line is added before the brace.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // Proto: 'import', an optional 'public', a string literal and an
        // optional ';' form one line. Without the string literal, return
        // without adding a line.
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // C++: the signals/slots keywords followed by ':' form a line of their
    // own.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic handling: consume tokens one construct at a time until a case
  // below returns or eof is reached.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // '@{' is parsed as a braced list.
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        // Java '@interface': consume 'interface' and keep parsing.
        nextToken();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above). But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // 'typedef' followed by one of the NS_/CF_ enum macros is parsed like an
      // enum.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM, Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The '}' itself is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^', optionally followed by a type or identifier, a parenthesized
      // list and a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      // 'function' is treated as an expression when the line already has
      // tokens, except when the only token so far is 'async'.
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the next token, then restore the stream position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      TokenCount = Line->Tokens.size();
      // Only when the identifier is the sole token of the line so far, or is
      // preceded by nothing but a comment.
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // The identifier counts as a macro when it is all upper case, is at
        // least five characters long or function-like, and is followed by a
        // line break and a token that can start a new line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // Proto: '= <' opens a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

// Tries to parse a C++ lambda starting at the current '['. Returns false if
// the language is not C++ or the '[' cannot introduce a lambda (see
// tryToParseLambdaIntroducer()); in both cases the '[' has been consumed.
// Returns true otherwise, including when the token sequence after the
// introducer turns out not to be a lambda.
bool UnwrappedLineParser::tryToParseLambda() {
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  assert(FormatTok->is(tok::l_square));
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  bool SeenArrow = false;

  // Skip over everything that may appear between the introducer and the '{'
  // of the lambda body.
  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as an heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow) {
        nextToken();
        break;
      }
      // Not a lambda: return without typing the '[' as a lambda introducer.
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
1486 FormatTok->Type = TT_LambdaArrow; 1487 SeenArrow = true; 1488 nextToken(); 1489 break; 1490 default: 1491 return true; 1492 } 1493 } 1494 FormatTok->Type = TT_LambdaLBrace; 1495 LSquare.Type = TT_LambdaLSquare; 1496 parseChildBlock(); 1497 return true; 1498 } 1499 1500 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1501 const FormatToken *Previous = FormatTok->Previous; 1502 if (Previous && 1503 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1504 tok::kw_delete, tok::l_square) || 1505 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1506 Previous->isSimpleTypeSpecifier())) { 1507 nextToken(); 1508 return false; 1509 } 1510 nextToken(); 1511 if (FormatTok->is(tok::l_square)) { 1512 return false; 1513 } 1514 parseSquare(/*LambdaIntroducer=*/true); 1515 return true; 1516 } 1517 1518 void UnwrappedLineParser::tryToParseJSFunction() { 1519 assert(FormatTok->is(Keywords.kw_function) || 1520 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1521 if (FormatTok->is(Keywords.kw_async)) 1522 nextToken(); 1523 // Consume "function". 1524 nextToken(); 1525 1526 // Consume * (generator function). Treat it like C++'s overloaded operators. 1527 if (FormatTok->is(tok::star)) { 1528 FormatTok->Type = TT_OverloadedOperator; 1529 nextToken(); 1530 } 1531 1532 // Consume function name. 1533 if (FormatTok->is(tok::identifier)) 1534 nextToken(); 1535 1536 if (FormatTok->isNot(tok::l_paren)) 1537 return; 1538 1539 // Parse formal parameter list. 1540 parseParens(); 1541 1542 if (FormatTok->is(tok::colon)) { 1543 // Parse a type definition. 1544 nextToken(); 1545 1546 // Eat the type declaration. For braced inline object types, balance braces, 1547 // otherwise just parse until finding an l_brace for the function body. 
1548 if (FormatTok->is(tok::l_brace)) 1549 tryToParseBracedList(); 1550 else 1551 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1552 nextToken(); 1553 } 1554 1555 if (FormatTok->is(tok::semi)) 1556 return; 1557 1558 parseChildBlock(); 1559 } 1560 1561 bool UnwrappedLineParser::tryToParseBracedList() { 1562 if (FormatTok->BlockKind == BK_Unknown) 1563 calculateBraceTypes(); 1564 assert(FormatTok->BlockKind != BK_Unknown); 1565 if (FormatTok->BlockKind == BK_Block) 1566 return false; 1567 nextToken(); 1568 parseBracedList(); 1569 return true; 1570 } 1571 1572 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1573 tok::TokenKind ClosingBraceKind) { 1574 bool HasError = false; 1575 1576 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1577 // replace this by using parseAssigmentExpression() inside. 1578 do { 1579 if (Style.Language == FormatStyle::LK_JavaScript) { 1580 if (FormatTok->is(Keywords.kw_function) || 1581 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1582 tryToParseJSFunction(); 1583 continue; 1584 } 1585 if (FormatTok->is(TT_JsFatArrow)) { 1586 nextToken(); 1587 // Fat arrows can be followed by simple expressions or by child blocks 1588 // in curly braces. 1589 if (FormatTok->is(tok::l_brace)) { 1590 parseChildBlock(); 1591 continue; 1592 } 1593 } 1594 if (FormatTok->is(tok::l_brace)) { 1595 // Could be a method inside of a braced list `{a() { return 1; }}`. 
1596 if (tryToParseBracedList()) 1597 continue; 1598 parseChildBlock(); 1599 } 1600 } 1601 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1602 nextToken(); 1603 return !HasError; 1604 } 1605 switch (FormatTok->Tok.getKind()) { 1606 case tok::caret: 1607 nextToken(); 1608 if (FormatTok->is(tok::l_brace)) { 1609 parseChildBlock(); 1610 } 1611 break; 1612 case tok::l_square: 1613 tryToParseLambda(); 1614 break; 1615 case tok::l_paren: 1616 parseParens(); 1617 // JavaScript can just have free standing methods and getters/setters in 1618 // object literals. Detect them by a "{" following ")". 1619 if (Style.Language == FormatStyle::LK_JavaScript) { 1620 if (FormatTok->is(tok::l_brace)) 1621 parseChildBlock(); 1622 break; 1623 } 1624 break; 1625 case tok::l_brace: 1626 // Assume there are no blocks inside a braced init list apart 1627 // from the ones we explicitly parse out (like lambdas). 1628 FormatTok->BlockKind = BK_BracedInit; 1629 nextToken(); 1630 parseBracedList(); 1631 break; 1632 case tok::less: 1633 if (Style.Language == FormatStyle::LK_Proto) { 1634 nextToken(); 1635 parseBracedList(/*ContinueOnSemicolons=*/false, 1636 /*ClosingBraceKind=*/tok::greater); 1637 } else { 1638 nextToken(); 1639 } 1640 break; 1641 case tok::semi: 1642 // JavaScript (or more precisely TypeScript) can have semicolons in braced 1643 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 1644 // used for error recovery if we have otherwise determined that this is 1645 // a braced list. 
1646 if (Style.Language == FormatStyle::LK_JavaScript) { 1647 nextToken(); 1648 break; 1649 } 1650 HasError = true; 1651 if (!ContinueOnSemicolons) 1652 return !HasError; 1653 nextToken(); 1654 break; 1655 case tok::comma: 1656 nextToken(); 1657 break; 1658 default: 1659 nextToken(); 1660 break; 1661 } 1662 } while (!eof()); 1663 return false; 1664 } 1665 1666 void UnwrappedLineParser::parseParens() { 1667 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1668 nextToken(); 1669 do { 1670 switch (FormatTok->Tok.getKind()) { 1671 case tok::l_paren: 1672 parseParens(); 1673 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1674 parseChildBlock(); 1675 break; 1676 case tok::r_paren: 1677 nextToken(); 1678 return; 1679 case tok::r_brace: 1680 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1681 return; 1682 case tok::l_square: 1683 tryToParseLambda(); 1684 break; 1685 case tok::l_brace: 1686 if (!tryToParseBracedList()) 1687 parseChildBlock(); 1688 break; 1689 case tok::at: 1690 nextToken(); 1691 if (FormatTok->Tok.is(tok::l_brace)) { 1692 nextToken(); 1693 parseBracedList(); 1694 } 1695 break; 1696 case tok::kw_class: 1697 if (Style.Language == FormatStyle::LK_JavaScript) 1698 parseRecord(/*ParseAsExpr=*/true); 1699 else 1700 nextToken(); 1701 break; 1702 case tok::identifier: 1703 if (Style.Language == FormatStyle::LK_JavaScript && 1704 (FormatTok->is(Keywords.kw_function) || 1705 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 1706 tryToParseJSFunction(); 1707 else 1708 nextToken(); 1709 break; 1710 default: 1711 nextToken(); 1712 break; 1713 } 1714 } while (!eof()); 1715 } 1716 1717 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 1718 if (!LambdaIntroducer) { 1719 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1720 if (tryToParseLambda()) 1721 return; 1722 } 1723 do { 1724 switch (FormatTok->Tok.getKind()) { 1725 case tok::l_paren: 1726 parseParens(); 1727 
break;
    case tok::r_square:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside square brackets is an error if there wasn't a matching
      // "{".
      return;
    case tok::l_square:
      parseSquare();
      break;
    case tok::l_brace: {
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    }
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

// Parses an if statement starting at 'if': an optional 'constexpr', the
// parenthesized condition, the then-branch and an optional else-branch
// (including 'else if' chains).
void UnwrappedLineParser::parseIfThenElse() {
  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
  nextToken();
  if (FormatTok->Tok.is(tok::kw_constexpr))
    nextToken();
  if (FormatTok->Tok.is(tok::l_paren))
    parseParens();
  // Set when the line ending in the then-block's '}' is still open, so that a
  // following 'else' can join it.
  bool NeedsUnwrappedLine = false;
  if (FormatTok->Tok.is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else {
    // Unbraced then-branch: a single structural element, one level deeper.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  if (FormatTok->Tok.is(tok::kw_else)) {
    nextToken();
    if (FormatTok->Tok.is(tok::l_brace)) {
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      parseBlock(/*MustBeDeclaration=*/false);
      addUnwrappedLine();
    } else if (FormatTok->Tok.is(tok::kw_if)) {
      parseIfThenElse();
    } else {
      addUnwrappedLine();
      ++Line->Level;
      parseStructuralElement();
      // At eof, add the line while the level is still incremented.
      if (FormatTok->is(tok::eof))
        addUnwrappedLine();
      --Line->Level;
    }
  } else if (NeedsUnwrappedLine) {
    addUnwrappedLine();
  }
}

// Parses a try statement starting at 'try' or '__try': an optional
// initializer list (function-try-block), an optional resource list (Java),
// the try block and all following catch/finally/__except/__finally clauses,
// including their Objective-C '@' forms.
void UnwrappedLineParser::parseTryCatch() {
  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  nextToken();
  // Set when the line ending in the last parsed block's '}' is still open, so
  // that a following clause can join it.
  bool NeedsUnwrappedLine = false;
  if (FormatTok->is(tok::colon)) {
    // We are in a function try block, what comes is an initializer list.
    nextToken();
    while (FormatTok->is(tok::identifier)) {
      nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::comma))
        nextToken();
    }
  }
  // Parse try with resource.
  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
    parseParens();
  }
  if (FormatTok->is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    if (Style.BraceWrapping.BeforeCatch) {
      addUnwrappedLine();
    } else {
      NeedsUnwrappedLine = true;
    }
  } else if (!FormatTok->is(tok::kw_catch)) {
    // The C++ standard requires a compound-statement after a try.
    // If there's none, we try to assume there's a structuralElement
    // and try to continue.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  // One iteration per handler clause.
  while (1) {
    if (FormatTok->is(tok::at))
      nextToken();
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
                             tok::kw___finally) ||
          ((Style.Language == FormatStyle::LK_Java ||
            Style.Language == FormatStyle::LK_JavaScript) &&
           FormatTok->is(Keywords.kw_finally)) ||
          (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
           FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
      break;
    nextToken();
    // Skip to the '{' of the handler block; give up on ';', '}' or eof.
    while (FormatTok->isNot(tok::l_brace)) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        continue;
      }
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
        return;
      nextToken();
    }
    NeedsUnwrappedLine = false;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  }
  if (NeedsUnwrappedLine)
    addUnwrappedLine();
}

// Parses a namespace starting at 'namespace' or at a namespace macro: the
// name (or the macro's argument list) and, if present, the braced body and a
// trailing semicolon.
void UnwrappedLineParser::parseNamespace() {
  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
         "'namespace' expected");

  const FormatToken &InitialToken = *FormatTok;
  nextToken();
  if (InitialToken.is(TT_NamespaceMacro)) {
    parseParens();
  } else {
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
      nextToken();
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ShouldBreakBeforeBrace(Style, InitialToken))
      addUnwrappedLine();

    // The body is indented for NI_All, and for NI_Inner only when the
    // declaration scope stack holds more than one entry.
    bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
                    (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
                     DeclarationScopeStack.size() > 1);
    parseBlock(/*MustBeDeclaration=*/true, AddLevel);
    // Munch the semicolon after a namespace. This is more common than one would
    // think. Putting the semicolon into its own line is very ugly.
    if (FormatTok->Tok.is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  // FIXME: Add error handling.
}

// Parses a new-expression starting at 'new'. Outside Java only the keyword is
// consumed; in Java everything up to and including the argument list and an
// optional anonymous class body is consumed.
void UnwrappedLineParser::parseNew() {
  assert(FormatTok->is(tok::kw_new) && "'new' expected");
  nextToken();
  if (Style.Language != FormatStyle::LK_Java)
    return;

  // In Java, we can parse everything up to the parens, which aren't optional.
  do {
    // There should not be a ;, { or } before the new's open paren.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
      return;

    // Consume the parens.
    if (FormatTok->is(tok::l_paren)) {
      parseParens();

      // If there is a class body of an anonymous class, consume that as child.
if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      return;
    }
    nextToken();
  } while (!eof());
}

// Parses a loop starting at 'for', 'while' or a foreach macro: an optional
// 'await' (JavaScript), the parenthesized header and the loop body.
void UnwrappedLineParser::parseForOrWhileLoop() {
  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
         "'for', 'while' or foreach macro expected");
  nextToken();
  // JS' for await ( ...
  if (Style.Language == FormatStyle::LK_JavaScript &&
      FormatTok->is(Keywords.kw_await))
    nextToken();
  if (FormatTok->Tok.is(tok::l_paren))
    parseParens();
  if (FormatTok->Tok.is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    addUnwrappedLine();
  } else {
    // Unbraced body: a single structural element, one level deeper.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
}

// Parses a do-while statement starting at 'do': the body, then 'while' and
// the rest of the statement.
void UnwrappedLineParser::parseDoWhile() {
  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
  nextToken();
  if (FormatTok->Tok.is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    // 'while' stays on the line of the closing brace unless IndentBraces is
    // set.
    if (Style.BraceWrapping.IndentBraces)
      addUnwrappedLine();
  } else {
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }

  // FIXME: Add error handling.
  if (!FormatTok->Tok.is(tok::kw_while)) {
    addUnwrappedLine();
    return;
  }

  // Consume 'while'; the condition and the ';' are parsed as a structural
  // element.
  nextToken();
  parseStructuralElement();
}

// Parses a label. On entry the current token is consumed as the end of the
// label (the callers visible in this file invoke this at the ':'). The label
// line is emitted one level less indented than its surroundings where the
// level permits; a block directly following the label is parsed as part of it.
void UnwrappedLineParser::parseLabel() {
  nextToken();
  unsigned OldLineLevel = Line->Level;
  // Outdent the label line: past level 1 always, and at level 1 only outside
  // preprocessor directives.
  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
    --Line->Level;
  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Line->Level,
                                       Style.BraceWrapping.AfterCaseLabel,
                                       Style.BraceWrapping.IndentBraces);
    parseBlock(/*MustBeDeclaration=*/false);
    // A 'break' directly after the block is parsed here as well.
    if (FormatTok->Tok.is(tok::kw_break)) {
      if (Style.BraceWrapping.AfterControlStatement)
        addUnwrappedLine();
      parseStructuralElement();
    }
    addUnwrappedLine();
  } else {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  Line->Level = OldLineLevel;
  // Parse the element following the label at the restored level.
  if (FormatTok->isNot(tok::l_brace)) {
    parseStructuralElement();
    addUnwrappedLine();
  }
}

// Parses a case label starting at 'case': skips to the next ':' (or eof) and
// hands over to parseLabel().
void UnwrappedLineParser::parseCaseLabel() {
  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
  // FIXME: fix handling of complex expressions here.
  do {
    nextToken();
  } while (!eof() && !FormatTok->Tok.is(tok::colon));
  parseLabel();
}

// Parses a switch statement starting at 'switch': the parenthesized condition
// and the body.
void UnwrappedLineParser::parseSwitch() {
  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
  nextToken();
  if (FormatTok->Tok.is(tok::l_paren))
    parseParens();
  if (FormatTok->Tok.is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    addUnwrappedLine();
  } else {
    // Unbraced body: a single structural element, one level deeper.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
}

// Parses an access specifier: the keyword at the current token, an optional
// Qt slots keyword and an optional ':'.
void UnwrappedLineParser::parseAccessSpecifier() {
  nextToken();
  // Understand Qt's slots.
2025 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 2026 nextToken(); 2027 // Otherwise, we don't know what it is, and we'd better keep the next token. 2028 if (FormatTok->Tok.is(tok::colon)) 2029 nextToken(); 2030 addUnwrappedLine(); 2031 } 2032 2033 bool UnwrappedLineParser::parseEnum() { 2034 // Won't be 'enum' for NS_ENUMs. 2035 if (FormatTok->Tok.is(tok::kw_enum)) 2036 nextToken(); 2037 2038 // In TypeScript, "enum" can also be used as property name, e.g. in interface 2039 // declarations. An "enum" keyword followed by a colon would be a syntax 2040 // error and thus assume it is just an identifier. 2041 if (Style.Language == FormatStyle::LK_JavaScript && 2042 FormatTok->isOneOf(tok::colon, tok::question)) 2043 return false; 2044 2045 // In protobuf, "enum" can be used as a field name. 2046 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 2047 return false; 2048 2049 // Eat up enum class ... 2050 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 2051 nextToken(); 2052 2053 while (FormatTok->Tok.getIdentifierInfo() || 2054 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 2055 tok::greater, tok::comma, tok::question)) { 2056 nextToken(); 2057 // We can have macros or attributes in between 'enum' and the enum name. 2058 if (FormatTok->is(tok::l_paren)) 2059 parseParens(); 2060 if (FormatTok->is(tok::identifier)) { 2061 nextToken(); 2062 // If there are two identifiers in a row, this is likely an elaborate 2063 // return type. In Java, this can be "implements", etc. 2064 if (Style.isCpp() && FormatTok->is(tok::identifier)) 2065 return false; 2066 } 2067 } 2068 2069 // Just a declaration or something is wrong. 2070 if (FormatTok->isNot(tok::l_brace)) 2071 return true; 2072 FormatTok->BlockKind = BK_Block; 2073 2074 if (Style.Language == FormatStyle::LK_Java) { 2075 // Java enums are different. 
2076 parseJavaEnumBody(); 2077 return true; 2078 } 2079 if (Style.Language == FormatStyle::LK_Proto) { 2080 parseBlock(/*MustBeDeclaration=*/true); 2081 return true; 2082 } 2083 2084 // Parse enum body. 2085 nextToken(); 2086 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 2087 if (HasError) { 2088 if (FormatTok->is(tok::semi)) 2089 nextToken(); 2090 addUnwrappedLine(); 2091 } 2092 return true; 2093 2094 // There is no addUnwrappedLine() here so that we fall through to parsing a 2095 // structural element afterwards. Thus, in "enum A {} n, m;", 2096 // "} n, m;" will end up in one unwrapped line. 2097 } 2098 2099 void UnwrappedLineParser::parseJavaEnumBody() { 2100 // Determine whether the enum is simple, i.e. does not have a semicolon or 2101 // constants with class bodies. Simple enums can be formatted like braced 2102 // lists, contracted to a single line, etc. 2103 unsigned StoredPosition = Tokens->getPosition(); 2104 bool IsSimple = true; 2105 FormatToken *Tok = Tokens->getNextToken(); 2106 while (Tok) { 2107 if (Tok->is(tok::r_brace)) 2108 break; 2109 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2110 IsSimple = false; 2111 break; 2112 } 2113 // FIXME: This will also mark enums with braces in the arguments to enum 2114 // constants as "not simple". This is probably fine in practice, though. 2115 Tok = Tokens->getNextToken(); 2116 } 2117 FormatTok = Tokens->setPosition(StoredPosition); 2118 2119 if (IsSimple) { 2120 nextToken(); 2121 parseBracedList(); 2122 addUnwrappedLine(); 2123 return; 2124 } 2125 2126 // Parse the body of a more complex enum. 2127 // First add a line for everything up to the "{". 2128 nextToken(); 2129 addUnwrappedLine(); 2130 ++Line->Level; 2131 2132 // Parse the enum constants. 2133 while (FormatTok) { 2134 if (FormatTok->is(tok::l_brace)) { 2135 // Parse the constant's class body. 
2136 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2137 /*MunchSemi=*/false); 2138 } else if (FormatTok->is(tok::l_paren)) { 2139 parseParens(); 2140 } else if (FormatTok->is(tok::comma)) { 2141 nextToken(); 2142 addUnwrappedLine(); 2143 } else if (FormatTok->is(tok::semi)) { 2144 nextToken(); 2145 addUnwrappedLine(); 2146 break; 2147 } else if (FormatTok->is(tok::r_brace)) { 2148 addUnwrappedLine(); 2149 break; 2150 } else { 2151 nextToken(); 2152 } 2153 } 2154 2155 // Parse the class body after the enum's ";" if any. 2156 parseLevel(/*HasOpeningBrace=*/true); 2157 nextToken(); 2158 --Line->Level; 2159 addUnwrappedLine(); 2160 } 2161 2162 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 2163 const FormatToken &InitialToken = *FormatTok; 2164 nextToken(); 2165 2166 // The actual identifier can be a nested name specifier, and in macros 2167 // it is often token-pasted. 2168 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 2169 tok::kw___attribute, tok::kw___declspec, 2170 tok::kw_alignas) || 2171 ((Style.Language == FormatStyle::LK_Java || 2172 Style.Language == FormatStyle::LK_JavaScript) && 2173 FormatTok->isOneOf(tok::period, tok::comma))) { 2174 if (Style.Language == FormatStyle::LK_JavaScript && 2175 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 2176 // JavaScript/TypeScript supports inline object types in 2177 // extends/implements positions: 2178 // class Foo implements {bar: number} { } 2179 nextToken(); 2180 if (FormatTok->is(tok::l_brace)) { 2181 tryToParseBracedList(); 2182 continue; 2183 } 2184 } 2185 bool IsNonMacroIdentifier = 2186 FormatTok->is(tok::identifier) && 2187 FormatTok->TokenText != FormatTok->TokenText.upper(); 2188 nextToken(); 2189 // We can have macros or attributes in between 'class' and the class name. 
2190 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 2191 parseParens(); 2192 } 2193 2194 // Note that parsing away template declarations here leads to incorrectly 2195 // accepting function declarations as record declarations. 2196 // In general, we cannot solve this problem. Consider: 2197 // class A<int> B() {} 2198 // which can be a function definition or a class definition when B() is a 2199 // macro. If we find enough real-world cases where this is a problem, we 2200 // can parse for the 'template' keyword in the beginning of the statement, 2201 // and thus rule out the record production in case there is no template 2202 // (this would still leave us with an ambiguity between template function 2203 // and class declarations). 2204 if (FormatTok->isOneOf(tok::colon, tok::less)) { 2205 while (!eof()) { 2206 if (FormatTok->is(tok::l_brace)) { 2207 calculateBraceTypes(/*ExpectClassBody=*/true); 2208 if (!tryToParseBracedList()) 2209 break; 2210 } 2211 if (FormatTok->Tok.is(tok::semi)) 2212 return; 2213 nextToken(); 2214 } 2215 } 2216 if (FormatTok->Tok.is(tok::l_brace)) { 2217 if (ParseAsExpr) { 2218 parseChildBlock(); 2219 } else { 2220 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2221 addUnwrappedLine(); 2222 2223 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 2224 /*MunchSemi=*/false); 2225 } 2226 } 2227 // There is no addUnwrappedLine() here so that we fall through to parsing a 2228 // structural element afterwards. Thus, in "class A {} n, m;", 2229 // "} n, m;" will end up in one unwrapped line. 
2230 } 2231 2232 void UnwrappedLineParser::parseObjCMethod() { 2233 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && 2234 "'(' or identifier expected."); 2235 do { 2236 if (FormatTok->Tok.is(tok::semi)) { 2237 nextToken(); 2238 addUnwrappedLine(); 2239 return; 2240 } else if (FormatTok->Tok.is(tok::l_brace)) { 2241 if (Style.BraceWrapping.AfterFunction) 2242 addUnwrappedLine(); 2243 parseBlock(/*MustBeDeclaration=*/false); 2244 addUnwrappedLine(); 2245 return; 2246 } else { 2247 nextToken(); 2248 } 2249 } while (!eof()); 2250 } 2251 2252 void UnwrappedLineParser::parseObjCProtocolList() { 2253 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 2254 do { 2255 nextToken(); 2256 // Early exit in case someone forgot a close angle. 2257 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2258 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2259 return; 2260 } while (!eof() && FormatTok->Tok.isNot(tok::greater)); 2261 nextToken(); // Skip '>'. 2262 } 2263 2264 void UnwrappedLineParser::parseObjCUntilAtEnd() { 2265 do { 2266 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2267 nextToken(); 2268 addUnwrappedLine(); 2269 break; 2270 } 2271 if (FormatTok->is(tok::l_brace)) { 2272 parseBlock(/*MustBeDeclaration=*/false); 2273 // In ObjC interfaces, nothing should be following the "}". 2274 addUnwrappedLine(); 2275 } else if (FormatTok->is(tok::r_brace)) { 2276 // Ignore stray "}". parseStructuralElement doesn't consume them. 
2277 nextToken(); 2278 addUnwrappedLine(); 2279 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 2280 nextToken(); 2281 parseObjCMethod(); 2282 } else { 2283 parseStructuralElement(); 2284 } 2285 } while (!eof()); 2286 } 2287 2288 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2289 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 2290 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 2291 nextToken(); 2292 nextToken(); // interface name 2293 2294 // @interface can be followed by a lightweight generic 2295 // specialization list, then either a base class or a category. 2296 if (FormatTok->Tok.is(tok::less)) { 2297 // Unlike protocol lists, generic parameterizations support 2298 // nested angles: 2299 // 2300 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 2301 // NSObject <NSCopying, NSSecureCoding> 2302 // 2303 // so we need to count how many open angles we have left. 2304 unsigned NumOpenAngles = 1; 2305 do { 2306 nextToken(); 2307 // Early exit in case someone forgot a close angle. 2308 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2309 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2310 break; 2311 if (FormatTok->Tok.is(tok::less)) 2312 ++NumOpenAngles; 2313 else if (FormatTok->Tok.is(tok::greater)) { 2314 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 2315 --NumOpenAngles; 2316 } 2317 } while (!eof() && NumOpenAngles != 0); 2318 nextToken(); // Skip '>'. 2319 } 2320 if (FormatTok->Tok.is(tok::colon)) { 2321 nextToken(); 2322 nextToken(); // base class name 2323 } else if (FormatTok->Tok.is(tok::l_paren)) 2324 // Skip category, if present. 
2325 parseParens(); 2326 2327 if (FormatTok->Tok.is(tok::less)) 2328 parseObjCProtocolList(); 2329 2330 if (FormatTok->Tok.is(tok::l_brace)) { 2331 if (Style.BraceWrapping.AfterObjCDeclaration) 2332 addUnwrappedLine(); 2333 parseBlock(/*MustBeDeclaration=*/true); 2334 } 2335 2336 // With instance variables, this puts '}' on its own line. Without instance 2337 // variables, this ends the @interface line. 2338 addUnwrappedLine(); 2339 2340 parseObjCUntilAtEnd(); 2341 } 2342 2343 // Returns true for the declaration/definition form of @protocol, 2344 // false for the expression form. 2345 bool UnwrappedLineParser::parseObjCProtocol() { 2346 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 2347 nextToken(); 2348 2349 if (FormatTok->is(tok::l_paren)) 2350 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 2351 return false; 2352 2353 // The definition/declaration form, 2354 // @protocol Foo 2355 // - (int)someMethod; 2356 // @end 2357 2358 nextToken(); // protocol name 2359 2360 if (FormatTok->Tok.is(tok::less)) 2361 parseObjCProtocolList(); 2362 2363 // Check for protocol declaration. 2364 if (FormatTok->Tok.is(tok::semi)) { 2365 nextToken(); 2366 addUnwrappedLine(); 2367 return true; 2368 } 2369 2370 addUnwrappedLine(); 2371 parseObjCUntilAtEnd(); 2372 return true; 2373 } 2374 2375 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 2376 bool IsImport = FormatTok->is(Keywords.kw_import); 2377 assert(IsImport || FormatTok->is(tok::kw_export)); 2378 nextToken(); 2379 2380 // Consume the "default" in "export default class/function". 2381 if (FormatTok->is(tok::kw_default)) 2382 nextToken(); 2383 2384 // Consume "async function", "function" and "default function", so that these 2385 // get parsed as free-standing JS functions, i.e. do not require a trailing 2386 // semicolon. 
2387 if (FormatTok->is(Keywords.kw_async)) 2388 nextToken(); 2389 if (FormatTok->is(Keywords.kw_function)) { 2390 nextToken(); 2391 return; 2392 } 2393 2394 // For imports, `export *`, `export {...}`, consume the rest of the line up 2395 // to the terminating `;`. For everything else, just return and continue 2396 // parsing the structural element, i.e. the declaration or expression for 2397 // `export default`. 2398 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 2399 !FormatTok->isStringLiteral()) 2400 return; 2401 2402 while (!eof()) { 2403 if (FormatTok->is(tok::semi)) 2404 return; 2405 if (Line->Tokens.empty()) { 2406 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 2407 // import statement should terminate. 2408 return; 2409 } 2410 if (FormatTok->is(tok::l_brace)) { 2411 FormatTok->BlockKind = BK_Block; 2412 nextToken(); 2413 parseBracedList(); 2414 } else { 2415 nextToken(); 2416 } 2417 } 2418 } 2419 2420 void UnwrappedLineParser::parseStatementMacro() { 2421 nextToken(); 2422 if (FormatTok->is(tok::l_paren)) 2423 parseParens(); 2424 if (FormatTok->is(tok::semi)) 2425 nextToken(); 2426 addUnwrappedLine(); 2427 } 2428 2429 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 2430 StringRef Prefix = "") { 2431 llvm::dbgs() << Prefix << "Line(" << Line.Level 2432 << ", FSC=" << Line.FirstStartColumn << ")" 2433 << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 2434 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2435 E = Line.Tokens.end(); 2436 I != E; ++I) { 2437 llvm::dbgs() << I->Tok->Tok.getName() << "[" 2438 << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn 2439 << "] "; 2440 } 2441 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 2442 E = Line.Tokens.end(); 2443 I != E; ++I) { 2444 const UnwrappedLineNode &Node = *I; 2445 for (SmallVectorImpl<UnwrappedLine>::const_iterator 2446 I = Node.Children.begin(), 2447 E = Node.Children.end(); 2448 I != E; ++I) { 2449 printDebugInfo(*I, "\nChild: "); 2450 } 2451 } 2452 llvm::dbgs() << "\n"; 2453 } 2454 2455 void UnwrappedLineParser::addUnwrappedLine() { 2456 if (Line->Tokens.empty()) 2457 return; 2458 LLVM_DEBUG({ 2459 if (CurrentLines == &Lines) 2460 printDebugInfo(*Line); 2461 }); 2462 CurrentLines->push_back(std::move(*Line)); 2463 Line->Tokens.clear(); 2464 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 2465 Line->FirstStartColumn = 0; 2466 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 2467 CurrentLines->append( 2468 std::make_move_iterator(PreprocessorDirectives.begin()), 2469 std::make_move_iterator(PreprocessorDirectives.end())); 2470 PreprocessorDirectives.clear(); 2471 } 2472 // Disconnect the current token from the last token on the previous line. 2473 FormatTok->Previous = nullptr; 2474 } 2475 2476 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 2477 2478 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 2479 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 2480 FormatTok.NewlinesBefore > 0; 2481 } 2482 2483 // Checks if \p FormatTok is a line comment that continues the line comment 2484 // section on \p Line. 
2485 static bool continuesLineCommentSection(const FormatToken &FormatTok, 2486 const UnwrappedLine &Line, 2487 llvm::Regex &CommentPragmasRegex) { 2488 if (Line.Tokens.empty()) 2489 return false; 2490 2491 StringRef IndentContent = FormatTok.TokenText; 2492 if (FormatTok.TokenText.startswith("//") || 2493 FormatTok.TokenText.startswith("/*")) 2494 IndentContent = FormatTok.TokenText.substr(2); 2495 if (CommentPragmasRegex.match(IndentContent)) 2496 return false; 2497 2498 // If Line starts with a line comment, then FormatTok continues the comment 2499 // section if its original column is greater or equal to the original start 2500 // column of the line. 2501 // 2502 // Define the min column token of a line as follows: if a line ends in '{' or 2503 // contains a '{' followed by a line comment, then the min column token is 2504 // that '{'. Otherwise, the min column token of the line is the first token of 2505 // the line. 2506 // 2507 // If Line starts with a token other than a line comment, then FormatTok 2508 // continues the comment section if its original column is greater than the 2509 // original start column of the min column token of the line. 
2510 // 2511 // For example, the second line comment continues the first in these cases: 2512 // 2513 // // first line 2514 // // second line 2515 // 2516 // and: 2517 // 2518 // // first line 2519 // // second line 2520 // 2521 // and: 2522 // 2523 // int i; // first line 2524 // // second line 2525 // 2526 // and: 2527 // 2528 // do { // first line 2529 // // second line 2530 // int i; 2531 // } while (true); 2532 // 2533 // and: 2534 // 2535 // enum { 2536 // a, // first line 2537 // // second line 2538 // b 2539 // }; 2540 // 2541 // The second line comment doesn't continue the first in these cases: 2542 // 2543 // // first line 2544 // // second line 2545 // 2546 // and: 2547 // 2548 // int i; // first line 2549 // // second line 2550 // 2551 // and: 2552 // 2553 // do { // first line 2554 // // second line 2555 // int i; 2556 // } while (true); 2557 // 2558 // and: 2559 // 2560 // enum { 2561 // a, // first line 2562 // // second line 2563 // }; 2564 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 2565 2566 // Scan for '{//'. If found, use the column of '{' as a min column for line 2567 // comment section continuation. 2568 const FormatToken *PreviousToken = nullptr; 2569 for (const UnwrappedLineNode &Node : Line.Tokens) { 2570 if (PreviousToken && PreviousToken->is(tok::l_brace) && 2571 isLineComment(*Node.Tok)) { 2572 MinColumnToken = PreviousToken; 2573 break; 2574 } 2575 PreviousToken = Node.Tok; 2576 2577 // Grab the last newline preceding a token in this unwrapped line. 
2578 if (Node.Tok->NewlinesBefore > 0) { 2579 MinColumnToken = Node.Tok; 2580 } 2581 } 2582 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 2583 MinColumnToken = PreviousToken; 2584 } 2585 2586 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 2587 MinColumnToken); 2588 } 2589 2590 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 2591 bool JustComments = Line->Tokens.empty(); 2592 for (SmallVectorImpl<FormatToken *>::const_iterator 2593 I = CommentsBeforeNextToken.begin(), 2594 E = CommentsBeforeNextToken.end(); 2595 I != E; ++I) { 2596 // Line comments that belong to the same line comment section are put on the 2597 // same line since later we might want to reflow content between them. 2598 // Additional fine-grained breaking of line comment sections is controlled 2599 // by the class BreakableLineCommentSection in case it is desirable to keep 2600 // several line comment sections in the same unwrapped line. 2601 // 2602 // FIXME: Consider putting separate line comment sections as children to the 2603 // unwrapped line instead. 
2604 (*I)->ContinuesLineCommentSection = 2605 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 2606 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 2607 addUnwrappedLine(); 2608 pushToken(*I); 2609 } 2610 if (NewlineBeforeNext && JustComments) 2611 addUnwrappedLine(); 2612 CommentsBeforeNextToken.clear(); 2613 } 2614 2615 void UnwrappedLineParser::nextToken(int LevelDifference) { 2616 if (eof()) 2617 return; 2618 flushComments(isOnNewLine(*FormatTok)); 2619 pushToken(FormatTok); 2620 FormatToken *Previous = FormatTok; 2621 if (Style.Language != FormatStyle::LK_JavaScript) 2622 readToken(LevelDifference); 2623 else 2624 readTokenWithJavaScriptASI(); 2625 FormatTok->Previous = Previous; 2626 } 2627 2628 void UnwrappedLineParser::distributeComments( 2629 const SmallVectorImpl<FormatToken *> &Comments, 2630 const FormatToken *NextTok) { 2631 // Whether or not a line comment token continues a line is controlled by 2632 // the method continuesLineCommentSection, with the following caveat: 2633 // 2634 // Define a trail of Comments to be a nonempty proper postfix of Comments such 2635 // that each comment line from the trail is aligned with the next token, if 2636 // the next token exists. If a trail exists, the beginning of the maximal 2637 // trail is marked as a start of a new comment section. 2638 // 2639 // For example in this code: 2640 // 2641 // int a; // line about a 2642 // // line 1 about b 2643 // // line 2 about b 2644 // int b; 2645 // 2646 // the two lines about b form a maximal trail, so there are two sections, the 2647 // first one consisting of the single comment "// line about a" and the 2648 // second one consisting of the next two comments. 2649 if (Comments.empty()) 2650 return; 2651 bool ShouldPushCommentsInCurrentLine = true; 2652 bool HasTrailAlignedWithNextToken = false; 2653 unsigned StartOfTrailAlignedWithNextToken = 0; 2654 if (NextTok) { 2655 // We are skipping the first element intentionally. 
2656 for (unsigned i = Comments.size() - 1; i > 0; --i) { 2657 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 2658 HasTrailAlignedWithNextToken = true; 2659 StartOfTrailAlignedWithNextToken = i; 2660 } 2661 } 2662 } 2663 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 2664 FormatToken *FormatTok = Comments[i]; 2665 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 2666 FormatTok->ContinuesLineCommentSection = false; 2667 } else { 2668 FormatTok->ContinuesLineCommentSection = 2669 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 2670 } 2671 if (!FormatTok->ContinuesLineCommentSection && 2672 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 2673 ShouldPushCommentsInCurrentLine = false; 2674 } 2675 if (ShouldPushCommentsInCurrentLine) { 2676 pushToken(FormatTok); 2677 } else { 2678 CommentsBeforeNextToken.push_back(FormatTok); 2679 } 2680 } 2681 } 2682 2683 void UnwrappedLineParser::readToken(int LevelDifference) { 2684 SmallVector<FormatToken *, 1> Comments; 2685 do { 2686 FormatTok = Tokens->getNextToken(); 2687 assert(FormatTok); 2688 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 2689 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 2690 distributeComments(Comments, FormatTok); 2691 Comments.clear(); 2692 // If there is an unfinished unwrapped line, we flush the preprocessor 2693 // directives only after that unwrapped line was finished later. 
2694 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 2695 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 2696 assert((LevelDifference >= 0 || 2697 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 2698 "LevelDifference makes Line->Level negative"); 2699 Line->Level += LevelDifference; 2700 // Comments stored before the preprocessor directive need to be output 2701 // before the preprocessor directive, at the same level as the 2702 // preprocessor directive, as we consider them to apply to the directive. 2703 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 2704 PPBranchLevel > 0) 2705 Line->Level += PPBranchLevel; 2706 flushComments(isOnNewLine(*FormatTok)); 2707 parsePPDirective(); 2708 } 2709 while (FormatTok->Type == TT_ConflictStart || 2710 FormatTok->Type == TT_ConflictEnd || 2711 FormatTok->Type == TT_ConflictAlternative) { 2712 if (FormatTok->Type == TT_ConflictStart) { 2713 conditionalCompilationStart(/*Unreachable=*/false); 2714 } else if (FormatTok->Type == TT_ConflictAlternative) { 2715 conditionalCompilationAlternative(); 2716 } else if (FormatTok->Type == TT_ConflictEnd) { 2717 conditionalCompilationEnd(); 2718 } 2719 FormatTok = Tokens->getNextToken(); 2720 FormatTok->MustBreakBefore = true; 2721 } 2722 2723 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 2724 !Line->InPPDirective) { 2725 continue; 2726 } 2727 2728 if (!FormatTok->Tok.is(tok::comment)) { 2729 distributeComments(Comments, FormatTok); 2730 Comments.clear(); 2731 return; 2732 } 2733 2734 Comments.push_back(FormatTok); 2735 } while (!eof()); 2736 2737 distributeComments(Comments, nullptr); 2738 Comments.clear(); 2739 } 2740 2741 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 2742 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 2743 if (MustBreakBeforeNextToken) { 2744 Line->Tokens.back().Tok->MustBreakBefore = true; 2745 MustBreakBeforeNextToken = false; 2746 } 2747 } 2748 2749 } // end namespace format 2750 } // 
end namespace clang 2751